/*
* Publication Harvester
* Copyright (c) 2003-2006 Stellman & Greene Consulting
* Developed for Joshua Zivin and Pierre Azoulay, Columbia University
* http://www.stellman-greene.com/PublicationHarvester
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* this program (GPL.txt); if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
using System;
using System.Collections.Generic;
using System.Text;
using System.Xml;
using System.Net;
using System.IO;
namespace Com.StellmanGreene.PubMed
{
/// <summary>
/// The NCBI class issues web queries to the NCBI server. The NCBI server
/// expects two queries. First, it wants a query to the esearch page, which
/// finds the data and puts it in a cache on the server. The second query,
/// to efetch, retrieves that data in the format specified by the FetchMethod
/// passed to the constructor.
/// </summary>
public class NCBI
{
    /// <summary>
    /// Base URL of the NCBI E-utilities endpoints. HTTPS is used because it is the
    /// base URL documented by NCBI; a plain-http POST that gets redirected would
    /// lose its request body.
    /// </summary>
    private const string EutilsBaseUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";

    /// <summary>
    /// Fixed esearch parameters. The URL-escaped search term is appended to this string.
    /// </summary>
    private const string EsearchParameters = "db=Pubmed&retmax=1&usehistory=y&term=";

    /// <summary>
    /// When true, the esearch query is sent in the body of an HTTP POST request;
    /// when false (the default) it is sent on the query string of a GET request.
    /// </summary>
    /// <remarks>
    /// This setting is static and therefore shared by all instances. Note that the
    /// constructor resets it to false every time an instance is created.
    /// </remarks>
    public static bool UsePostRequest { get; set; }

    /// <summary>
    /// The efetch "rettype" value used when retrieving results.
    /// </summary>
    private readonly string _fetchMethod;

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="FetchMethod">The fetch method ("docsum", "medline", "xml", etc.)</param>
    public NCBI(string FetchMethod)
    {
        _fetchMethod = FetchMethod;

        // Existing behavior, kept for backward compatibility: creating an instance
        // switches the (static) request mode back to GET.
        NCBI.UsePostRequest = false;
    }

    /// <summary>
    /// Execute a query against NCBI.
    /// This is a virtual function because MockNCBI must override it.
    /// </summary>
    /// <param name="Query">The query string to search for</param>
    /// <returns>The results of the search in the format specified when the instance was initialized</returns>
    public virtual string Search(string Query)
    {
        EsearchResults esearchResults = ExecuteEsearch(Query);
        return ExecuteFetch(esearchResults);
    }

    /// <summary>
    /// Issue the first NCBI query (esearch) to initialize the search.
    /// </summary>
    /// <param name="Query">The Medline query to issue</param>
    /// <returns>The parsed esearch response (count, query key and WebEnv)</returns>
    private static EsearchResults ExecuteEsearch(string Query)
    {
        string esearchUrl = EutilsBaseUrl + "esearch.fcgi";

        // Percent-encode the term so that characters such as '&', '#', '+', '=' or '%'
        // inside the query cannot terminate or corrupt the "term" parameter. The same
        // encoding is valid on a query string and in a form-urlencoded body.
        // A null query is sent as an empty term, as string concatenation did before.
        string parameters = EsearchParameters + Uri.EscapeDataString(Query ?? string.Empty);

        WebRequest request;
        if (!UsePostRequest)
        {
            // GET request (default): the parameters travel on the query string
            request = WebRequest.Create(esearchUrl + "?" + parameters);
        }
        else
        {
            // POST request: the parameters travel in the request body
            request = WebRequest.Create(esearchUrl);
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";
            byte[] body = Encoding.UTF8.GetBytes(parameters);
            request.ContentLength = body.Length;
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }
        }

        return ParseSearchResults(ReadResponse(request));
    }

    /// <summary>
    /// Issue the second NCBI query (efetch) to fetch the results.
    /// </summary>
    /// <param name="results">The results of the first query.</param>
    /// <returns>A string containing the results in NCBI text format</returns>
    private string ExecuteFetch(EsearchResults results)
    {
        string fetchUrl = EutilsBaseUrl + "efetch.fcgi?rettype=" + _fetchMethod
            + "&retmode=text&restart=0&db=Pubmed"
            + "&retmax=" + results.Count
            + "&query_key=" + results.QueryKey
            + "&WebEnv=" + results.WebEnv;

        return ReadResponse(WebRequest.Create(fetchUrl));
    }

    /// <summary>
    /// Send the request and read the entire response body as text.
    /// </summary>
    /// <param name="request">A fully prepared web request</param>
    /// <returns>The complete response body</returns>
    private static string ReadResponse(WebRequest request)
    {
        using (WebResponse response = request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Parse the results from ExecuteEsearch()
    /// </summary>
    /// <param name="ResultString">The string containing the XML returned by the NCBI server</param>
    /// <returns>
    /// The count, query key and WebEnv found among the root element's direct children.
    /// Found is true only when Count is greater than zero.
    /// </returns>
    /// <exception cref="Exception">
    /// The XML could not be loaded. The offending data is appended to
    /// pubharvester_error.log in the current directory before the exception is thrown.
    /// </exception>
    internal static EsearchResults ParseSearchResults(string ResultString)
    {
        XmlDocument xml = new XmlDocument();
        try
        {
            xml.LoadXml(ResultString);
        }
        catch (Exception ex)
        {
            // If the XML is malformed, write it to a log file called pubharvester_error.log and throw an exception
            string logPath = Path.Combine(Environment.CurrentDirectory, "pubharvester_error.log");
            using (StreamWriter writer = new StreamWriter(logPath, true))
            {
                writer.WriteLine("XML data received " + System.DateTime.Now.ToString());
                writer.WriteLine("--- begin data ---");
                writer.WriteLine(ResultString);
                writer.WriteLine("--- end data ---");
                writer.WriteLine();
                writer.WriteLine();
            }

            // Keep the original failure as the inner exception so the cause is not lost
            throw new Exception("Unable to process XML returned by the NCBI server. Offending XML has been written to pubharvester_error.log.", ex);
        }

        EsearchResults results = new EsearchResults();
        XmlElement root = xml.DocumentElement;

        // FirstChild is null when the root element is empty, so check before reading Name
        XmlNode firstChild = root.FirstChild;
        if (firstChild != null && firstChild.Name == "ERROR")
        {
            // No results were found
            results.Found = false;
            results.Count = 0;
            results.WebEnv = "";
            return results;
        }

        // Go through the root's direct children and pull out WebEnv, QueryKey and Count
        foreach (XmlNode child in root.ChildNodes)
        {
            switch (child.Name)
            {
                case "WebEnv":
                    results.WebEnv = child.InnerText;
                    break;
                case "QueryKey":
                    results.QueryKey = ParseInteger(child.InnerText);
                    break;
                case "Count":
                    results.Count = ParseInteger(child.InnerText);
                    break;
            }
        }

        // Found is true only if there were results
        results.Found = results.Count > 0;
        return results;
    }

    /// <summary>
    /// Parse an integer from server-supplied text, independent of the machine's culture settings.
    /// </summary>
    /// <param name="text">The text of an XML element that holds a whole number</param>
    /// <returns>The parsed value</returns>
    private static int ParseInteger(string text)
    {
        return int.Parse(text, System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// EsearchResults is returned by ExecuteEsearch(), and used as input for ExecuteFetch().
    /// The NCBI web search requires two or more queries. The first one feeds it the search
    /// terms, while the rest of the queries page through the results.
    /// </summary>
    internal class EsearchResults
    {
        /// <summary>
        /// Count contains the number of results found.
        /// </summary>
        public int Count { get; set; }

        /// <summary>
        /// Found is set to false if no results are returned.
        /// </summary>
        public bool Found { get; set; }

        /// <summary>
        /// The query key is a parameter returned by NCBI to identify the results.
        /// </summary>
        public int QueryKey { get; set; }

        /// <summary>
        /// WebEnv is a parameter returned by NCBI to identify the results.
        /// </summary>
        public string WebEnv { get; set; }
    }
}
}