/*
 * Publication Harvester
 * Copyright (c) 2003-2006 Stellman & Greene Consulting
 * Developed for Joshua Zivin and Pierre Azoulay, Columbia University
 * http://www.stellman-greene.com/PublicationHarvester
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program (GPL.txt); if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using System.Xml;
using System.Net;
using System.IO;

namespace Com.StellmanGreene.PubMed
{
    /// <summary>
    /// The NCBI class issues web queries to the NCBI server. The NCBI server
    /// expects two queries. First, it wants a query to the esearch page, which
    /// finds the data and puts it in a cache on the server. The second query
    /// to efetch retrieves that data in the format specified by FetchMethod.
    /// </summary>
    public class NCBI
    {
        // NOTE(review): NCBI's current E-utilities documentation lists an https base URL;
        // these endpoints are kept exactly as they were - confirm whether they should be updated.
        private const string EsearchUrl = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi";
        private const string EfetchUrl = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";

        // Fixed esearch parameters; the query term is appended directly after "term=".
        private const string EsearchParameters = "db=Pubmed&retmax=1&usehistory=y&term=";

        // Name of the file (in the current directory) that receives unparseable server responses.
        private const string ErrorLogFileName = "pubharvester_error.log";

        /// <summary>
        /// When true, the esearch query is sent as a POST form body; when false
        /// (the default) it is sent as a GET query string.
        /// Note that this is static and that constructing an NCBI instance
        /// resets it to false.
        /// </summary>
        public static bool UsePostRequest { get; set; }

        private string FetchMethod;

        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="FetchMethod">The fetch method ("docsum", "medline", "xml", etc.)</param>
        public NCBI(string FetchMethod)
        {
            this.FetchMethod = FetchMethod;

            // NOTE(review): this resets a process-wide static on every construction; kept
            // because callers may rely on it - set UsePostRequest after creating the instance.
            NCBI.UsePostRequest = false;
        }

        /// <summary>
        /// Execute a query against NCBI.
        /// This is a virtual function because MockNCBI must override it.
        /// </summary>
        /// <param name="Query">The query string to search for</param>
        /// <returns>The results of the search in the format specified when the instance was initialized</returns>
        public virtual string Search(string Query)
        {
            // NOTE(review): the fetch is issued even when the search reports no results
            // (Found == false); preserved as-is - confirm whether callers rely on that response.
            EsearchResults esearchResults = ExecuteEsearch(Query);
            return ExecuteFetch(esearchResults);
        }

        /// <summary>
        /// Issue the first NCBI query to initialize the search.
        /// </summary>
        /// <param name="Query">The Medline query to issue</param>
        /// <returns>The WebEnv, QueryKey and Count parsed from the esearch response</returns>
        private static EsearchResults ExecuteEsearch(string Query)
        {
            // NOTE(review): Query is appended verbatim, exactly as before. If callers pass raw
            // (un-encoded) terms, characters such as '&', '#' or '+' would alter the request -
            // confirm whether callers pre-encode before adding escaping here.
            WebRequest request;
            if (!UsePostRequest)
            {
                // GET request (default): the parameters travel in the query string
                request = WebRequest.Create(EsearchUrl + "?" + EsearchParameters + Query);
            }
            else
            {
                // POST request: the same parameters travel as a form-encoded body
                request = WebRequest.Create(EsearchUrl);
                request.Method = "POST";
                request.ContentType = "application/x-www-form-urlencoded";

                byte[] body = Encoding.UTF8.GetBytes(EsearchParameters + Query);
                request.ContentLength = body.Length;
                using (Stream dataStream = request.GetRequestStream())
                {
                    dataStream.Write(body, 0, body.Length);
                }
            }

            return ParseSearchResults(ReadResponse(request));
        }

        /// <summary>
        /// Issue the second NCBI query to fetch the results.
        /// </summary>
        /// <param name="results">The results of the first query.</param>
        /// <returns>A string containing the results in NCBI text format</returns>
        private string ExecuteFetch(EsearchResults results)
        {
            // WebEnv is null when the esearch response carried no WebEnv element; send it as
            // empty. It is an opaque server token, so escape it rather than trust its characters.
            string webEnv = Uri.EscapeDataString(results.WebEnv ?? string.Empty);

            StringBuilder url = new StringBuilder(EfetchUrl);
            url.Append("?rettype=").Append(this.FetchMethod);
            url.Append("&retmode=text&restart=0&db=Pubmed");
            url.Append("&retmax=").Append(results.Count.ToString(CultureInfo.InvariantCulture));
            url.Append("&query_key=").Append(results.QueryKey.ToString(CultureInfo.InvariantCulture));
            url.Append("&WebEnv=").Append(webEnv);

            return ReadResponse(WebRequest.Create(url.ToString()));
        }

        /// <summary>
        /// Send the request and read the entire response body as text.
        /// </summary>
        /// <param name="request">A fully prepared web request</param>
        /// <returns>The response body</returns>
        private static string ReadResponse(WebRequest request)
        {
            using (WebResponse response = request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Parse the XML returned by the esearch query issued in ExecuteEsearch().
        /// </summary>
        /// <param name="ResultString">The string containing the XML returned by the NCBI server</param>
        /// <returns>
        /// The parsed results. Found is false when the first child of the root element is
        /// ERROR, when the root element is empty, or when Count is zero.
        /// </returns>
        /// <exception cref="Exception">
        /// The XML could not be loaded; the original failure is available as InnerException.
        /// </exception>
        /// <exception cref="FormatException">QueryKey or Count is not a valid integer.</exception>
        internal static EsearchResults ParseSearchResults(string ResultString)
        {
            XmlDocument xml = new XmlDocument();

            // The text comes from the network: never resolve external DTDs or entities.
            xml.XmlResolver = null;

            try
            {
                xml.LoadXml(ResultString);
            }
            catch (Exception ex)
            {
                // If the XML is malformed, write it to pubharvester_error.log and throw an
                // exception. Logging is best-effort so a log failure cannot hide the XML error.
                string message = "Unable to process XML returned by the NCBI server.";
                if (TryWriteErrorLog(ResultString))
                {
                    message += " Offending XML has been written to pubharvester_error.log.";
                }
                else
                {
                    message += " The offending XML could not be written to pubharvester_error.log.";
                }
                throw new Exception(message, ex);
            }

            EsearchResults results = new EsearchResults();
            XmlElement root = xml.DocumentElement;

            // An empty root element has no first child, so guard against null here.
            XmlNode firstChild = root.FirstChild;
            if (firstChild != null && firstChild.Name == "ERROR")
            {
                // No results were found
                results.Found = false;
                results.Count = 0;
                results.WebEnv = "";
                return results;
            }

            // Go through the XML and pull out WebEnv, QueryKey and Count
            foreach (XmlNode child in root.ChildNodes)
            {
                switch (child.Name)
                {
                    case "WebEnv":
                        results.WebEnv = child.InnerText;
                        break;
                    case "QueryKey":
                        results.QueryKey = Convert.ToInt32(child.InnerText, CultureInfo.InvariantCulture);
                        break;
                    case "Count":
                        results.Count = Convert.ToInt32(child.InnerText, CultureInfo.InvariantCulture);
                        break;
                }
            }

            // Found is true only if there were results
            results.Found = results.Count > 0;
            return results;
        }

        /// <summary>
        /// Append the unparseable server response to pubharvester_error.log in the
        /// current directory.
        /// </summary>
        /// <param name="ResultString">The text received from the server</param>
        /// <returns>True if the log entry was written, false if the file could not be written</returns>
        private static bool TryWriteErrorLog(string ResultString)
        {
            try
            {
                string logPath = Path.Combine(Environment.CurrentDirectory, ErrorLogFileName);
                using (StreamWriter writer = new StreamWriter(logPath, true))
                {
                    writer.WriteLine("XML data received " + System.DateTime.Now.ToString());
                    writer.WriteLine("--- begin data ---");
                    writer.WriteLine(ResultString);
                    writer.WriteLine("--- end data ---");
                    writer.WriteLine();
                    writer.WriteLine();
                }
                return true;
            }
            catch (IOException)
            {
                return false;
            }
            catch (UnauthorizedAccessException)
            {
                return false;
            }
        }

        /// <summary>
        /// EsearchResults is returned by ExecuteEsearch(), and used as input for ExecuteFetch().
        /// The NCBI web search requires two or more queries. The first one feeds it the search
        /// terms, while the rest of the queries page through the results.
        /// </summary>
        internal class EsearchResults
        {
            /// <summary>
            /// Count contains the number of results found.
            /// </summary>
            public int Count { get; set; }

            /// <summary>
            /// Found is set to false if no results are returned.
            /// </summary>
            public bool Found { get; set; }

            /// <summary>
            /// The query key is a parameter returned by NCBI to identify the results.
            /// </summary>
            public int QueryKey { get; set; }

            /// <summary>
            /// WebEnv is a parameter returned by NCBI to identify the results.
            /// It stays null if the response contained no WebEnv element.
            /// </summary>
            public string WebEnv { get; set; }
        }
    }
}