/*
 * Publication Harvester
 * Copyright (c) 2003-2006 Stellman & Greene Consulting
 * Developed for Joshua Zivin and Pierre Azoulay, Columbia University
 * http://www.stellman-greene.com/PublicationHarvester
 *
 * This program is free software; you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 2 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program (GPL.txt); if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

using System;
using System.Collections.Generic;
using System.Text;
using System.Xml;
using System.Net;
using System.IO;

namespace Com.StellmanGreene.PubMed
{
    /// <summary>
    /// The NCBI class issues web queries to the NCBI server. The NCBI server
    /// expects two queries. First, it wants a query to the esearch page, which
    /// finds the data and puts it in a cache on the server. The second query
    /// to efetch retrieves that data in the format specified by FetchMethod.
    /// </summary>
    public class NCBI
    {
        // Value sent as the "rettype" parameter of every efetch request.
        private readonly string FetchMethod;

        /// <summary>
        /// Constructor
        /// </summary>
        /// <param name="FetchMethod">The fetch method ("docsum", "medline", "xml", etc.)</param>
        public NCBI(string FetchMethod)
        {
            this.FetchMethod = FetchMethod;
        }

        /// <summary>
        /// Execute a query against NCBI.
        /// This is a virtual function because MockNCBI must override it.
        /// </summary>
        /// <param name="Query">The query string to search for</param>
        /// <returns>The results of the search in the format specified when the instance was initialized</returns>
        public virtual string Search(string Query)
        {
            EsearchResults esearchResults = ExecuteEsearch(Query);
            return ExecuteFetch(esearchResults);
        }

        /// <summary>
        /// Issue the first NCBI query (esearch) to initialize the search.
        /// </summary>
        /// <param name="Query">The Medline query to issue</param>
        /// <returns>The parsed esearch response (WebEnv, QueryKey and Count)</returns>
        private static EsearchResults ExecuteEsearch(string Query)
        {
            // NOTE(review): Query is appended verbatim. A term containing '&' or '#'
            // would alter the request; callers are not visible from this file, so it is
            // unknown whether they already escape the term -- confirm before adding
            // Uri.EscapeDataString here.
            // NOTE(review): NCBI's E-utilities documentation should be checked for the
            // current base URL and scheme -- confirm before changing the literal below.
            string url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=Pubmed&retmax=1&usehistory=y&term="
                + Query;

            return ParseSearchResults(DownloadString(url));
        }

        /// <summary>
        /// Issue the second NCBI query (efetch) to fetch the results.
        /// </summary>
        /// <param name="results">The results of the first query.</param>
        /// <returns>A string containing the results in NCBI text format</returns>
        private string ExecuteFetch(EsearchResults results)
        {
            string url = "http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?rettype="
                + this.FetchMethod
                + "&retmode=text&restart=0&db=Pubmed"
                + "&retmax=" + results.Count
                + "&query_key=" + results.QueryKey
                + "&WebEnv=" + results.WebEnv;

            return DownloadString(url);
        }

        /// <summary>
        /// Issue a web request for the given URL and return the whole response body as text.
        /// The response, its stream and the reader are disposed before returning, so the
        /// underlying connection is released even when reading fails.
        /// </summary>
        /// <param name="url">The URL to request</param>
        /// <returns>The complete response body</returns>
        private static string DownloadString(string url)
        {
            WebRequest request = WebRequest.Create(url);
            using (WebResponse response = request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Parse the XML returned by the esearch query issued in ExecuteEsearch().
        /// </summary>
        /// <param name="ResultString">The string containing the XML returned by the NCBI server</param>
        /// <returns>
        /// The WebEnv, QueryKey and Count found in the XML. Found is true only when Count is
        /// greater than zero; it is false when the first child of the root element is named
        /// "ERROR" or when the root element has no children at all.
        /// </returns>
        /// <exception cref="System.Exception">
        /// The string could not be loaded as XML. The offending text is appended to
        /// pubharvester_error.log in the current directory first, and the original failure
        /// is available as InnerException.
        /// </exception>
        internal static EsearchResults ParseSearchResults(string ResultString)
        {
            XmlDocument xml = new XmlDocument();
            try
            {
                xml.LoadXml(ResultString);
            }
            catch (Exception ex)
            {
                // If the XML is malformed, write it to a log file called
                // pubharvester_error.log and throw an exception
                WriteErrorLog(ResultString);
                throw new Exception("Unable to process XML returned by the NCBI server. Offending XML has been written to pubharvester_error.log.", ex);
            }

            // Build the EsearchResults
            EsearchResults results = new EsearchResults();
            XmlElement root = xml.DocumentElement;
            XmlNode firstChild = root.FirstChild;

            // An empty root element has no first child; treat it like the ERROR case
            // instead of dereferencing null.
            if (firstChild == null || firstChild.Name == "ERROR")
            {
                // No results were found
                results.Found = false;
                results.Count = 0;
                results.WebEnv = "";
                return results;
            }

            // Go through the XML and pull out WebEnv, QueryKey and Count
            foreach (XmlNode node in root.ChildNodes)
            {
                switch (node.Name)
                {
                    case "WebEnv":
                        results.WebEnv = node.InnerText;
                        break;

                    case "QueryKey":
                        results.QueryKey = ParseInt(node.InnerText);
                        break;

                    case "Count":
                        results.Count = ParseInt(node.InnerText);
                        break;
                }
            }

            // Set Found to true if there were results found
            results.Found = results.Count > 0;
            return results;
        }

        /// <summary>
        /// Convert a number taken from the server's XML to an int using the invariant
        /// culture, so parsing does not depend on the regional settings of the machine.
        /// </summary>
        /// <param name="text">The text of the XML element</param>
        /// <returns>The parsed value</returns>
        private static int ParseInt(string text)
        {
            return System.Convert.ToInt32(text, System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Append the text received from the server to pubharvester_error.log in the
        /// current directory. The writer is disposed even if one of the writes fails.
        /// </summary>
        /// <param name="ResultString">The text that could not be loaded as XML</param>
        private static void WriteErrorLog(string ResultString)
        {
            string logPath = Path.Combine(Environment.CurrentDirectory, "pubharvester_error.log");
            using (StreamWriter writer = new StreamWriter(logPath, true))
            {
                writer.WriteLine("XML data received " + System.DateTime.Now.ToString());
                writer.WriteLine("--- begin data ---");
                writer.WriteLine(ResultString);
                writer.WriteLine("--- end data ---");
                writer.WriteLine();
                writer.WriteLine();
            }
        }

        /// <summary>
        /// EsearchResults is returned by ExecuteEsearch(), and used as input for ExecuteFetch().
        /// The NCBI web search requires two or more queries. The first one feeds it the search
        /// terms, while the rest of the queries page through the results.
        /// </summary>
        internal class EsearchResults
        {
            private int _count;
            private bool _found;
            private int _queryKey;
            private string _webEnv;

            /// <summary>
            /// Count contains the number of results found.
            /// </summary>
            public int Count
            {
                get { return _count; }
                set { _count = value; }
            }

            /// <summary>
            /// Found is set to false if no results are returned.
            /// </summary>
            public bool Found
            {
                get { return _found; }
                set { _found = value; }
            }

            /// <summary>
            /// The query key is a parameter returned by NCBI to identify the results.
            /// </summary>
            public int QueryKey
            {
                get { return _queryKey; }
                set { _queryKey = value; }
            }

            /// <summary>
            /// WebEnv is a parameter returned by NCBI to identify the results.
            /// </summary>
            public string WebEnv
            {
                get { return _webEnv; }
                set { _webEnv = value; }
            }
        }
    }
}