/* * Publication Harvester * Copyright (c) 2003-2006 Stellman & Greene Consulting * Developed for Joshua Zivin and Pierre Azoulay, Columbia University * http://www.stellman-greene.com/PublicationHarvester * * This program is free software; you can redistribute it and/or modify it under * the terms of the GNU General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) any later * version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along with * this program (GPL.txt); if not, write to the Free Software Foundation, Inc., 51 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ using System; using System.Collections.Generic; using System.Text; using System.Data; using NUnit.Framework; namespace Com.StellmanGreene.PubMed.Unit_Tests { /// /// Test the Publication() class /// Note: GetAuthorPosition is verified in TestHarvester /// [TestFixture] public class TestPublication { private PublicationTypes pubTypes; [TestFixtureSetUp] public void TestPublicationSetUp() { pubTypes = new PublicationTypes( AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes", "PublicationTypes.csv" ); } [Test] public void TestPMID() { Publication p = new Publication(); Publications.ProcessMedlineTag(ref p, "PMID- 16319490", pubTypes); Assert.IsTrue(p.PMID == 16319490); } [Test] public void TestPublicationDate() { Publication p = new Publication(); // Year will always be set, so don't test for null Assert.IsNull(p.Month); Assert.IsNull(p.Day); Publications.ProcessMedlineTag(ref p, "DP - 2005 Nov 24", pubTypes); Assert.IsTrue(p.Year == 2005); Assert.IsTrue(p.Month == "Nov"); Assert.IsTrue(p.Day == "24"); } [Test] public void TestTitle() { Publication p = new Publication(); Assert.IsNull(p.Title); Publications.ProcessMedlineTag(ref p, "TI - The Bias Introduced by Population Stratification in IBD Based Linkage Analysis.", pubTypes); Assert.IsTrue(p.Title == "The Bias Introduced by Population Stratification in IBD Based Linkage Analysis."); } [Test] public void TestJournal() { Publication p = new Publication(); Assert.IsNull(p.Journal); Publications.ProcessMedlineTag(ref p, "TA - Hum Hered", pubTypes); Assert.IsTrue(p.Journal == "Hum Hered"); } [Test] public void TestAuthors() { Publication p = new Publication(); Assert.IsNull(p.Authors); Publications.ProcessMedlineTag(ref p, "AU - Wang T", pubTypes); Assert.IsTrue(p.Authors.Length == 1); Assert.IsTrue(p.Authors[0] == "Wang T"); Publications.ProcessMedlineTag(ref p, "AU - Elston RC", pubTypes); Assert.IsTrue(p.Authors.Length == 2); Assert.IsTrue(p.Authors[1] == "Elston RC"); } [Test] public void TestVolume() { Publication p = new Publication(); Assert.IsNull(p.Volume); Publications.ProcessMedlineTag(ref p, "VI - 60", pubTypes); Assert.IsTrue(p.Volume == "60"); } [Test] public void TestIssue() { Publication p = new Publication(); Assert.IsNull(p.Issue); Publications.ProcessMedlineTag(ref p, "IP - 3", pubTypes); Assert.IsTrue(p.Issue == "3"); } [Test] public void TestPages() { Publication p = new Publication(); Assert.IsNull(p.Pages); Publications.ProcessMedlineTag(ref p, "PG - 134-142", pubTypes); Assert.IsTrue(p.Pages == "134-142"); } [Test] public void TestGrantID() { Publication p = new Publication(); Assert.IsNull(p.Grants); Publications.ProcessMedlineTag(ref p, "GR - GM 28356/GM/NIGMS", pubTypes); Assert.IsTrue(p.Grants.Contains("GM 28356/GM/NIGMS")); } [Test] public void TestLongGrantID() { Publication p = new Publication(); Assert.IsNull(p.Grants); Publications.ProcessMedlineTag(ref p, "GR - GM 28356/GM/NIGMS", pubTypes); Publications.ProcessMedlineTag(ref p, "GR - AM18811/AM/NIADDK", pubTypes); Publications.ProcessMedlineTag(ref p, "GR - HD09690/HD/NICHD", pubTypes); Publications.ProcessMedlineTag(ref p, "GR - AG11624/AG/NIA", pubTypes); Publications.ProcessMedlineTag(ref p, "GR - AG20557/AG/NIA", pubTypes); Assert.IsTrue(p.Grants.Count == 5); foreach (string Grant in new string[] { "GM 28356/GM/NIGMS", "AM18811/AM/NIADDK", "HD09690/HD/NICHD", "AG11624/AG/NIA", "AG20557/AG/NIA" } ) { Assert.IsTrue(p.Grants.Contains(Grant)); } } [Test] public void TestPubType() { Publication p = new Publication(); Assert.IsNull(p.PubType); Publications.ProcessMedlineTag(ref p, "PT - Clinical Trial", pubTypes); Assert.IsTrue(p.PubType == "Clinical Trial"); Publications.ProcessMedlineTag(ref p, "PT - Journal Article", pubTypes); Assert.IsTrue(p.PubType == "Clinical Trial"); } [Test] public void TestMeSHHeadings() { Publication p = new Publication(); Assert.IsNull(p.MeSHHeadings); Publications.ProcessMedlineTag(ref p, "MH - Case-Control Studies", pubTypes); Assert.IsTrue(p.MeSHHeadings.Count == 1); Assert.IsTrue(p.MeSHHeadings.Contains("Case-Control Studies")); Publications.ProcessMedlineTag(ref p, "MH - Charcoal/*adverse effects", pubTypes); Assert.IsTrue(p.MeSHHeadings.Count == 2); Assert.IsTrue(p.MeSHHeadings.Contains("Charcoal/*adverse effects")); } private Publication CreateTestPublication() { string MedlineData = @"PMID- 15904469 OWN - NLM STAT- MEDLINE DA - 20050520 DCOM- 20050728 PUBM- Print IS - 1347-9032 VI - 96 IP - 5 DP - 2005 May TI - Charcoal cigarette filters and lung cancer risk in Aichi Prefecture, Japan. PG - 283-7 AB - The lung cancer mortality rate has been lower in Japan than in the United States for several decades. We hypothesized that this difference is due to the Japanese preference for cigarettes with charcoal-containing filters, which efficiently absorb selected gas phase components of mainstream smoke including the carcinogen 4-(methylnitrosamino)-1-(3-pyridyl)-1-butanone. We analyzed a subset of smokers (396 cases and 545 controls) from a case-control study of lung cancer conducted in Aichi Prefecture, Japan. The risk associated with charcoal filters (73% of all subjects) was evaluated after adjusting for age, sex, education and smoking dose. The odds ratio (OR) associated with charcoal compared with 'plain' cigarette filters was 1.2 (95% confidence intervals [CI] 0.9, 1.6). The histologic-specific risks were similar (e.g. OR = 1.3, 95% CI 0.9, 2.1 for adenocarcinoma). The OR was 1.7 (95% CI 1.1, 2.9) in smokers who switched from 'plain' to charcoal brands. The mean daily number of cigarettes smoked in subjects who switched from 'plain' to charcoal brands was 22.5 and 23.0, respectively. The findings from this study did not indicate that charcoal filters were associated with an attenuated risk of lung cancer. As the detection of a modest benefit or risk (e.g. 10-20%) that can have significant public health impact requires large samples, the findings should be confirmed or refuted in larger studies. AD - Department of Health Evaluation Sciences, Penn State Cancer Institute, Division of Population Sciences, Penn State College of Medicine, Hershey, PA 17033, USA. FAU - Muscat, Joshua E AU - Muscat JE FAU - Takezaki, Toshiro AU - Takezaki T FAU - Tajima, Kazuo AU - Tajima K FAU - Stellman, Steven D AU - Stellman SD LA - eng GR - CA-17613/CA/NCI GR - CA-68387/CA/NCI PT - Clinical Trial PT - Journal Article PL - England TA - Cancer Sci JID - 101168776 RN - 16291-96-6 (Charcoal) SB - IM MH - Adult MH - Aged MH - Aged, 80 and over MH - Case-Control Studies MH - Charcoal/*adverse effects MH - Female MH - Filtration/*utilization MH - Humans MH - Japan MH - Lung Neoplasms/*etiology/pathology MH - Male MH - Middle Aged MH - Research Support, N.I.H., Extramural MH - Research Support, U.S. Gov't, Non-P.H.S. MH - Research Support, U.S. Gov't, P.H.S. MH - Risk Factors MH - Smoking/*adverse effects EDAT- 2005/05/21 09:00 MHDA- 2005/07/29 09:00 AID - CAS045 [pii] AID - 10.1111/j.1349-7006.2005.00045.x [doi] PST - ppublish SO - Cancer Sci 2005 May;96(5):283-7."; PublicationTypes ptc = new PublicationTypes( AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes", "PublicationTypes.csv" ); Publications Reader = new Publications(MedlineData, ptc); Assert.IsTrue(Reader.PublicationList.Length == 1); return Reader.PublicationList[0]; } /// /// Test that a person contains the publication created with CreateTestPublication /// /// Person to test private void TestCreatedPublication(Publication p) { Assert.IsTrue(p.PMID == 15904469); Assert.IsTrue(p.Year == 2005); Assert.IsTrue(p.Month == "May"); Assert.IsNull(p.Day); Assert.IsTrue(p.Volume == "96"); Assert.IsTrue(p.Issue == "5"); Assert.IsTrue(p.Title == "Charcoal cigarette filters and lung cancer risk in Aichi Prefecture, Japan."); Assert.IsTrue(p.Pages == "283-7"); Assert.IsTrue(p.Authors.Length == 4); Assert.IsTrue(p.Authors[0] == "Muscat JE"); Assert.IsTrue(p.Authors[3] == "Stellman SD"); Assert.IsTrue(p.Grants.Count == 2); Assert.IsTrue(p.Grants.Contains("CA-17613/CA/NCI")); Assert.IsTrue(p.Grants.Contains("CA-68387/CA/NCI")); Assert.IsTrue(p.Journal == "Cancer Sci"); Assert.IsTrue(p.MeSHHeadings.Count == 17); Assert.IsTrue(p.MeSHHeadings.Contains("Adult")); Assert.IsTrue(p.MeSHHeadings.Contains("Smoking/*adverse effects")); } [Test] public void TestWholePublication() { Publication p = CreateTestPublication(); TestCreatedPublication(p); } /// /// Verify that a publication is written to the database properly /// [Test] public void TestWriteToDB() { // Create a new database and write the publication to it Database DB = new Database("Publication Harvester Unit Test"); Harvester h = new Harvester(DB); h.CreateTables(); Publication p = CreateTestPublication(); PublicationTypes ptc = new PublicationTypes( AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes", "PublicationTypes.csv" ); Publications.WriteToDB(p, DB, ptc, new string[] {"eng"}); // Verify the publication has been added DataTable Results = DB.ExecuteQuery( @"SELECT Journal, Year, Authors, Month, Day, Title, Volume, Issue, Pages, PubType, PubTypeCategoryID FROM Publications WHERE PMID = 15904469" ); Assert.IsTrue(Results.Rows[0]["Year"].Equals(2005)); Assert.IsTrue(Results.Rows[0]["Authors"].Equals(4)); Assert.IsTrue(Results.Rows[0]["Year"].Equals(2005)); Assert.IsTrue(Results.Rows[0]["Month"].Equals("May")); Assert.IsTrue(Results.Rows[0]["Day"].Equals(DBNull.Value)); Assert.IsTrue(Results.Rows[0]["Title"].Equals("Charcoal cigarette filters and lung cancer risk in Aichi Prefecture, Japan.")); Assert.IsTrue(Results.Rows[0]["Volume"].Equals("96")); Assert.IsTrue(Results.Rows[0]["Issue"].Equals("5")); Assert.IsTrue(Results.Rows[0]["Pages"].Equals("283-7")); Assert.IsTrue(Results.Rows[0]["PubType"].Equals("Clinical Trial")); Assert.IsTrue(Results.Rows[0]["PubTypeCategoryID"].Equals((short) 1)); // Verify the authors Assert.IsTrue(DB.GetIntValue("SELECT Count(*) FROM PublicationAuthors") == 4); Results = DB.ExecuteQuery("SELECT PMID, Position, Author, First, Last FROM PublicationAuthors ORDER BY Position"); Assert.IsTrue(Results.Rows[0]["PMID"].Equals(15904469)); Assert.IsTrue(Results.Rows[1]["PMID"].Equals(15904469)); Assert.IsTrue(Results.Rows[2]["PMID"].Equals(15904469)); Assert.IsTrue(Results.Rows[3]["PMID"].Equals(15904469)); Assert.IsTrue(Results.Rows[0]["Position"].Equals(1)); Assert.IsTrue(Results.Rows[1]["Position"].Equals(2)); Assert.IsTrue(Results.Rows[2]["Position"].Equals(3)); Assert.IsTrue(Results.Rows[3]["Position"].Equals(4)); Assert.IsTrue(Results.Rows[0]["Author"].ToString() == "Muscat JE"); Assert.IsTrue(Results.Rows[1]["Author"].ToString() == "Takezaki T"); Assert.IsTrue(Results.Rows[2]["Author"].ToString() == "Tajima K"); Assert.IsTrue(Results.Rows[3]["Author"].ToString() == "Stellman SD"); Assert.IsTrue(Results.Rows[0]["First"].Equals((short) 1)); Assert.IsTrue(Results.Rows[1]["First"].Equals((short) 0)); Assert.IsTrue(Results.Rows[2]["First"].Equals((short) 0)); Assert.IsTrue(Results.Rows[3]["First"].Equals((short) 0)); Assert.IsTrue(Results.Rows[0]["Last"].Equals((short) 0)); Assert.IsTrue(Results.Rows[1]["Last"].Equals((short) 0)); Assert.IsTrue(Results.Rows[2]["Last"].Equals((short) 0)); Assert.IsTrue(Results.Rows[3]["Last"].Equals((short) 1)); // Verifying the MeSH headings // Note the join to PublicationMeSHHeadings to make sure it was populated properly Assert.IsTrue(DB.GetIntValue("SELECT Count(*) FROM PublicationMeSHHeadings WHERE PMID = " + p.PMID.ToString()) == 17); Results = DB.ExecuteQuery( @"SELECT m.Heading FROM MeSHHeadings m, PublicationMeSHHeadings p WHERE p.MeSHHeadingID = m.ID AND p.PMID = " + p.PMID.ToString() ); Assert.IsTrue(Results.Rows.Count == 17); Assert.IsTrue(Results.Rows[0]["Heading"].ToString() == "Adult"); Assert.IsTrue(Results.Rows[5]["Heading"].ToString() == "Female"); Assert.IsTrue(Results.Rows[9]["Heading"].ToString() == "Lung Neoplasms/*etiology/pathology"); Assert.IsTrue(Results.Rows[16]["Heading"].ToString() == "Smoking/*adverse effects"); // Verify the grants Assert.IsTrue(DB.GetIntValue("SELECT Count(*) FROM PublicationGrants WHERE PMID = " + p.PMID.ToString()) == 2); Results = DB.ExecuteQuery( @"SELECT GrantID FROM PublicationGrants WHERE PMID = " + p.PMID.ToString() + " ORDER BY GrantID ASC" ); Assert.IsTrue(Results.Rows[0]["GrantID"].ToString() == "CA-17613/CA/NCI"); Assert.IsTrue(Results.Rows[1]["GrantID"].ToString() == "CA-68387/CA/NCI"); } /// /// Test the constructor that takes a database and a PMID and reads /// the publications for that person /// [Test] public void TestCreateFromDatabase() { // Add a publication to the database Database DB = new Database("Publication Harvester Unit Test"); Harvester harvester = new Harvester(DB); harvester.CreateTables(); Publication write = CreateTestPublication(); PublicationTypes ptc = new PublicationTypes( AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes", "PublicationTypes.csv" ); Publications.WriteToDB(write, DB, ptc, new string[] { "eng" }); // Read it back Publication read; Assert.IsTrue(Publications.GetPublication(DB, 15904469, out read, false)); TestCreatedPublication(read); } /// /// For PMID 14332364, there are 3 authors. MATHIES is the 2nd author. The software /// should count him as a "middle author" pub, not a "second author" pub, because /// there are only 3 authors. /// [Test] public void TestMiddleAuthor() { string MedlineData = @"PMID- 14332364 OWN - NLM STAT- OLDMEDLINE DA - 19651101 DCOM- 19961201 LR - 20051116 PUBM- Print IS - 0002-922X (Print) VI - 110 DP - 1965 Sep TI - CONGENITAL SCALP DEFECTS IN TWIN SISTERS. PG - 293-5 FAU - HODGMAN, J E AU - HODGMAN JE FAU - MATHIES, A W Jr AU - MATHIES AW Jr FAU - LEVAN, N E AU - LEVAN NE LA - eng PT - Journal Article PL - UNITED STATES TA - Am J Dis Child JT - American journal of diseases of children (1960) JID - 0370471 SB - OM MH - *Abnormalities MH - *Diseases in Twins MH - *Infant, Newborn MH - *Scalp OTO - NLM OT - *ABNORMALITIES OT - *DISEASES IN TWINS OT - *INFANT, NEWBORN OT - *SCALP EDAT- 1965/09/01 MHDA- 1965/09/01 00:01 PST - ppublish SO - Am J Dis Child. 1965 Sep;110:293-5."; // Add a person to the database Database DB = new Database("Publication Harvester Unit Test"); Person Mathies = new Person("A5703161", "Allan", "W", "MATHIES", false, new String[] { "MATHIES AW Jr" }, "MATHIES AW Jr[au]"); // Create the publication PublicationTypes ptc = new PublicationTypes( AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes", "PublicationTypes.csv" ); Publications Pubs = new Publications(MedlineData, ptc); Assert.IsTrue(Pubs.PublicationList.Length == 1); Publication Pub = Pubs.PublicationList[0]; Assert.IsTrue(Pub.Authors[1] == "MATHIES AW Jr"); // Clear the database Harvester harvester = new Harvester(DB); harvester.CreateTables(); // Write the person and publication to the database // NOTE: The author position is determined in WritePeoplePublicationsToDB() PublicationTypes PubTypes = new PublicationTypes( AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes", "PublicationTypes.csv" ); Mathies.WriteToDB(DB); Publications.WriteToDB(Pub, DB, PubTypes, new string[] { "eng" }); Publications.WritePeoplePublicationsToDB(DB, Mathies, Pub); // Verify that Mathies is the middle author, not the second author Publications PubsFromDB = new Publications(DB, Mathies, false); Assert.IsTrue(PubsFromDB.PublicationList.Length == 1); Assert.IsTrue(PubsFromDB.PublicationList[0].PMID == 14332364); Harvester.AuthorPositions PositionType; Assert.IsTrue(Publications.GetAuthorPosition(DB, 14332364, Mathies, out PositionType, "PeoplePublications") == 2); Assert.IsTrue(PositionType == Harvester.AuthorPositions.Middle); } } }