/*
* Publication Harvester
* Copyright (c) 2003-2006 Stellman & Greene Consulting
* Developed for Joshua Zivin and Pierre Azoulay, Columbia University
* http://www.stellman-greene.com/PublicationHarvester
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with
* this program (GPL.txt); if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Data;
using NUnit.Framework;
namespace Com.StellmanGreene.PubMed.Unit_Tests
{
/// <summary>
/// Test the harvester class.
/// Note that CreateTables() is tested thoroughly in the other unit tests.
/// </summary>
[TestFixture]
public class TestHarvester
{
/// <summary>
/// Set up the database with data from Input1.XLS using the Mock NCBI object
/// (this is also called from TestReports()). The tables are dropped,
/// CreateTables() is called, and then each person in input1.xls is written
/// and harvested while the values reported by Database.GetStatus() are
/// verified before and after every harvest.
/// </summary>
/// <param name="NCBISearchThrowsAnError">True if the MockNCBI object is supposed to throw an error</param>
/// <param name="Languages">Value assigned to Harvester.Languages before harvesting (TestLanguages passes null and expects all 24 publications)</param>
/// <param name="ExpectedPublications">Number of publications expected in the database after harvesting; only checked when NCBISearchThrowsAnError is false</param>
public static void GetPublicationsFromInput1XLS_Using_MockNCBI(bool NCBISearchThrowsAnError, string[] Languages, int ExpectedPublications)
{
    Database DB = new Database("Publication Harvester Unit Test");

    // Drop all tables and make sure the database reports as empty after each drop
    foreach (string Table in new string[] {
        "meshheadings", "people", "peoplepublications", "publicationauthors",
        "publicationmeshheadings", "publications", "pubtypecategories"
    })
    {
        DB.ExecuteNonQuery("DROP TABLE IF EXISTS " + Table);
        AssertDatabaseStatus(DB, false, 0, 0, 0, 0);
    }

    // Create and populate the tables
    Harvester harvester = new Harvester(DB);
    harvester.Languages = Languages;
    MockNCBI mockNCBI = new MockNCBI("medline");
    mockNCBI.SearchThrowsAnError = NCBISearchThrowsAnError;
    PublicationTypes ptc = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv"
    );

    // Reinitialize the database
    harvester.CreateTables();
    ptc.WriteToDB(DB);

    // Retrieve the publications for each person in input1.xls using GetPublications()
    People PeopleFromFile = new People(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPeople",
        "input1.xls");

    // Make an anonymous callback function that keeps track of the callback data
    int Callbacks = 0; // this will count all of the publications
    Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
    {
        Callbacks++;
    };

    // Make an anonymous callback function to do nothing for GetPublicationsMessage
    Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
    {
        //
    };

    // Make an anonymous callback function to return false for CheckForInterrupt
    Harvester.CheckForInterrupt InterruptCallback = delegate()
    {
        return false;
    };

    // Verify that the tables now exist but are still empty
    AssertDatabaseStatus(DB, true, 0, 0, 0, 0);

    int PeopleCount = 0;
    int HarvestedCount = 0;
    int PubCount = 0;
    foreach (Person person in PeopleFromFile.PersonList)
    {
        double AverageMilliseconds;

        // First write the person to the database
        person.WriteToDB(DB);
        PeopleCount++;

        // Check that the database status is updated properly. When the mock
        // throws, nobody is ever harvested and every previously processed
        // person (PeopleCount - 1 of them) has an error recorded.
        AssertDatabaseStatus(DB, true, PeopleCount,
            NCBISearchThrowsAnError ? 0 : HarvestedCount,
            PubCount,
            NCBISearchThrowsAnError ? PeopleCount - 1 : 0);

        // Harvest the person's publications
        PubCount += harvester.GetPublications(mockNCBI, ptc, person, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);
        HarvestedCount++;

        // Check the status again after the person was harvested; in the
        // error case the current person now has an error as well.
        AssertDatabaseStatus(DB, true, PeopleCount,
            NCBISearchThrowsAnError ? 0 : HarvestedCount,
            PubCount,
            NCBISearchThrowsAnError ? PeopleCount : 0);
    }

    // Verify that the database was written properly
    Assert.AreEqual(NCBISearchThrowsAnError ? 0 : 24, Callbacks, "Callbacks");
    if (!NCBISearchThrowsAnError)
        AssertDatabaseStatus(DB, true, 4, 4, ExpectedPublications, 0);
    else
        AssertDatabaseStatus(DB, true, 4, 0, 0, 4);
}

/// <summary>
/// Call Database.GetStatus() and compare every value it reports against the expected one.
/// </summary>
private static void AssertDatabaseStatus(Database DB, bool ExpectedTablesCreated, int ExpectedPeople,
    int ExpectedHarvestedPeople, int ExpectedPublications, int ExpectedErrors)
{
    bool TablesCreated;
    int NumPeople;
    int NumHarvestedPeople;
    int NumPublications;
    int NumErrors;
    DB.GetStatus(out TablesCreated, out NumPeople, out NumHarvestedPeople, out NumPublications, out NumErrors);

    // NUnit expects (expected, actual); the labels identify which value failed
    Assert.AreEqual(ExpectedTablesCreated, TablesCreated, "TablesCreated");
    Assert.AreEqual(ExpectedPeople, NumPeople, "NumPeople");
    Assert.AreEqual(ExpectedHarvestedPeople, NumHarvestedPeople, "NumHarvestedPeople");
    Assert.AreEqual(ExpectedPublications, NumPublications, "NumPublications");
    Assert.AreEqual(ExpectedErrors, NumErrors, "NumErrors");
}
/// <summary>
/// Import people from a file, then verify they were written correctly.
/// Uses input1.xls from TestPeople.
/// </summary>
[Test]
public void GetPeopleFromInputXLS()
{
    // Import input1.xls into the database
    Database DB = new Database("Publication Harvester Unit Test");
    Harvester harvester = new Harvester(DB);
    harvester.CreateTables();
    harvester.ImportPeople(AppDomain.CurrentDomain.BaseDirectory
        + "\\Unit Tests\\TestPeople\\input1.xls");
    DataTable Results = DB.ExecuteQuery(
@"SELECT Setnb, First, Middle, Last, Name1, Name2, Name3, Name4,
MedlineSearch, Harvested, Error, ErrorMessage
FROM People"
    );

    // Test each person, counting the ones that were recognized so the test
    // cannot pass vacuously when the import wrote nothing
    int VerifiedPeople = 0;
    for (int Row = 0; Row < Results.Rows.Count; Row++)
    {
        Person person = new Person(Results.Rows[Row], Results.Columns);
        switch (person.Setnb)
        {
            case "A6009400":
                VerifiedPeople++;
                AssertPerson(person, "Jan", "", "Van Eys",
                    new string[] { "van eys j", "vaneys j", "eys jv" },
                    "(\"van eys j\"[au] OR \"vaneys j\"[au] OR \"eys jv\"[au])");
                break;
            case "A5401532":
                VerifiedPeople++;
                AssertPerson(person, "Louis", "", "Tobian",
                    new string[] { "tobian l", "tobian l jr", "tobian lj" },
                    "(\"tobian l\"[au] OR \"tobian l jr\"[au] OR \"tobian lj\"[au])");
                break;
            case "A5501586":
                VerifiedPeople++;
                AssertPerson(person, "Keith", "B", "Reemtsma",
                    new string[] { "reemtsma k", "reemtsma kb", "test data", "more test data" },
                    "((\"reemtsma k\"[au] OR \"reemtsma kb\"[au]) AND 1956:2000[dp])");
                break;
            case "A5702471":
                VerifiedPeople++;
                AssertPerson(person, "Roger", "", "Guillemin",
                    new string[] { "guillemin r", "guillemin rc" },
                    "(\"guillemin rc\"[au] OR (\"guillemin r\"[au] NOT (Electrodiagn Ther[ta] OR Phys Rev Lett[ta] OR vegas[ad] OR lindle[au])))");
                break;
        }
    }
    Assert.AreEqual(4, VerifiedPeople, "Number of expected people found in the People table");
}

/// <summary>
/// Verify the name fields, the list of author names and the Medline search of one person.
/// </summary>
private static void AssertPerson(Person person, string ExpectedFirst, string ExpectedMiddle,
    string ExpectedLast, string[] ExpectedNames, string ExpectedMedlineSearch)
{
    Assert.AreEqual(ExpectedFirst, person.First);
    Assert.AreEqual(ExpectedMiddle, person.Middle);
    Assert.AreEqual(ExpectedLast, person.Last);
    Assert.AreEqual(ExpectedNames.Length, person.Names.Length);
    for (int Index = 0; Index < ExpectedNames.Length; Index++)
        Assert.AreEqual(ExpectedNames[Index], person.Names[Index]);
    Assert.AreEqual(ExpectedMedlineSearch, person.MedlineSearch);
}
/// <summary>
/// Test GetPublications using a mock NCBI object. After the shared setup has
/// harvested the four people from input1.xls, selected publications are read
/// back from the database and their citation fields, publication type,
/// MeSH headings and author position are verified.
/// </summary>
[Test]
public void GetPublications()
{
    // Set up the database
    TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);
    Database DB = new Database("Publication Harvester Unit Test");
    PublicationTypes ptc = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv"
    );

    // Verify the correct publications were written (including publication type and author position)
    People people = new People(DB);
    Assert.AreEqual(4, people.PersonList.Count);
    int FoundPublications = 0;
    foreach (Person person in people.PersonList)
    {
        Publications pubs = new Publications(DB, person, false);
        switch (person.Setnb)
        {
            case "A6009400": // Van Eys
                Assert.AreEqual(8, pubs.PublicationList.Length);
                foreach (Publication pub in pubs.PublicationList)
                    switch (pub.PMID)
                    {
                        case 9876482:
                            FoundPublications++;
                            Assert.AreEqual("Benefits of nutritional intervention on nutritional status, quality of life and survival.", pub.Title);
                            Assert.AreEqual("66-8", pub.Pages);
                            Assert.AreEqual(1998, pub.Year);
                            Assert.IsNull(pub.Month);
                            Assert.IsNull(pub.Day);
                            Assert.AreEqual("Int J Cancer Suppl", pub.Journal);
                            Assert.AreEqual("11", pub.Volume);
                            Assert.IsNull(pub.Issue);
                            Assert.AreEqual(1, pub.Authors.Length);
                            Assert.AreEqual("Van Eys J", pub.Authors[0]);
                            // Verify publication type
                            Assert.AreEqual("Journal Article", pub.PubType);
                            Assert.AreEqual(3, ptc.GetCategoryNumber(pub.PubType));
                            // Verify MeSH headings
                            Assert.AreEqual(8, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Child"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Nutrition Disorders/*complications/*therapy"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Survival Rate"));
                            // Verify position type
                            AssertAuthorPosition(DB, pub, person, 1, Harvester.AuthorPositions.First);
                            break;
                        case 8403744:
                            FoundPublications++;
                            Assert.AreEqual("Early hospital discharge and the timing of newborn metabolic screening.", pub.Title);
                            Assert.AreEqual("463-6", pub.Pages);
                            Assert.AreEqual(1993, pub.Year);
                            Assert.AreEqual("Aug", pub.Month);
                            Assert.IsNull(pub.Day);
                            Assert.AreEqual("Clin Pediatr (Phila)", pub.Journal);
                            Assert.AreEqual("32", pub.Volume);
                            Assert.AreEqual("8", pub.Issue);
                            Assert.AreEqual(4, pub.Authors.Length);
                            Assert.AreEqual("Coody D", pub.Authors[0]);
                            Assert.AreEqual("Yetman RJ", pub.Authors[1]);
                            Assert.AreEqual("Montgomery D", pub.Authors[2]);
                            Assert.AreEqual("van Eys J", pub.Authors[3]);
                            // Verify publication type
                            Assert.AreEqual("Consensus Development Conference, NIH", pub.PubType);
                            Assert.AreEqual(1, ptc.GetCategoryNumber(pub.PubType));
                            // Verify MeSH headings
                            Assert.AreEqual(15, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Cesarean Section"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Hospitals, Private"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("*Insurance, Health"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("United States"));
                            // Verify position type
                            AssertAuthorPosition(DB, pub, person, 4, Harvester.AuthorPositions.Last);
                            break;
                    }
                break;
            case "A5401532": // Tobian
                Assert.AreEqual(5, pubs.PublicationList.Length);
                foreach (Publication pub in pubs.PublicationList)
                    switch (pub.PMID)
                    {
                        case 9931073:
                            FoundPublications++;
                            Assert.AreEqual("Story of the birth of the journal called Hypertension.", pub.Title);
                            Assert.AreEqual("7", pub.Pages);
                            Assert.AreEqual(1999, pub.Year);
                            Assert.AreEqual("Jan", pub.Month);
                            Assert.IsNull(pub.Day);
                            Assert.AreEqual("33", pub.Volume);
                            Assert.AreEqual("1", pub.Issue);
                            Assert.AreEqual(1, pub.Authors.Length);
                            Assert.AreEqual("Tobian L", pub.Authors[0]);
                            // Verify publication type
                            Assert.AreEqual("Historical Article", pub.PubType);
                            Assert.AreEqual(0, ptc.GetCategoryNumber(pub.PubType));
                            // Verify MeSH headings
                            Assert.AreEqual(5, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("American Heart Association/*history"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("*Hypertension"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("United States"));
                            // Verify position type
                            AssertAuthorPosition(DB, pub, person, 1, Harvester.AuthorPositions.First);
                            break;
                    }
                break;
            case "A5501586": // Reemtsma
                Assert.AreEqual(3, pubs.PublicationList.Length);
                foreach (Publication pub in pubs.PublicationList)
                    switch (pub.PMID)
                    {
                        case 11528018:
                            FoundPublications++;
                            Assert.AreEqual("Xenotransplantation: A Historical Perspective.", pub.Title);
                            Assert.AreEqual("9-12", pub.Pages);
                            Assert.AreEqual(1995, pub.Year);
                            Assert.IsNull(pub.Month);
                            Assert.IsNull(pub.Day);
                            Assert.AreEqual("37", pub.Volume);
                            Assert.AreEqual("1", pub.Issue);
                            Assert.AreEqual(1, pub.Authors.Length);
                            Assert.AreEqual("Reemtsma K", pub.Authors[0]);
                            // Verify publication type
                            Assert.AreEqual("JOURNAL ARTICLE", pub.PubType);
                            Assert.AreEqual(3, ptc.GetCategoryNumber(pub.PubType));
                            // Verify MeSH headings (this citation has none)
                            Assert.IsNull(pub.MeSHHeadings);
                            // Verify position type
                            AssertAuthorPosition(DB, pub, person, 1, Harvester.AuthorPositions.First);
                            break;
                    }
                break;
            case "A5702471": // Guillemin
                Assert.AreEqual(6, pubs.PublicationList.Length);
                foreach (Publication pub in pubs.PublicationList)
                    switch (pub.PMID)
                    {
                        case 15642779:
                            // For this publication, we're just concerned that
                            // the publication type is "Review" -- even though
                            // it's the second publication type in the citation,
                            // it's flagged as an "override first pubtype"
                            // in PublicationTypes.csv
                            // (deliberately not counted in FoundPublications)
                            Assert.AreEqual("Review", pub.PubType);
                            break;
                        // NOTE: The title has a quote (laureates') that gets stripped off
                        case 12462241:
                            FoundPublications++;
                            Assert.AreEqual("Nobel laureates letter to President Bush.", pub.Title);
                            Assert.AreEqual("A02", pub.Pages);
                            Assert.AreEqual(2001, pub.Year);
                            Assert.AreEqual("Feb", pub.Month);
                            Assert.AreEqual("22", pub.Day);
                            Assert.AreEqual("Washington Post", pub.Journal);
                            Assert.IsNull(pub.Volume);
                            Assert.IsNull(pub.Issue);
                            Assert.AreEqual(82, pub.Authors.Length);
                            Assert.AreEqual("Arrow KJ", pub.Authors[0]);
                            Assert.AreEqual("Guillemin R", pub.Authors[26]);
                            Assert.AreEqual("Wilson RW", pub.Authors[81]);
                            // Verify publication type
                            Assert.AreEqual("Newspaper Article", pub.PubType);
                            Assert.AreEqual(0, ptc.GetCategoryNumber(pub.PubType));
                            // Verify MeSH headings
                            Assert.AreEqual(9, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Embryo Disposition"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("National Institutes of Health (U.S.)"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("United States"));
                            // Verify position type
                            AssertAuthorPosition(DB, pub, person, 27, Harvester.AuthorPositions.Middle);
                            break;
                        case 3086749:
                            // This publication was altered to contain six GrantIDs in order to
                            // test the GrantID column length in the database
                            FoundPublications++;
                            Assert.AreEqual("Pituitary FSH is released by a heterodimer of the beta-subunits from the two forms of inhibin.", pub.Title);
                            Assert.AreEqual("779-82", pub.Pages);
                            Assert.AreEqual(1986, pub.Year);
                            Assert.AreEqual("Jun", pub.Month);
                            Assert.AreEqual("19-25", pub.Day);
                            Assert.AreEqual("Nature", pub.Journal);
                            Assert.AreEqual("321", pub.Volume);
                            Assert.AreEqual("6072", pub.Issue);
                            Assert.AreEqual(7, pub.Authors.Length);
                            Assert.AreEqual("Ling N", pub.Authors[0]);
                            Assert.AreEqual("Esch F", pub.Authors[4]);
                            Assert.AreEqual("Guillemin R", pub.Authors[6]);
                            // Verify publication type
                            Assert.AreEqual("Journal Article", pub.PubType);
                            Assert.AreEqual(3, ptc.GetCategoryNumber(pub.PubType));
                            // Verify MeSH headings
                            Assert.AreEqual(14, pub.MeSHHeadings.Count);
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Amino Acid Sequence"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Follicle Stimulating Hormone/*secretion"));
                            Assert.IsTrue(pub.MeSHHeadings.Contains("Swine"));
                            // Verify position type
                            AssertAuthorPosition(DB, pub, person, 7, Harvester.AuthorPositions.Last);
                            break;
                    }
                break;
        }
    }
    Assert.AreEqual(6, FoundPublications);

    // Verify that People.Harvested has been updated for each person
    DataTable Results = DB.ExecuteQuery("SELECT Setnb, Harvested FROM People");
    Assert.AreEqual(4, Results.Rows.Count);
    foreach (DataRow Row in Results.Rows)
    {
        Assert.IsTrue((bool)Row["Harvested"]);
    }
}

/// <summary>
/// Verify a person's author position on a publication through both lookup
/// paths: the static Publications.GetAuthorPosition() (against the
/// PeoplePublications table) and Person.GetAuthorPosition().
/// </summary>
private static void AssertAuthorPosition(Database DB, Publication pub, Person person,
    int ExpectedPosition, Harvester.AuthorPositions ExpectedPositionType)
{
    Harvester.AuthorPositions PositionType;
    int AuthorPosition = Publications.GetAuthorPosition(DB, pub.PMID, person, out PositionType, "PeoplePublications");
    Assert.AreEqual(ExpectedPosition, AuthorPosition);
    Assert.AreEqual(ExpectedPositionType, PositionType);

    AuthorPosition = person.GetAuthorPosition(DB, pub, out PositionType);
    Assert.AreEqual(ExpectedPosition, AuthorPosition);
    Assert.AreEqual(ExpectedPositionType, PositionType);
}
/// <summary>
/// Use OtherPeople.dat to make sure the data is processed properly.
/// OtherPeople.dat contains Medline citations for boundary cases
/// that required bug fixes.
/// </summary>
[Test]
public void TestOtherPeople()
{
    // Note: Publication 14560782 was added to OtherPeople.dat to verify
    // that the software handles the situation where a publication has
    // no authors listed.
    Database DB = new Database("Publication Harvester Unit Test");
    Harvester harvester = new Harvester(DB);
    MockNCBI mockNCBI = new MockNCBI("medline");
    PublicationTypes ptc = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv"
    );

    // Reinitialize the database
    harvester.CreateTables();

    // Make an anonymous callback function that keeps track of the callback data
    int Callbacks = 0; // this will count all of the publications
    Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
    {
        Callbacks++;
    };

    // Make an anonymous callback function to do nothing for GetPublicationsMessage
    Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
    {
        //
    };

    // Make an anonymous callback function to return false for CheckForInterrupt
    Harvester.CheckForInterrupt InterruptCallback = delegate()
    {
        return false;
    };

    // Create a new person to test
    string[] names = new string[] { "Klein RG", "Guillemin R" };
    Person person = new Person("A1234567", "FIRST", "MIDDLE", "LAST",
        false, names, "Special query for OtherPeople.dat");
    person.WriteToDB(DB);
    double AverageMilliseconds;
    harvester.GetPublications(mockNCBI, ptc, person, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);

    // Verify that the data was written properly
    int FoundPublications = 0;
    Publications pubs = new Publications(DB, person, false);
    foreach (Publication pub in pubs.PublicationList)
    {
        // Every publication is counted, including those without a special case below
        FoundPublications++;
        switch (pub.PMID)
        {
            case 12679283:
                // The weird part of this publication is the second MeSH heading, which is very long
                Assert.AreEqual(23, pub.MeSHHeadings.Count);
                Assert.IsTrue(pub.MeSHHeadings.Contains(
                    "Attention Deficit and Disruptive Behavior Disorders/etiology/*prevention & control/psychology"));
                break;
            case 2417121:
                // The weird part of this publication is the date, which has a weird format that causes
                // the day to be long
                Assert.AreEqual("19-1986 Jan 1", pub.Day);
                break;
            case 6148773:
                // One of the headers is long
                Assert.IsTrue(pub.MeSHHeadings.Contains("Peptide Fragments/antagonists & inhibitors/chemical synthesis/diagnostic use/isolation & purification/pharmacology/*physiology"));
                break;
            case 16291338:
                // One of the authors is long
                Assert.AreEqual(8, pub.Authors.Length);
                Assert.AreEqual("For The Michigan Alliance For The National Children's Study", pub.Authors[7]);
                break;
            case 15451956:
                // Volume is long
                Assert.AreEqual("Suppl Web Exclusives", pub.Volume);
                break;
            case 14653276:
                // Issue is long
                Assert.AreEqual("5 Suppl Nitric Oxide", pub.Issue);
                break;
            case 9965612:
                // Journal name is long
                Assert.AreEqual("PHYSICAL REVIEW. E. STATISTICAL PHYSICS, PLASMAS, FLUIDS, AND RELATED INTERDISCIPLINARY TOPICS", pub.Journal);
                break;
            case 9469584:
                // Title is long; the expected value is the full title passed through Database.Left(..., 244)
                Assert.AreEqual(
                    Database.Left("Down-regulation of cholesterol biosynthesis in sitosterolemia: diminished activities of acetoacetyl-CoA thiolase, 3-hydroxy-3-methylglutaryl-CoA synthase, reductase, squalene synthase, and 7-dehydrocholesterol delta7-reductase in liver and mononuclear leukocytes."
                    , 244),
                    pub.Title);
                break;
            case 2545230:
                // Month is long
                Assert.AreEqual("Spring-Summer", pub.Month);
                break;
            default:
                break;
        }
    }
    Assert.AreEqual(13, FoundPublications);
}
/// <summary>
/// Verify that ClearDataAfterInterruption() actually clears data properly
/// (using the data inserted by GetPublicationsFromInput1XLS_Using_MockNCBI).
/// An interruption is simulated by marking one person as unharvested and
/// deleting one publication row while leaving its dependent rows behind.
/// </summary>
[Test]
public void TestClearingDataAfterInterruption()
{
    // Set up the database
    TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);
    Database DB = new Database("Publication Harvester Unit Test");
    Harvester harvester = new Harvester(DB);

    // Add a grant for publication 12462241, since it doesn't have one
    ArrayList Parameters = new ArrayList();
    Parameters.Add(Database.Parameter(12462241));
    Parameters.Add(Database.Parameter("Fake grant ID"));
    DB.ExecuteNonQuery("INSERT INTO PublicationGrants (PMID, GrantID) VALUES ( ? , ? )", Parameters);

    // Verify that InterruptedDataExists and UnharvestedPeopleExist both return false
    Assert.IsFalse(harvester.InterruptedDataExists());
    Assert.IsFalse(harvester.UnharvestedPeopleExist());

    // Verify that there are publications for Tobian (setnb A5401532)
    // (there should be 5 publications)
    Assert.AreEqual(5, DB.GetIntValue(
@"SELECT Count(*)
FROM PeoplePublications
WHERE Setnb = 'A5401532'"));
    int TotalPublicationsBeforeClear
        = DB.GetIntValue("SELECT Count(*) FROM PeoplePublications");

    // Verify that there are authors and MeSH headings for publication 12462241
    // (which belongs to Guillemin, and should have 82 authors and 9 headings)
    Assert.AreEqual(9, DB.GetIntValue(
@"SELECT Count(*)
FROM PublicationMeSHHeadings
WHERE PMID = 12462241"));
    int TotalHeadingsBeforeClear
        = DB.GetIntValue("SELECT Count(*) FROM PublicationMeSHHeadings");
    Assert.AreEqual(82, DB.GetIntValue(
@"SELECT Count(*)
FROM PublicationAuthors
WHERE PMID = 12462241"));
    int TotalAuthorsBeforeClear
        = DB.GetIntValue("SELECT Count(*) FROM PublicationAuthors");
    Assert.AreEqual(1, DB.GetIntValue(
@"SELECT Count(*)
FROM PublicationGrants
WHERE PMID = 12462241"));
    int TotalGrantsBeforeClear
        = DB.GetIntValue("SELECT Count(*) FROM PublicationGrants");

    // Set Tobian's Harvested to 0
    DB.ExecuteNonQuery("UPDATE People SET Harvested = 0 WHERE Setnb = 'A5401532'");
    // Remove publication 12462241 (its authors, headings and grants stay behind)
    DB.ExecuteNonQuery("DELETE FROM Publications WHERE PMID = 12462241");

    // Verify that InterruptedDataExists and UnharvestedPeopleExist both return true
    Assert.IsTrue(harvester.InterruptedDataExists());
    Assert.IsTrue(harvester.UnharvestedPeopleExist());

    // Execute ClearDataAfterInterruption
    harvester.ClearDataAfterInterruption();

    // Verify that InterruptedDataExists returns false again, now that
    // the interrupted data has been cleared, but that there are still
    // unharvested people
    Assert.IsFalse(harvester.InterruptedDataExists());
    Assert.IsTrue(harvester.UnharvestedPeopleExist());

    // Verify that only Tobian's publications have been removed
    Assert.AreEqual(TotalPublicationsBeforeClear - 5,
        DB.GetIntValue("SELECT Count(*) FROM PeoplePublications"));

    // Verify that only publication 12462241's grants, headings and authors have been removed
    Assert.AreEqual(TotalHeadingsBeforeClear - 9,
        DB.GetIntValue("SELECT Count(*) FROM PublicationMeSHHeadings"));
    Assert.AreEqual(TotalAuthorsBeforeClear - 82,
        DB.GetIntValue("SELECT Count(*) FROM PublicationAuthors"));
    Assert.AreEqual(TotalGrantsBeforeClear - 1,
        DB.GetIntValue("SELECT Count(*) FROM PublicationGrants"));
}
/// <summary>
/// Verify that when the NCBI object throws an error, the correct error
/// values are recorded in the database.
/// </summary>
[Test]
public void TestNCBIError()
{
    // Set up the database -- tell MockNCBI to throw an error.
    // The first argument must be true: passing false only repeats the
    // successful harvest and never exercises the error path.
    // The shared setup function does all of the necessary testing; in the
    // error case it expects zero publications and ignores the last argument.
    TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(true, new string[] { "eng" }, 0);
}
/// <summary>
/// Verify that two people with the same names and search criteria will
/// both be retrieved when one of them is harvested. A third person with
/// the same names starts out with an error, which must be cleared.
/// </summary>
[Test]
public void TestTwoPeopleWithSameNames()
{
    Database DB = new Database("Publication Harvester Unit Test");

    // Set up the database
    TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(false, new string[] { "eng" }, 22);

    // Add two people to the database with the same names and search criteria
    // (where the search should make MockNCBI use OtherPeople.dat)
    string[] names = new string[] { "Guy JF", "Guy J" };
    Person Joe = new Person("A1234567", "JOE", "FIRST", "GUY",
        false, names, "Special query for OtherPeople.dat");
    Joe.WriteToDB(DB);
    Person Jane = new Person("Z7654321", "JANE", "FIFTH", "GUY",
        false, names, "Special query for OtherPeople.dat");
    Jane.WriteToDB(DB);

    // Also add Jim, but give him an error so we can make sure it's cleared
    Person Jim = new Person("Q2222222", "JIM", "FOURTEENTH", "GUY",
        false, names, "Special query for OtherPeople.dat");
    Jim.WriteToDB(DB);
    Jim.WriteErrorToDB(DB, "This is an error message");

    PublicationTypes ptc = new PublicationTypes(
        AppDomain.CurrentDomain.BaseDirectory + "\\Unit Tests\\TestPublicationTypes",
        "PublicationTypes.csv"
    );

    // Make an anonymous callback function that keeps track of the callback data
    int Callbacks = 0; // this will count all of the publications
    Harvester.GetPublicationsStatus StatusCallback = delegate(int number, int total, int averageTime)
    {
        Callbacks++;
    };

    // Make an anonymous callback function that counts the "same names" messages
    int MessageCallbacks = 0;
    Harvester.GetPublicationsMessage MessageCallback = delegate(string Message, bool StatusBarOnly)
    {
        // Only increment MessageCallbacks if the message contains Joe's or
        // Jim's Setnb AND the word "same". The Setnb test is parenthesized
        // because && binds tighter than ||.
        bool MentionsJoeOrJim = Message.Contains("A1234567") || Message.Contains("Q2222222");
        if (MentionsJoeOrJim && Message.Contains("same"))
            MessageCallbacks++;
    };

    // Make an anonymous callback function to return false for CheckForInterrupt
    Harvester.CheckForInterrupt InterruptCallback = delegate()
    {
        return false;
    };

    // More stuff for the harvester
    Harvester harvester = new Harvester(DB);
    MockNCBI mockNCBI = new MockNCBI("medline");
    double AverageMilliseconds;

    // Harvest the people (only Jane is harvested explicitly)
    harvester.GetPublications(mockNCBI, ptc, Jane, StatusCallback, MessageCallback, InterruptCallback, out AverageMilliseconds);

    // Make sure the harvester got Jane's publications
    AssertSharedPublications(DB, Jane.Setnb);
    ArrayList Parameters = new ArrayList();
    Parameters.Add(Database.Parameter(Jane.Setnb));
    DataTable Results = DB.ExecuteQuery("SELECT Harvested, Error, ErrorMessage FROM People WHERE Setnb = ?", Parameters);
    Assert.AreEqual(true, Results.Rows[0]["Harvested"]);
    Assert.AreEqual(DBNull.Value, Results.Rows[0]["Error"]);
    Assert.AreEqual("", Results.Rows[0]["ErrorMessage"].ToString());

    // It should also get Joe's publications. It should call MessageCallback()
    // twice to let us know Joe's and Jim's publications were found, and it
    // should add the appropriate rows to PeoplePublications.
    Assert.AreEqual(2, MessageCallbacks);
    AssertSharedPublications(DB, Joe.Setnb);
    Parameters = new ArrayList();
    Parameters.Add(Database.Parameter(Joe.Setnb));
    Results = DB.ExecuteQuery("SELECT " + Database.PEOPLE_COLUMNS + " FROM People WHERE Setnb = ?", Parameters);
    bool boolValue; // needed for GetBoolValue workaround for bit field bug in MySQL
    Assert.IsTrue(Database.GetBoolValue(Results.Rows[0]["Harvested"], out boolValue));
    Assert.IsTrue(boolValue);
    Assert.IsTrue(Database.GetBoolValue(Results.Rows[0]["Error"], out boolValue));
    Assert.IsFalse(boolValue);
    Assert.AreEqual(DBNull.Value, Results.Rows[0]["Error"]);
    Assert.AreEqual("", Results.Rows[0]["ErrorMessage"].ToString());

    // It should also get Jim's publications -- and it should also clear his error.
    Assert.AreEqual(2, MessageCallbacks);
    AssertSharedPublications(DB, Jim.Setnb);
    Parameters = new ArrayList();
    Parameters.Add(Database.Parameter(Jim.Setnb));
    // Note the leading space before FROM: without it the column list and
    // the keyword run together.
    Results = DB.ExecuteQuery("SELECT " + Database.PEOPLE_COLUMNS + " FROM People WHERE Setnb = ?", Parameters);
    Assert.IsTrue(Database.GetBoolValue(Results.Rows[0]["Harvested"], out boolValue));
    Assert.AreEqual(true, boolValue);
    Assert.AreEqual(DBNull.Value, Results.Rows[0]["Error"]);
    Assert.AreEqual("", Results.Rows[0]["ErrorMessage"].ToString());
}

/// <summary>
/// Verify that the person with the given Setnb has exactly three rows in
/// PeoplePublications, and that each one is 2417121, 12679283 or 14653276.
/// </summary>
private static void AssertSharedPublications(Database DB, string Setnb)
{
    ArrayList Parameters = new ArrayList();
    Parameters.Add(Database.Parameter(Setnb));
    DataTable Results = DB.ExecuteQuery("SELECT PMID FROM PeoplePublications WHERE Setnb = ?", Parameters);
    Assert.AreEqual(3, Results.Rows.Count, "Publications for " + Setnb);
    foreach (DataRow Row in Results.Rows)
    {
        string PMID = Row["PMID"].ToString();
        Assert.IsTrue(
            (PMID == "2417121")
            || (PMID == "12679283")
            || (PMID == "14653276"),
            "Unexpected PMID " + PMID + " for " + Setnb);
    }
}
/// <summary>
/// Verify that the Languages parameter works
/// </summary>
[Test]
public void TestLanguages()
{
    // GetPublicationsFromInput1XLS_Using_MockNCBI reads four people from
    // Input1.xls and writes their publications to the database using
    // MockNCBI. MockNCBI gets its test data from TestHarvester\*.dat.
    // Each language set below is paired, by index, with the number of
    // publications expected when harvesting with that set.
    string[][] LanguageSets = new string[][] {
        new string[] { "eng" },         // 22 English publications belong to the four people
        new string[] { "rus" },         // one Russian publication
        new string[] { "spa" },         // one Spanish publication
        new string[] { "spa", "rus" },  // one Russian and one Spanish -- two total
        new string[] { "fre", "por" },  // no French or Portuguese publications
        new string[] { "eng", "rus" },  // 22 English and one Russian publication
        null                            // null Languages: all 24 publications are expected
    };
    int[] PublicationCounts = new int[] { 22, 1, 1, 2, 0, 23, 24 };

    // Run the full setup-and-verify pass once per language set, in order
    for (int Index = 0; Index < LanguageSets.Length; Index++)
    {
        TestHarvester.GetPublicationsFromInput1XLS_Using_MockNCBI(
            false, LanguageSets[Index], PublicationCounts[Index]);
    }
}
}
}