using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Data;
using System.Data.Odbc;

namespace ScientificDistance
{
    /// <summary>
    /// Helpers that count keyword headings for scientists' publications,
    /// intersect those counts, and format the results as comma-separated
    /// report rows.
    /// </summary>
    public static class Overlap
    {
        /// <summary>
        /// Query that returns one row per publication year for a single setnb,
        /// ordered by year (earliest first).
        /// </summary>
        private const string PublicationYearsQuery =
            @"SELECT year FROM publications p LEFT JOIN peoplepublications pp ON pp.PMID = p.PMID WHERE pp.setnb = ? ORDER BY year";

        /// <summary>
        /// Characters that force a heading to be wrapped in double quotes.
        /// </summary>
        private static readonly char[] CsvSpecialCharacters = new char[] { ',', '"' };

        /// <summary>
        /// Get the keyword counts from a Publications object
        /// </summary>
        /// <param name="publications">Publications object that contains the keywords to count</param>
        /// <returns>Dictionary that maps each heading to the number of times it occurs across all PMIDs</returns>
        /// <exception cref="ArgumentNullException">publications is null</exception>
        public static Dictionary<string, int> KeywordCounts(Publications publications)
        {
            if (publications == null)
            {
                throw new ArgumentNullException("publications", "KeywordCounts(): Publications must not be null");
            }

            Dictionary<string, int> counts = new Dictionary<string, int>();
            foreach (int pmid in publications.PMIDs)
            {
                foreach (string heading in publications.Headings(pmid))
                {
                    // TryGetValue leaves 'current' at 0 when the heading has not
                    // been seen yet, so one lookup covers both the first
                    // occurrence and the increment.
                    int current;
                    counts.TryGetValue(heading, out current);
                    counts[heading] = current + 1;
                }
            }

            return counts;
        }

        /// <summary>
        /// Find the intersection of two keyword counts
        /// </summary>
        /// <param name="counts1">Keyword count #1</param>
        /// <param name="counts2">Keyword count #2</param>
        /// <returns>
        /// Dictionary that contains every heading present in both inputs, mapped
        /// to the smaller of its two counts
        /// </returns>
        /// <exception cref="ArgumentNullException">counts1 or counts2 is null</exception>
        public static Dictionary<string, int> Intersection(Dictionary<string, int> counts1, Dictionary<string, int> counts2)
        {
            if (counts1 == null)
            {
                throw new ArgumentNullException("counts1", "Intersection(): Keyword counts must not be null");
            }
            if (counts2 == null)
            {
                throw new ArgumentNullException("counts2", "Intersection(): Keyword counts must not be null");
            }

            Dictionary<string, int> intersection = new Dictionary<string, int>();
            foreach (KeyValuePair<string, int> entry in counts1)
            {
                int otherCount;
                if (counts2.TryGetValue(entry.Key, out otherCount))
                {
                    intersection.Add(entry.Key, Math.Min(entry.Value, otherCount));
                }
            }

            return intersection;
        }

        /// <summary>
        /// Generate a report row for two Publications objects
        /// </summary>
        /// <param name="publications1">Publications object for first scientist</param>
        /// <param name="publications2">Publications object for second scientist</param>
        /// <returns>
        /// A string containing the row for the report: Setnb and Window of each
        /// scientist, followed by the unique and total keyword counts for the
        /// first scientist, the second scientist, and their intersection
        /// </returns>
        /// <exception cref="ArgumentNullException">publications1 or publications2 is null</exception>
        public static string GenerateOverlapReportRow(Publications publications1, Publications publications2)
        {
            // The two-argument constructor is used on purpose: the single-string
            // overload treats its argument as the parameter name, not the message.
            if (publications1 == null)
            {
                throw new ArgumentNullException("publications1", "GenerateOverlapReportRow(): Publications must not be null");
            }
            if (publications2 == null)
            {
                throw new ArgumentNullException("publications2", "GenerateOverlapReportRow(): Publications must not be null");
            }

            Dictionary<string, int> counts1 = KeywordCounts(publications1);
            int nb_unq_keywords1 = counts1.Count;
            int nb_frq_keywords1 = counts1.Values.Sum();

            Dictionary<string, int> counts2 = KeywordCounts(publications2);
            int nb_unq_keywords2 = counts2.Count;
            int nb_frq_keywords2 = counts2.Values.Sum();

            Dictionary<string, int> intersection = Intersection(counts1, counts2);
            int nb_unq_ovrlp = intersection.Count;
            int nb_frq_ovrlp = intersection.Values.Sum();

            return String.Format("{0:s},{1:s},{2:s},{3:s},{4:d},{5:d},{6:d},{7:d},{8:d},{9:d}",
                publications1.Setnb, publications1.Window,
                publications2.Setnb, publications2.Window,
                nb_unq_keywords1, nb_frq_keywords1,
                nb_unq_keywords2, nb_frq_keywords2,
                nb_unq_ovrlp, nb_frq_ovrlp);
        }

        /// <summary>
        /// Generate rows to be appended to the MeSH headings report
        /// </summary>
        /// <param name="setnb">Setnb of the scientist</param>
        /// <param name="window">Window that was generated</param>
        /// <param name="counts">Dictionary that contains the heading counts</param>
        /// <returns>
        /// One "setnb,window,heading,count" row per heading, in the dictionary's
        /// enumeration order; headings containing a comma or a double quote are
        /// quoted
        /// </returns>
        /// <exception cref="ArgumentNullException">setnb, window or counts is null</exception>
        public static string[] GenerateMeSHHeadingsReportRows(string setnb, string window, Dictionary<string, int> counts)
        {
            // Checked one by one so the exception names the offending parameter.
            if (setnb == null)
            {
                throw new ArgumentNullException("setnb", "GenerateMeSHHeadingsReportRows() got a null parameter");
            }
            if (window == null)
            {
                throw new ArgumentNullException("window", "GenerateMeSHHeadingsReportRows() got a null parameter");
            }
            if (counts == null)
            {
                throw new ArgumentNullException("counts", "GenerateMeSHHeadingsReportRows() got a null parameter");
            }

            string[] rows = new string[counts.Count];
            int row = 0;
            foreach (KeyValuePair<string, int> entry in counts)
            {
                rows[row] = String.Format("{0},{1},{2},{3:d}", setnb, window, QuoteHeading(entry.Key), entry.Value);
                row++;
            }

            return rows;
        }

        /// <summary>
        /// Generate rolling windows for two scientists
        /// </summary>
        /// <param name="db1">Database for scientist #1</param>
        /// <param name="setnb1">Setnb for scientist #1</param>
        /// <param name="db2">Database for scientist #2</param>
        /// <param name="setnb2">Setnb for scientist #2</param>
        /// <param name="windowLength">Number of years in the window</param>
        /// <returns>
        /// An array of "firstYear-lastYear" strings, one per window starting year;
        /// empty when either scientist has no publication years or when no
        /// window fits between the two scientists' year ranges
        /// </returns>
        /// <exception cref="ArgumentNullException">db1 or db2 is null</exception>
        /// <exception cref="ArgumentException">setnb1 or setnb2 is null or empty</exception>
        /// <exception cref="ArgumentOutOfRangeException">windowLength is zero or negative</exception>
        public static string[] RollingWindows(Database db1, string setnb1, Database db2, string setnb2, int windowLength)
        {
            if (db1 == null)
            {
                throw new ArgumentNullException("db1", "RollingWindows(): Database must not be null");
            }
            if (db2 == null)
            {
                throw new ArgumentNullException("db2", "RollingWindows(): Database must not be null");
            }
            if (String.IsNullOrEmpty(setnb1))
            {
                throw new ArgumentException("RollingWindows(): Setnb must not be null or empty", "setnb1");
            }
            if (String.IsNullOrEmpty(setnb2))
            {
                throw new ArgumentException("RollingWindows(): Setnb must not be null or empty", "setnb2");
            }
            if (windowLength <= 0)
            {
                // ArgumentOutOfRangeException derives from ArgumentException, so
                // existing catch blocks for ArgumentException keep working.
                throw new ArgumentOutOfRangeException("windowLength", windowLength, "RollingWindows(): Window length must be positive");
            }

            // Get the publication years for both scientists (sorted ascending by the query)
            DataTable years1 = GetPublicationYears(db1, setnb1);
            DataTable years2 = GetPublicationYears(db2, setnb2);
            if (years1.Rows.Count == 0 || years2.Rows.Count == 0)
            {
                return new string[0];
            }

            // Find the earliest years (first row of each sorted result)
            int earliest1 = YearAt(years1, 0);
            int earliest2 = YearAt(years2, 0);

            // Get the starting year of the first window
            int start = Math.Max(earliest1, earliest2) - windowLength + 1;

            // Find the latest years (last row of each sorted result)
            int latest1 = YearAt(years1, years1.Rows.Count - 1);
            int latest2 = YearAt(years2, years2.Rows.Count - 1);

            // Get the starting year of the last window
            int end = Math.Min(latest1, latest2);

            // If there are no overlapping windows, return an empty list
            if (end < start)
            {
                return new string[0];
            }

            string[] windows = new string[end - start + 1];
            for (int year = start; year <= end; year++)
            {
                windows[year - start] = String.Format("{0:d}-{1:d}", year, year + windowLength - 1);
            }

            return windows;
        }

        /// <summary>
        /// Wrap a heading in double quotes (doubling any embedded quotes) when it
        /// contains a comma or a double quote; otherwise return it unchanged.
        /// </summary>
        private static string QuoteHeading(string heading)
        {
            if (heading.IndexOfAny(CsvSpecialCharacters) < 0)
            {
                return heading;
            }

            return "\"" + heading.Replace("\"", "\"\"") + "\"";
        }

        /// <summary>
        /// Run the publication-year query for one setnb against one database.
        /// </summary>
        private static DataTable GetPublicationYears(Database db, string setnb)
        {
            // The list's element type is inferred from Database.Parameter's return type.
            return db.ExecuteQuery(PublicationYearsQuery, new[] { Database.Parameter(setnb) }.ToList());
        }

        /// <summary>
        /// Read the "year" column of the given row as an int.
        /// </summary>
        private static int YearAt(DataTable years, int rowIndex)
        {
            // Convert.ToInt32 accepts any boxed integral value, whereas a direct
            // (int) unboxing cast only succeeds when the boxed value is exactly Int32.
            return Convert.ToInt32(years.Rows[rowIndex]["year"]);
        }
    }
}