using System;
using System.Collections;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Text;
using Com.StellmanGreene.PubMed;

namespace SCGen
{
    /// <summary>
    /// Class to generate the Star Colleagues Report
    /// </summary>
    class StarColleaguesReport
    {
        /// <summary>
        /// Author-position categories that get their own count/weight column pair
        /// in the report. Each name starts with the colleague's position code and
        /// ends with the star's position code: 1 (first), L (last), 2 (second),
        /// NTL (next-to-last), M (middle).
        /// </summary>
        public static string[] ReportColumns =
        {
            "1L", "L1", "1M", "M1", "MM", "LM", "ML", "21", "12", "2M", "M2",
            "2L", "L2", "2NTL", "NTL2", "NTL1", "1NTL", "NTLM", "MNTL", "NTLL", "LNTL"
        };

        /// <summary>
        /// Retrieves all of the publications that a star and colleague have in
        /// common. It takes two parameters, the star setnb and the colleague setnb
        /// (in that order), and returns all of the data necessary to produce the
        /// Star Colleagues Report rows for that star/colleague pair. It's sorted by
        /// year so that the first and last rows in the results can be used to find
        /// the years of first and last collaboration.
        /// </summary>
        private const string SharedPublicationsQuery =
            @"SELECT p.Year, p.PMID, pp.PositionType AS StarPositionType,
                     cp.PositionType AS ColleaguePositionType, p.Journal
                FROM Publications p, ColleaguePublications cp, PeoplePublications pp
               WHERE pp.Setnb = ?
                 AND cp.Setnb = ?
                 AND p.PMID = pp.PMID
                 AND p.PMID = cp.PMID
               ORDER BY p.Year ASC";

        /// <summary>
        /// Generate the Star Colleagues report
        /// </summary>
        /// <param name="writer">TextWriter that receives the report rows</param>
        /// <param name="DB">Database to read the star/colleague pairs and publications from</param>
        /// <param name="JournalWeightsFilename">Journal weights file passed to the Reports object</param>
        /// <param name="SetnbsToSkip">Colleague Setnb values whose rows are already in the
        /// file (used when continuing a previous report); null is treated as empty</param>
        /// <param name="ParentForm">Dialog that receives log entries and progress updates; may be null</param>
        /// <param name="WriteHeaderRow">True to write the header row before the data rows</param>
        public static void Generate(TextWriter writer, Database DB, string JournalWeightsFilename, ArrayList SetnbsToSkip, ReportsDialog ParentForm, bool WriteHeaderRow)
        {
            Reports reports = new Reports(DB, JournalWeightsFilename);

            if (WriteHeaderRow)
            {
                WriteHeaderRowToReport(writer);
            }

            // Build the skip lookup once so the per-pair check below is a hashed
            // lookup rather than a linear scan of the ArrayList.
            Dictionary<string, bool> SkipLookup = BuildSkipLookup(SetnbsToSkip);

            // Retrieve all of the star/colleague pairs from the StarColleagues table.
            // The report is generated colleague by colleague (so that it's sorted by
            // the first column -- for the fault tolerance).
            using (DataTable ColleagueStarPairs = DB.ExecuteQuery("SELECT Setnb, StarSetnb FROM StarColleagues ORDER BY Setnb, StarSetnb"))
            {
                int Pairs = ColleagueStarPairs.Rows.Count;
                if (Pairs == 0)
                {
                    if (ParentForm != null)
                    {
                        ParentForm.AddLogEntry("Unable to generate StarColleagues report: No colleagues found!");
                    }
                    return;
                }

                // Per-year accumulators, allocated once and cleared for every pair.
                Hashtable CountsPerYear = new Hashtable();
                Hashtable WeightsPerYear = new Hashtable();

                // Hashtables for the four global counts for each row
                Hashtable Nbcoauth1 = new Hashtable();
                Hashtable Wghtd_Nbcoauth1 = new Hashtable();
                Hashtable Nbcoauth2 = new Hashtable();
                Hashtable Wghtd_Nbcoauth2 = new Hashtable();

                // For each star/colleague pair, produce the report rows.
                // Note that report rows are only produced for years where there
                // are collaborations.
                for (int Row = 0; Row < Pairs; Row++)
                {
                    DataRow Pair = ColleagueStarPairs.Rows[Row];
                    string Setnb = Pair["Setnb"].ToString();
                    string StarSetnb = Pair["StarSetnb"].ToString();

                    // If this is a continuation of a previous report, the skip lookup
                    // holds the colleague Setnb values from the rows that were already
                    // in the file.
                    if (SkipLookup.ContainsKey(Setnb))
                    {
                        continue;
                    }

                    // Parameter order must match the placeholders in the query:
                    // star first (pp.Setnb), then colleague (cp.Setnb).
                    ArrayList Parameters = new ArrayList();
                    Parameters.Add(Database.Parameter(StarSetnb));
                    Parameters.Add(Database.Parameter(Setnb));

                    using (DataTable PubData = DB.ExecuteQuery(SharedPublicationsQuery, Parameters))
                    {
                        int PubCount = PubData.Rows.Count;
                        if (PubCount == 0)
                        {
                            if (ParentForm != null)
                            {
                                ParentForm.AddLogEntry("No publications found for colleague " + Setnb + ", star " + StarSetnb);
                            }
                            continue;
                        }

                        // The results are ordered by year, so the first and last rows
                        // give the first and last years of collaboration.
                        int FirstCollabYear = Convert.ToInt32(PubData.Rows[0]["Year"]);
                        int LastCollabYear = Convert.ToInt32(PubData.Rows[PubCount - 1]["Year"]);

                        // Reset the accumulators for this pair
                        CountsPerYear.Clear();
                        WeightsPerYear.Clear();
                        Nbcoauth1.Clear();
                        Wghtd_Nbcoauth1.Clear();
                        Nbcoauth2.Clear();
                        Wghtd_Nbcoauth2.Clear();

                        // Go through the publications and collect the counts.
                        for (int RowNum = 0; RowNum < PubCount; RowNum++)
                        {
                            UpdateCounts(reports, PubData.Rows[RowNum], CountsPerYear, WeightsPerYear,
                                Nbcoauth1, Wghtd_Nbcoauth1, Nbcoauth2, Wghtd_Nbcoauth2);
                        }

                        // Write the rows to the report
                        WriteReportrows(Setnb, StarSetnb, FirstCollabYear, LastCollabYear,
                            Nbcoauth1, Wghtd_Nbcoauth1, Nbcoauth2, Wghtd_Nbcoauth2,
                            writer, CountsPerYear, WeightsPerYear);

                        // Progress is reported only for pairs that produced rows.
                        if (ParentForm != null)
                        {
                            ParentForm.SetProgressBar(0, Pairs, Row);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Build a hashed lookup of the Setnb strings in the skip list.
        /// </summary>
        /// <param name="SetnbsToSkip">List of colleague Setnb values; may be null</param>
        /// <returns>Dictionary whose keys are the string entries of the list
        /// (empty when the list is null)</returns>
        private static Dictionary<string, bool> BuildSkipLookup(ArrayList SetnbsToSkip)
        {
            Dictionary<string, bool> Lookup = new Dictionary<string, bool>();
            if (SetnbsToSkip == null)
            {
                return Lookup;
            }

            foreach (object Entry in SetnbsToSkip)
            {
                // Setnb values read from the database are compared as strings,
                // so only string entries can ever match.
                string EntrySetnb = Entry as string;
                if (EntrySetnb != null)
                {
                    Lookup[EntrySetnb] = true;
                }
            }
            return Lookup;
        }

        /// <summary>
        /// Write the header row to the report
        /// </summary>
        /// <param name="writer">TextWriter to write to</param>
        private static void WriteHeaderRowToReport(TextWriter writer)
        {
            writer.Write("setnb,star_setnb,year,Nbcoauth1,Wghtd_Nbcoauth1,Nbcoauth2,Wghtd_Nbcoauth2,");
            for (int Column = 0; Column < ReportColumns.Length; Column++)
            {
                writer.Write("Nbcoauth_" + ReportColumns[Column] + ",Wghtd_Nbcoauth_" + ReportColumns[Column] + ",");
            }
            writer.WriteLine("Frst_collab_year,Last_collab_year");
        }

        /// <summary>
        /// Update the counts for a publication, given a row from the query in Generate().
        /// The counts are collected in a hashtable indexed by year. Each item in the
        /// hashtable is another hashtable, indexed by the report column category
        /// ("NTL2", "LM", etc.). This in turn contains the count. There are two master
        /// hashtables of these counts, one for weighted and the other for unweighted.
        /// </summary>
        /// <param name="reports">Reports object initialized with journal weights</param>
        /// <param name="Row">Row from the query in Generate()</param>
        /// <param name="CountsPerYear">Hashtable that contains the counts for each year</param>
        /// <param name="WeightsPerYear">Hashtable that contains the weights for each year</param>
        /// <param name="Nbcoauth1">Per-year count of all shared publications</param>
        /// <param name="Wghtd_Nbcoauth1">Per-year journal-weight total of all shared publications</param>
        /// <param name="Nbcoauth2">Per-year count of publications where the star or the
        /// colleague is first or last author</param>
        /// <param name="Wghtd_Nbcoauth2">Per-year journal-weight total of publications where
        /// the star or the colleague is first or last author</param>
        private static void UpdateCounts(Reports reports, DataRow Row, Hashtable CountsPerYear, Hashtable WeightsPerYear, Hashtable Nbcoauth1, Hashtable Wghtd_Nbcoauth1, Hashtable Nbcoauth2, Hashtable Wghtd_Nbcoauth2)
        {
            int Year = Convert.ToInt32(Row["Year"]);
            Harvester.AuthorPositions ColleaguePositionType = (Harvester.AuthorPositions) Convert.ToInt32(Row["ColleaguePositionType"]);
            Harvester.AuthorPositions StarPositionType = (Harvester.AuthorPositions) Convert.ToInt32(Row["StarPositionType"]);
            string Journal = Row["Journal"].ToString();

            // Use the Reports object to get the publication's journal weight.
            // Journals without a weight contribute zero.
            Single Weight;
            if (reports.Weights.ContainsKey(Journal))
                Weight = (Single)reports.Weights[Journal];
            else
                Weight = 0;

            // Create or retrieve the per-column tables for this year. They are
            // created even when no column ends up matching, because the report
            // writer takes its list of years from CountsPerYear.
            Hashtable Counts = GetOrCreateTable(CountsPerYear, Year);
            Hashtable Weights = GetOrCreateTable(WeightsPerYear, Year);

            // Update Nbcoauth1 and Wghtd_Nbcoauth1 for every publication
            IncrementCount(Nbcoauth1, Year);
            AddWeight(Wghtd_Nbcoauth1, Year, Weight);

            // Update Nbcoauth2 and Wghtd_Nbcoauth2 for publications where either
            // the star or colleague are first or last
            bool FirstOrLast =
                (ColleaguePositionType == Harvester.AuthorPositions.First)
                || (ColleaguePositionType == Harvester.AuthorPositions.Last)
                || (StarPositionType == Harvester.AuthorPositions.First)
                || (StarPositionType == Harvester.AuthorPositions.Last);
            if (FirstOrLast)
            {
                IncrementCount(Nbcoauth2, Year);
                AddWeight(Wghtd_Nbcoauth2, Year, Weight);
            }

            // Update the count and weight for each column whose colleague and
            // star positions both match this publication.
            for (int ColumnNum = 0; ColumnNum < ReportColumns.Length; ColumnNum++)
            {
                string Column = ReportColumns[ColumnNum];
                if ((ColleaguePositionType == ColleaguePositionFor(Column))
                    && (StarPositionType == StarPositionFor(Column)))
                {
                    IncrementCount(Counts, Column);
                    AddWeight(Weights, Column, Weight);
                }
            }
        }

        /// <summary>
        /// Return the hashtable stored under the key, creating and storing an
        /// empty one if the key is not present yet.
        /// </summary>
        private static Hashtable GetOrCreateTable(Hashtable Parent, object Key)
        {
            Hashtable Table = (Hashtable)Parent[Key];
            if (Table == null)
            {
                Table = new Hashtable();
                Parent[Key] = Table;
            }
            return Table;
        }

        /// <summary>
        /// Add one to the int stored under the key (a missing key counts as zero).
        /// </summary>
        private static void IncrementCount(Hashtable Table, object Key)
        {
            object Current = Table[Key];
            int Count = (Current == null) ? 0 : (int)Current;
            Count++;
            Table[Key] = Count;
        }

        /// <summary>
        /// Add the weight to the Single stored under the key (a missing key counts as zero).
        /// </summary>
        private static void AddWeight(Hashtable Table, object Key, Single Weight)
        {
            object Current = Table[Key];
            Single Total = (Current == null) ? (Single)0.0 : (Single)Current;
            Total += Weight;
            Table[Key] = Total;
        }

        /// <summary>
        /// Figure out which colleague position a report column counts. The column
        /// name starts with the colleague's position code.
        /// </summary>
        private static Harvester.AuthorPositions ColleaguePositionFor(string Column)
        {
            // look for NTL first to disambiguate from L
            if (Column.StartsWith("NTL", StringComparison.Ordinal))
                return Harvester.AuthorPositions.NextToLast;
            if (Column.StartsWith("1", StringComparison.Ordinal))
                return Harvester.AuthorPositions.First;
            if (Column.StartsWith("L", StringComparison.Ordinal))
                return Harvester.AuthorPositions.Last;
            if (Column.StartsWith("2", StringComparison.Ordinal))
                return Harvester.AuthorPositions.Second;
            return Harvester.AuthorPositions.Middle;
        }

        /// <summary>
        /// Figure out which star position a report column counts. The column
        /// name ends with the star's position code.
        /// </summary>
        private static Harvester.AuthorPositions StarPositionFor(string Column)
        {
            // look for NTL first to disambiguate from L
            if (Column.EndsWith("NTL", StringComparison.Ordinal))
                return Harvester.AuthorPositions.NextToLast;
            if (Column.EndsWith("1", StringComparison.Ordinal))
                return Harvester.AuthorPositions.First;
            if (Column.EndsWith("L", StringComparison.Ordinal))
                return Harvester.AuthorPositions.Last;
            if (Column.EndsWith("2", StringComparison.Ordinal))
                return Harvester.AuthorPositions.Second;
            return Harvester.AuthorPositions.Middle;
        }

        /// <summary>
        /// Write the report rows for a set of publications
        /// </summary>
        /// <param name="Setnb">Colleague setnb written in the first column</param>
        /// <param name="StarSetnb">Star setnb written in the second column</param>
        /// <param name="FirstCollabYear">Year of first collaboration, written on every row</param>
        /// <param name="LastCollabYear">Year of last collaboration, written on every row</param>
        /// <param name="Nbcoauth1">Per-year values for the Nbcoauth1 column</param>
        /// <param name="Wghtd_Nbcoauth1">Per-year values for the Wghtd_Nbcoauth1 column</param>
        /// <param name="Nbcoauth2">Per-year values for the Nbcoauth2 column</param>
        /// <param name="Wghtd_Nbcoauth2">Per-year values for the Wghtd_Nbcoauth2 column</param>
        /// <param name="writer">Writer to add the report rows to</param>
        /// <param name="CountsPerYear">Hashtable that contains the counts</param>
        /// <param name="WeightsPerYear">Hashtable that contains the weights</param>
        /// <returns>Number of rows written to the report</returns>
        public static int WriteReportrows(string Setnb, string StarSetnb, int FirstCollabYear, int LastCollabYear, Hashtable Nbcoauth1, Hashtable Wghtd_Nbcoauth1, Hashtable Nbcoauth2, Hashtable Wghtd_Nbcoauth2, TextWriter writer, Hashtable CountsPerYear, Hashtable WeightsPerYear)
        {
            System.Globalization.CultureInfo Invariant = System.Globalization.CultureInfo.InvariantCulture;
            int RowsWritten = 0;

            // The report contains one row per year for which there are publications.
            // The hashes are keyed by year, so the years can be retrieved from the keys.
            int[] Years = new int[CountsPerYear.Keys.Count];
            CountsPerYear.Keys.CopyTo(Years, 0);
            Array.Sort(Years);

            foreach (int Year in Years)
            {
                // The CountsPerYear and WeightsPerYear hashtables are keyed on year.
                // They contain hashtables keyed on the column name from ReportColumns
                // and contain the counts and weights.
                Hashtable Counts = (Hashtable)CountsPerYear[Year];
                Hashtable Weights = (Hashtable)WeightsPerYear[Year];

                writer.Write(Setnb + "," + StarSetnb + "," + Year.ToString(Invariant) + ",");

                // Write Nbcoauth1, Wghtd_Nbcoauth1, Nbcoauth2, Wghtd_Nbcoauth2
                writer.Write(HashValueOrZero(Nbcoauth1, Year) + ",");
                writer.Write(HashValueOrZero(Wghtd_Nbcoauth1, Year) + ",");
                writer.Write(HashValueOrZero(Nbcoauth2, Year) + ",");
                writer.Write(HashValueOrZero(Wghtd_Nbcoauth2, Year) + ",");

                // Write the count and then the weight for each column
                foreach (string Column in ReportColumns)
                {
                    writer.Write(HashValueOrZero(Counts, Column));
                    writer.Write(",");
                    writer.Write(HashValueOrZero(Weights, Column));
                    writer.Write(",");
                }

                writer.WriteLine(FirstCollabYear.ToString(Invariant) + "," + LastCollabYear.ToString(Invariant));
                RowsWritten++;
            }

            return RowsWritten;
        }

        /// <summary>
        /// Look up the key in the hashtable, return the string value (or "0" if
        /// the hashtable is null or doesn't contain a value for the key).
        /// Values are formatted with the invariant culture so that a decimal
        /// weight is never written with a comma, which would split the CSV field.
        /// </summary>
        /// <param name="hashtable">Hashtable to look up the value</param>
        /// <param name="Key">Key to look up</param>
        /// <returns>The value as a string, or "0"</returns>
        private static string HashValueOrZero(Hashtable hashtable, object Key)
        {
            if (hashtable == null)
            {
                return "0";
            }

            // The Hashtable indexer returns null for a missing key, so a single
            // lookup covers both the "missing" and "present" cases.
            object Value = hashtable[Key];
            if (Value == null)
            {
                return "0";
            }
            return Convert.ToString(Value, System.Globalization.CultureInfo.InvariantCulture);
        }
    }
}