using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
namespace ScientificDistance
{
public class ReportGenerator
{
// Events raised by ReportGenerator, together with their EventArgs subclasses
// and the OnWarning()/OnProgress() methods that raise them.
#region Event
/// <summary>
/// Lets the ReportGenerator object send messages that are intended to be
/// included in the log. OnWarning() raises it with a WarningEventArgs
/// carrying the message text.
/// </summary>
public event EventHandler Warning;
/// <summary>
/// Lets the ReportGenerator object report progress. OnProgress() raises it
/// with a ProgressEventArgs carrying a timestamp, a message, and the
/// current/maximum progress values.
/// </summary>
public event EventHandler Progress;
/// <summary>
/// Event data for the Warning event: carries the warning text.
/// </summary>
public class WarningEventArgs : EventArgs
{
    // Set once by the constructor and never changed afterwards
    private readonly string message;

    /// <summary>
    /// Create the event data for a warning
    /// </summary>
    /// <param name="message">Text of the warning</param>
    public WarningEventArgs(string message)
    {
        this.message = message;
    }

    /// <summary>
    /// Text of the warning
    /// </summary>
    public string Message
    {
        get { return message; }
    }
}
/// <summary>
/// Event data for the Progress event: a timestamped message plus the
/// current and maximum progress values.
/// </summary>
public class ProgressEventArgs : EventArgs
{
    // All values are fixed by the constructor and never changed afterwards
    private readonly DateTime timestamp;
    private readonly string message;
    private readonly int value;
    private readonly int max;

    /// <summary>
    /// Create the event data for a progress report
    /// </summary>
    /// <param name="timestamp">Time of the progress report</param>
    /// <param name="message">Text describing the progress</param>
    /// <param name="value">Current progress value</param>
    /// <param name="max">Maximum progress value</param>
    public ProgressEventArgs(DateTime timestamp, string message, int value, int max)
    {
        this.timestamp = timestamp;
        this.message = message;
        this.value = value;
        this.max = max;
    }

    /// <summary>
    /// Time of the progress report
    /// </summary>
    public DateTime Timestamp
    {
        get { return timestamp; }
    }

    /// <summary>
    /// Text describing the progress
    /// </summary>
    public string Message
    {
        get { return message; }
    }

    /// <summary>
    /// Current progress value
    /// </summary>
    public int Value
    {
        get { return value; }
    }

    /// <summary>
    /// Maximum progress value
    /// </summary>
    public int Max
    {
        get { return max; }
    }
}
/// <summary>
/// Raise the Warning event, passing the message to subscribers in a
/// WarningEventArgs. Does nothing if there are no subscribers.
/// </summary>
/// <param name="message">Text of the warning</param>
protected void OnWarning(string message)
{
    // Copy the delegate to a local before testing it: if the last subscriber
    // detached between the null check and the call, invoking the event field
    // directly would throw a NullReferenceException.
    var handler = Warning;
    if (handler != null)
    {
        handler(this, new WarningEventArgs(message));
    }
}
/// <summary>
/// Raise the Progress event, passing the values to subscribers in a
/// ProgressEventArgs. Does nothing if there are no subscribers.
/// </summary>
/// <param name="timestamp">Time of the progress report</param>
/// <param name="message">Text describing the progress</param>
/// <param name="value">Current progress value</param>
/// <param name="max">Maximum progress value</param>
protected void OnProgress(DateTime timestamp, string message, int value, int max)
{
    // Copy the delegate to a local before testing it: if the last subscriber
    // detached between the null check and the call, invoking the event field
    // directly would throw a NullReferenceException.
    var handler = Progress;
    if (handler != null)
    {
        handler(this, new ProgressEventArgs(timestamp, message, value, max));
    }
}
#endregion
/// <summary>
/// Selects how the publication windows are chosen when the report is generated.
/// </summary>
public enum RollingWindow
{
// No rolling windows: Generate() uses the Window1/Window2 values of each input row
none,
// Generate() calls WriteRollingWindowRows() with a window length of 3
threeYear,
// Generate() calls WriteRollingWindowRows() with a window length of 5
fiveYear,
}
// Rows read from the input file by the constructor. Generate() replaces this
// list with the one returned by Files.FaultTolerantCopy() when FaultTolerance is set.
private List inputRows;
/// <summary>
/// Path of the input file that the constructor reads the input rows from
/// </summary>
public string InputFilename { get; private set; }
/// <summary>
/// Path of the report file that Generate() writes (or appends to)
/// </summary>
public string ReportFilename { get; private set; }
/// <summary>
/// Path of the MeSH headings report file that Generate() writes (or appends to)
/// </summary>
public string MeshFilename { get; private set; }
/// <summary>
/// Rolling window mode used by Generate()
/// </summary>
public RollingWindow Window { get; private set; }
/// <summary>
/// True if Generate() should call Files.FaultTolerantCopy() and append to the
/// existing report files instead of overwriting them
/// </summary>
public bool FaultTolerance { get; private set; }
/// <summary>
/// True if each scientist's publications should be recreated excluding the
/// other scientist's PMIDs before the report rows are written
/// </summary>
public bool RemoveCommonPublications { get; private set; }
/// <summary>
/// Value passed to the Database constructor for scientist #1
/// (presumably a data source name -- confirm against the Database class)
/// </summary>
public string DSN1 { get; private set; }
/// <summary>
/// Value passed to the Database constructor for scientist #2
/// (presumably a data source name -- confirm against the Database class)
/// </summary>
public string DSN2 { get; private set; }
/// <summary>
/// Read the input file and validate the report settings
/// </summary>
/// <param name="DSN1">Value passed to the Database constructor for scientist #1</param>
/// <param name="DSN2">Value passed to the Database constructor for scientist #2</param>
/// <param name="inputFilename">Input file to read; must exist</param>
/// <param name="reportFilename">Report file to write</param>
/// <param name="meshFilename">MeSH headings report file to write</param>
/// <param name="window">Rolling window mode</param>
/// <param name="faultTolerance">True to continue from existing report files</param>
/// <param name="removeCommonPublications">True to exclude each scientist's PMIDs from the other's publications</param>
/// <exception cref="OperationCanceledException">
/// The input file does not exist, or fault tolerance was requested together with a
/// rolling window or without an existing report file and MeSH headings report file
/// </exception>
public ReportGenerator(string DSN1, string DSN2, string inputFilename, string reportFilename,
    string meshFilename, RollingWindow window, bool faultTolerance, bool removeCommonPublications)
{
    // The input file is read right away, so it has to exist
    if (!File.Exists(inputFilename))
    {
        throw new OperationCanceledException(inputFilename + " not found");
    }
    string[] inputLines = File.ReadAllLines(inputFilename);
    inputRows = Files.ReadInput(inputLines);

    // Fault tolerance continues from existing report files and can't be
    // combined with rolling windows; the first failed check is the one reported
    if (faultTolerance)
    {
        string problem = null;
        if (window != RollingWindow.none)
        {
            problem = "Fault tolerance is not supported for rolling windows";
        }
        else if (!File.Exists(reportFilename))
        {
            problem = "Fault tolerance requires an existing report file; specified report file does not exist";
        }
        else if (!File.Exists(meshFilename))
        {
            problem = "Fault tolerance requires an existing MeSH headings report file; specified MeSH headings report file does not exist";
        }

        if (problem != null)
        {
            throw new OperationCanceledException(problem);
        }
    }

    // Everything checked out, so keep the settings
    this.DSN1 = DSN1;
    this.DSN2 = DSN2;
    InputFilename = inputFilename;
    ReportFilename = reportFilename;
    MeshFilename = meshFilename;
    Window = window;
    FaultTolerance = faultTolerance;
    RemoveCommonPublications = removeCommonPublications;
}
/// <summary>
/// Generate the report and the MeSH headings report for every input row,
/// raising Progress events along the way. With fault tolerance the existing
/// report files are continued (appended to) instead of being overwritten.
/// </summary>
public void Generate()
{
    OnProgress(DateTime.Now, "Starting report generation", 0, 0);

    // With fault tolerance, FaultTolerantCopy() copies the files and hands back
    // the truncated input; the report files are then appended to
    bool append = FaultTolerance;
    if (append)
    {
        var remainingRows = Files.FaultTolerantCopy(ReportFilename, MeshFilename, Window != RollingWindow.none, inputRows);
        int skipped = inputRows.Count - remainingRows.Count;
        if (skipped == 0)
        {
            OnWarning("No input rows were skipped due to fault tolerance");
        }
        else
        {
            OnWarning(skipped + " input rows were skipped due to fault tolerance");
        }
        inputRows = remainingRows;
    }

    // Open the StreamWriters and databases
    using (StreamWriter reportWriter = new StreamWriter(ReportFilename, append))
    using (StreamWriter meshWriter = new StreamWriter(MeshFilename, append))
    using (Database db1 = new Database(DSN1))
    using (Database db2 = new Database(DSN2))
    {
        // Files that are being appended to already have their headers
        if (!append)
        {
            reportWriter.WriteLine(Files.ReportHeader);
            meshWriter.WriteLine(Files.MeSHHeader);
        }

        // Write the reports, one input row at a time
        int total = inputRows.Count;
        int number = 0;
        foreach (InputRow inputRow in inputRows)
        {
            number++;
            string progressMessage;
            switch (Window)
            {
                case RollingWindow.none:
                {
                    // Use the windows given in the input row
                    Publications pubs1 = new Publications(db1, inputRow.Scientist1, inputRow.Window1, null);
                    Publications pubs2 = new Publications(db2, inputRow.Scientist2, inputRow.Window2, null);
                    if (RemoveCommonPublications)
                    {
                        RecreatePublicationsWithoutDuplicates(db1, ref pubs1, db2, ref pubs2);
                    }
                    progressMessage = String.Format("Writing {0} ({1}) - {2} ({3})",
                        inputRow.Scientist1, inputRow.Window1, inputRow.Scientist2, inputRow.Window2);
                    OnProgress(DateTime.Now, progressMessage, number, total);
                    WriteReportRows(reportWriter, meshWriter, inputRow, pubs1, pubs2);
                    break;
                }
                case RollingWindow.threeYear:
                case RollingWindow.fiveYear:
                {
                    // Both rolling modes differ only in the length of the window
                    int windowLength = (Window == RollingWindow.threeYear) ? 3 : 5;
                    WriteRollingWindowRows(reportWriter, meshWriter, db1, db2, inputRow, windowLength);
                    progressMessage = String.Format("Writing {0} - {1} (rolling windows)",
                        inputRow.Scientist1, inputRow.Scientist2);
                    OnProgress(DateTime.Now, progressMessage, number, total);
                    break;
                }
            }
        }
    }

    OnProgress(DateTime.Now, "Finished report generation", 0, 0);
}
/// <summary>
/// Replace two Publications objects with new ones that each exclude the
/// PMIDs of the other
/// </summary>
/// <param name="db1">Database for scientist #1</param>
/// <param name="pubs1">Publications object for scientist #1; replaced on return</param>
/// <param name="db2">Database for scientist #2</param>
/// <param name="pubs2">Publications object for scientist #2; replaced on return</param>
private void RecreatePublicationsWithoutDuplicates(Database db1, ref Publications pubs1,
    Database db2, ref Publications pubs2)
{
    // Snapshot both PMID lists up front, so each new object is built from the
    // other scientist's original publications
    var originalPmids1 = pubs1.PMIDs.ToList();
    var originalPmids2 = pubs2.PMIDs.ToList();

    // Same scientist and window as before, minus the other scientist's PMIDs
    Publications withoutCommon1 = new Publications(db1, pubs1.Setnb, pubs1.Window, originalPmids2);
    Publications withoutCommon2 = new Publications(db2, pubs2.Setnb, pubs2.Window, originalPmids1);

    // Hand the new objects back through the ref parameters
    pubs1 = withoutCommon1;
    pubs2 = withoutCommon2;
}
/// <summary>
/// Write the report rows for every rolling window of one input row. The
/// windows come from Overlap.RollingWindows(); any window given in the input
/// row itself is ignored, with a warning.
/// </summary>
/// <param name="reportWriter">StreamWriter to write the report</param>
/// <param name="meshWriter">StreamWriter to write the MeSH headings report</param>
/// <param name="db1">Database for scientist #1</param>
/// <param name="db2">Database for scientist #2</param>
/// <param name="inputRow">InputRow that's being used to generate the report rows</param>
/// <param name="windowLength">Length of the window</param>
private void WriteRollingWindowRows(StreamWriter reportWriter, StreamWriter meshWriter, Database db1, Database db2, InputRow inputRow, int windowLength)
{
    // Warn if the scientists' windows aren't blank. Each scientist's own window
    // is checked, so the warning names the scientist whose window is ignored.
    if (inputRow.Window1 != "")
    {
        OnWarning("Warning: scientist #1 window will be ignored for rolling windows [" + inputRow.ToString() + "]");
    }
    if (inputRow.Window2 != "")
    {
        OnWarning("Warning: scientist #2 window will be ignored for rolling windows [" + inputRow.ToString() + "]");
    }

    // Get the list of windows
    string[] windows = Overlap.RollingWindows(db1, inputRow.Scientist1, db2, inputRow.Scientist2, windowLength);

    // Write each window's rows
    foreach (string window in windows)
    {
        Publications pubs1 = new Publications(db1, inputRow.Scientist1, window, null);
        Publications pubs2 = new Publications(db2, inputRow.Scientist2, window, null);
        if (RemoveCommonPublications)
        {
            RecreatePublicationsWithoutDuplicates(db1, ref pubs1, db2, ref pubs2);
        }

        // The rows are written for a copy of the input row that carries the
        // rolling window for both scientists instead of the windows from the input
        InputRow windowRow = new InputRow()
        {
            Scientist1 = inputRow.Scientist1,
            Window1 = window,
            Scientist2 = inputRow.Scientist2,
            Window2 = window
        };
        WriteReportRows(reportWriter, meshWriter, windowRow, pubs1, pubs2);
    }
}
/// <summary>
/// Write one overlap report row for a pair of scientists, followed by the
/// MeSH headings report rows for scientist #1 and then for scientist #2
/// </summary>
/// <param name="reportWriter">StreamWriter to write the report</param>
/// <param name="meshWriter">StreamWriter to write the MeSH headings report</param>
/// <param name="inputRow">InputRow that's being used to generate the report rows</param>
/// <param name="pubs1">Publications for scientist #1</param>
/// <param name="pubs2">Publications for scientist #2</param>
private static void WriteReportRows(StreamWriter reportWriter, StreamWriter meshWriter, InputRow inputRow, Publications pubs1, Publications pubs2)
{
    // The report gets a single row that covers both sets of publications
    var overlapRow = Overlap.GenerateOverlapReportRow(pubs1, pubs2);
    reportWriter.WriteLine(overlapRow);

    // MeSH heading rows for scientist #1
    var keywordCounts1 = Overlap.KeywordCounts(pubs1);
    var meshRows1 = Overlap.GenerateMeSHHeadingsReportRows(inputRow.Scientist1, inputRow.Window1, keywordCounts1);
    foreach (string meshRow in meshRows1)
    {
        meshWriter.WriteLine(meshRow);
    }

    // MeSH heading rows for scientist #2
    var keywordCounts2 = Overlap.KeywordCounts(pubs2);
    var meshRows2 = Overlap.GenerateMeSHHeadingsReportRows(inputRow.Scientist2, inputRow.Window2, keywordCounts2);
    foreach (string meshRow in meshRows2)
    {
        meshWriter.WriteLine(meshRow);
    }
}
}
}