Skip to content

Commit

Permalink
make parallel
Browse files: browse the repository at this point in the history
  • Loading branch information
JFriel committed Aug 23, 2023
1 parent c7dff3e commit eec69be
Showing 1 changed file with 48 additions and 17 deletions.
65 changes: 48 additions & 17 deletions HICPlugin/DataFlowComponents/CHIColumnFinder.cs
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,6 +13,8 @@
using Rdmp.Core.ReusableLibraryCode;
using Rdmp.Core.ReusableLibraryCode.Checks;
using Rdmp.Core.ReusableLibraryCode.Progress;
using System.Threading.Tasks;
using System.Diagnostics;

namespace HICPluginInteractive.DataFlowComponents;

Expand All @@ -32,8 +34,8 @@ public class CHIColumnFinder : IPluginDataFlowComponent<DataTable>, IPipelineReq
[DemandsInitialization("By default all columns from the source will be checked for valid CHIs. Set this to a list of headers (separated with a comma) to ignore the specified columns.", DemandType = DemandType.Unspecified)]
public string IgnoreColumns
{
get => string.Join(',',_columnWhitelist);
set => _columnWhitelist=(value ?? "").Split(',').Select(s=>s.Trim()).ToList();
get => string.Join(',', _columnWhitelist);
set => _columnWhitelist = (value ?? "").Split(',').Select(s => s.Trim()).ToList();
}

private bool _firstTime = true;
Expand All @@ -45,6 +47,8 @@ public string IgnoreColumns

public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener listener, GracefulCancellationToken cancellationToken)
{
// var sw = new Stopwatch();
// sw.Start();
if (OverrideUntil.HasValue && OverrideUntil.Value > DateTime.Now)
{
if (_firstTime)
Expand All @@ -69,20 +73,48 @@ public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener
_columnWhitelist.AddRange(ignoreColumnsArray);
}

var batchRowCount = 0;
toProcess.BeginLoadData();
string[] knownBadColumns = Array.Empty<string>();
var dtRows = toProcess.Rows.Cast<DataRow>().ToArray();
foreach (var row in dtRows)
// var batchRowCount = 0;
// toProcess.BeginLoadData();
// string[] knownBadColumns = Array.Empty<string>();
// var dtRows = toProcess.Rows.Cast<DataRow>().ToArray();
// foreach (var row in dtRows)
// {
// foreach (var col in toProcess.Columns.Cast<DataColumn>().Where(col => !_columnWhitelist.Contains(col.ColumnName.Trim()) && !knownBadColumns.Contains(col.ColumnName)))
// {
// bool containsValidCHI = ContainsValidChi(row[col]);
// if (!containsValidCHI) continue;
// knownBadColumns.Append(col.ColumnName);

// if (_activator?.IsInteractive == true && ShowUIComponents)
// DoTheMessageBoxDance(toProcess, listener, col, row, batchRowCount);
// else
// {
// var message =
// $"Column {col.ColumnName} in Dataset {toProcess.TableName} appears to contain a CHI ({row[col]})";
// _foundChiList.Add(message);
// listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning, message));
// if (!_isTableAlreadyNamed)
// listener.OnNotify(this, new NotifyEventArgs(ProgressEventType.Warning,
// "DataTable has not been named. If you want to know the dataset that the error refers to please add an ExtractCatalogueMetadata to the extraction pipeline."));
// }
// }

// batchRowCount++;
// }
// toProcess.EndLoadData();
// sw.Stop();
// var sw2 = new Stopwatch();
// sw2.Start();
Parallel.For(0, toProcess.Rows.Count, rowIndex =>
{
foreach (var col in toProcess.Columns.Cast<DataColumn>().Where(col => !_columnWhitelist.Contains(col.ColumnName.Trim()) && !knownBadColumns.Contains(col.ColumnName)))
DataRow row = toProcess.Rows[rowIndex];
foreach (var col in toProcess.Columns.Cast<DataColumn>().Where(col => !_columnWhitelist.Contains(col.ColumnName.Trim())))
{
bool containsValidCHI = ContainsValidChi(row[col]);
if(!containsValidCHI) continue;
knownBadColumns.Append(col.ColumnName);
if (!containsValidCHI) continue;
if (_activator?.IsInteractive == true && ShowUIComponents)
DoTheMessageBoxDance(toProcess, listener, col, row, batchRowCount);
DoTheMessageBoxDance(toProcess, listener, col, row, rowIndex);
else
{
var message =
Expand All @@ -94,15 +126,14 @@ public DataTable ProcessPipelineData(DataTable toProcess, IDataLoadEventListener
"DataTable has not been named. If you want to know the dataset that the error refers to please add an ExtractCatalogueMetadata to the extraction pipeline."));
}
}

batchRowCount++;
}
toProcess.EndLoadData();
});
// sw2.Stop();
// Console.WriteLine(string.Format("{0} {1} {2}", toProcess.TableName, sw.Elapsed, sw2.Elapsed));
return toProcess;
}


private void DoTheMessageBoxDance(DataTable toProcess, IDataLoadEventListener listener, DataColumn col, DataRow row, int batchRowCount)

private void DoTheMessageBoxDance(DataTable toProcess, IDataLoadEventListener listener, DataColumn col, DataRow row, int batchRowCount)
{
if (_activator.IsInteractive && _activator.YesNo(
$"Column {col.ColumnName} in Dataset {(_isTableAlreadyNamed ? toProcess.TableName : "UNKNOWN (you need an ExtractCatalogueMetadata in the pipeline to get a proper name)")} appears to contain a CHI ({row[col]})\n\nWould you like to view the current batch of data?", "Suspected CHI Column"))
Expand Down

0 comments on commit eec69be

Please sign in to comment.