From 4042183973086c5a3cd131446b4d72e34a0b3f01 Mon Sep 17 00:00:00 2001 From: theo-barfoot <44871137+theo-barfoot@users.noreply.github.com> Date: Wed, 15 May 2024 17:06:28 +0100 Subject: [PATCH] separate hashing with no concat and fix "hash" option (#11) feat: Enhance CSV processing with individual column hashing and additional options - Modified the existing functionality to hash each selected column separately instead of concatenating them. - Added new columns with the original column name suffixed by "_hash" for each hashed column. - Implemented options to manage columns as follows: - "Hash": Adds a new column with the hashed value and retains the original column. - "Hash and exclude": Adds a new column with the hashed value and removes the original column from the unidentifiable spreadsheet. - "Exclude": Removes the original column only from the unidentifiable spreadsheet but retains it in the original_with_hash spreadsheet. - "Keep": Retains the original column in both spreadsheets without any changes. - Added a new section with a text input box and a button to hash the input using SHA-1. Displays the first 10 characters of the hash below the input box when the button is clicked. - Updated the `myForm` submission event listener to handle separate hashing of each column. - Updated the `updateSelect` function to handle UI changes for each option. - Ensured that new columns with hashed values are added correctly based on the selected options. This commit enhances the usability and flexibility of the CSV processing tool, allowing users to better manage and anonymize their data. 
--- README.md | 4 +-- index.html | 72 ++++++++++++++++++++-------------------------------- testcsv2.csv | 3 +++ 3 files changed, 32 insertions(+), 47 deletions(-) create mode 100644 testcsv2.csv diff --git a/README.md b/README.md index 53fc7f4..cf72663 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ Navigate to the [Pseudo-Anonymised ID Generator](https://cai4cai.ml/sha1inbrowse The application currently supports `.csv` files only. Make sure your data is saved in this format before proceeding. Select your file by clicking on `Choose File`. ### Step 3: Configure Data Columns -Assign roles to each column in your dataset, specifying which should be hashed, excluded, or kept. Specifically, assign the NHS number column to `Hash and Exclude` and all identifiable information to `Exclude`. Relevant clinical information, but not patient identifiable, can be assigned to `Keep`. Any junk that you do not want, in the anonymised spreadsheet, can be set to `Exclude`. +Assign roles to each column in your dataset, specifying which should be hashed, excluded, or kept. Specifically, assign the NHS number column to `Hash and Exclude` and all identifiable information to `Exclude`. Relevant clinical information, but not patient identifiable, can be assigned to `Keep`. Any junk that you do not want, in the anonymised spreadsheet, can be set to `Exclude`. Columns that you want a hashed version of, but you also want to appear in the anonymised spreadsheet can be set to `Hash` ### Step 4: Process the Data Click `Process` to start the local hashing. This generates two `.csv` files: @@ -33,7 +33,7 @@ Click `Process` to start the local hashing. This generates two `.csv` files: ### Data Handling - **Internal Use**: Keep `original_with_hash.csv` within the NHS trust. -- **External Sharing**: `unidentifiable.csv` can be shared with researchers at KCL, following approved ethics and data sharing agreements. 
+- **External Sharing**: `unidentifiable.csv` is the anonymised spreadsheet and can be shared with researchers at KCL, following approved ethics and data sharing agreements. ## Support and Contact For any inquiries or support requests, please contact: diff --git a/index.html b/index.html index 6ca839e..8a779a7 100644 --- a/index.html +++ b/index.html @@ -25,7 +25,6 @@ margin-bottom: 10px; text-align: center; } - @@ -125,7 +124,7 @@

Hash Input

const csvFile = document.getElementById("csvFile"); const table = document.getElementById("previewTable"); - const hashColour = "#a3ffff" + const hashColour = "#a3ffff"; const excludeColour = "#ffa6a6"; const stripeWidth = 6; @@ -134,59 +133,59 @@

Hash Input

const input = csvFile.files[0]; const reader = new FileReader(); - // 👇 executed when a file is loaded reader.onload = async function (e) { - // 👇 get the text from CSV file const text = e.target.result; - // 👇 parse it using D3.js const data = d3.csvParse(text); const dataWithHash = d3.csvParse(text); - // Find what to do with each column const toHash = []; + const toHashAndExclude = []; const toExclude = []; data.columns.forEach(function(column) { const value = document.getElementById("select_" + column).value; switch (value) { - case "Hash and keep": + case "Hash": toHash.push(column); break; case "Hash and exclude": - toHash.push(column); - toExclude.push(column); + toHashAndExclude.push(column); break; case "Exclude": toExclude.push(column); break; + case "Keep": + // Keep option, do nothing + break; } }); - console.log(toHash); - console.log(toExclude); for (let i = 0; i < data.length; i++) { - // Concatenate strings for hashing - let hashKey = ""; - toHash.forEach(function(column) { - hashKey += data[i][column]; - }) - // Hash - console.log("Hashing: " + hashKey); - let hash = await sha1(hashKey); - console.log("SHA-1: " + hash); - - data[i]["Hash"] = hash; - dataWithHash[i]["Hash"] = hash; + // Hash and add columns for "Hash" and "Hash and exclude" options + for (let j = 0; j < toHash.length; j++) { + const column = toHash[j]; + const hashKey = data[i][column]; + const hash = await sha1(hashKey); + data[i][`${column}_hash`] = hash; + dataWithHash[i][`${column}_hash`] = hash; + } + for (let j = 0; j < toHashAndExclude.length; j++) { + const column = toHashAndExclude[j]; + const hashKey = data[i][column]; + const hash = await sha1(hashKey); + data[i][`${column}_hash`] = hash; + dataWithHash[i][`${column}_hash`] = hash; + } - // Delete fields in toExclude + // Remove columns for "Exclude" and "Hash and exclude" options toExclude.forEach(function(column) { delete data[i][column]; - }) + }); + toHashAndExclude.forEach(function(column) { + delete data[i][column]; 
+ }); } - // console.log(data); - // console.log(dataWithHash); - let csvContent = "data:text/csv;charset=utf-8," + encodeURI(d3.csvFormat(data)); let originalCsvWithHash = "data:text/csv;charset=utf-8," + encodeURI(d3.csvFormat(dataWithHash)); download(csvContent, "unidentifiable.csv"); @@ -201,7 +200,6 @@

Hash Input

function updateSelect(e) { const select = e.target; let name = select.id.slice(7); - // Update colour of table column const numCells = table.rows.length; let value = select.value.toLowerCase(); let toHash = value.includes("hash"); @@ -231,20 +229,13 @@

Hash Input

const reader = new FileReader(); - // 👇 executed when a file is loaded reader.onload = async function (e) { - // 👇 get the text from CSV file const text = e.target.result; - // 👇 parse it using D3.js - console.log("Parsing started"); const data = d3.csvParse(text); - console.log("Parsing done"); - // Get object for preview table const previewTableBody = document.getElementById("previewTableBody"); - // Header rows for preview const headerRow = document.getElementById("previewHeadRow"); for (let i = 0; i < data.columns.length; i++) { let cell = headerRow.insertCell(i); @@ -253,7 +244,6 @@

Hash Input

cell.innerHTML = column; } - // Add rows to preview table const toShow = Math.min(data.length, 10); for (let i = 0; i < toShow; i++) { const row = previewTableBody.insertRow(i); @@ -268,16 +258,13 @@

Hash Input

} } - // Add drop-down for each field const options = ["Keep", "Exclude", "Hash", "Hash and exclude"]; data.columns.forEach(function(name) { - // Label const label = document.createElement("label"); const id = "select_" + name; label.for = id; label.textContent = name + ": "; selectsParent.appendChild(label); - // Select const select = document.createElement("select"); select.id = id; select.addEventListener("change", updateSelect) @@ -288,22 +275,17 @@

Hash Input

option.text = optionName; select.appendChild(option); }); - // Line break selectsParent.appendChild(document.createElement("br")); }); - // Show size of dataset document.getElementById("previewLbl").innerHTML = "Showing " + toShow + " of " + data.length; - // Show options setOptionsVisible(true); setPreviewVisible(true); setLoadingVisible(false); }; - // 👇 load the input file to the reader setLoadingVisible(true); - console.log("Starting file reading"); reader.readAsText(input); }); diff --git a/testcsv2.csv b/testcsv2.csv new file mode 100644 index 0000000..423452a --- /dev/null +++ b/testcsv2.csv @@ -0,0 +1,3 @@ +nhs number,accession number,some other stuff,junk +12345,RJZ12345,good,beans +54321,RJZ54321,bad,toast