Skip to content

Commit

Permalink
Merge pull request #115 from aehrc/_validation_fixes
Browse files Browse the repository at this point in the history
Validation fixes
  • Loading branch information
alistairewj authored Jul 31, 2024
2 parents baf41b7 + cc90af7 commit 16d1cda
Show file tree
Hide file tree
Showing 25 changed files with 403 additions and 135 deletions.
3 changes: 2 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
url = https://github.com/MIT-LCP/mimic-code
[submodule "mimic-profiles"]
path = mimic-profiles
url = https://github.com/kind-lab/mimic-profiles
url = https://github.com/aehrc/mimic-profiles.git
branch = _validation_fixes
[submodule "fhir-packages"]
path = fhir-packages
url = https://github.com/kind-lab/fhir-packages
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ pip install -e .
```sh
pip install google-cloud
pip install google-cloud-pubsub
pip install google-api-python-client
pip install psycopg2-binary
pip install pandas-gbq
pip install fhir
Expand Down Expand Up @@ -226,6 +227,26 @@ python3 py_mimic_fhir export --export_limit 100
- The outputted ndjson will be written to the MIMIC_JSON_PATH folder specified in the *.env*


## Generating terminology resources

The `bin/psql-export-trm.py` script can be used to generate terminology resources such as code systems and value sets
from the `fhir_trm` schema of the MIMIC database. These resources can be used to update the MIMIC code system and value set definitions in the MIMIC-IV IG
(`mimic-profiles/input/resources`).

To update the resources, first generate the terminology tables in PostgreSQL with `sql/create_fhir_terminology.sql`
(or `sql/create_fhir_terminology.sql`), and then run the script with the following command (replace the placeholders with the actual values):

```sh
python bin/psql-export-trm.py \
--db-name "${DATABASE}" \
--db-user "${USER}" \
--db-pass "${PGPASSWORD}" \
--date "2022-09-21T13:59:43-04:00" \
mimic-profiles/input/resources
```

The script requires the `click` Python package (in addition to the packages listed in the section above).

## Useful wiki links
- The [FHIR Conversion Assumptions](https://github.com/kind-lab/mimic-fhir/wiki/FHIR-Conversion-Assumptions) section covers assumptions made during the MIMIC to FHIR process.
- The [HAPI FHIR Server Validation](https://github.com/kind-lab/mimic-fhir/wiki/HAPI-FHIR-Server-Validation) section walks through validating the MIMIC resources against various implementation guides using HAPI FHIR.
Expand Down
132 changes: 132 additions & 0 deletions bin/psql-export-trm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
#!/usr/bin/env python3
import json
import os
from datetime import datetime

import click
import pandas as pd
import psycopg2
from fhir.resources.codesystem import CodeSystem
from fhir.resources.valueset import ValueSet
import itertools as it

# Value sets whose members are read from their own fhir_trm.vs_<name> table.
# Any value set not listed here is exported as a plain include of the
# code system that shares its id.
valueset_coded = [
    'admission_class',
    'admission_type',
    'datetimeevents_d_items',
    'diagnosis_icd',
    'encounter_type',
    'medication',
    'outputevents_d_items',
    'procedure_icd',
    'procedureevents_d_items',
    'specimen_type',
]


# Static metadata stamped onto every generated CodeSystem and ValueSet.
_FHIR_STATUS = 'draft'
_FHIR_CONTENT = 'complete'
_TRM_VERSION = '2.0'
_TRM_PUBLISHER = 'KinD Lab'
_TRM_BASE_URL = 'http://mimic.mit.edu/fhir/mimic'


def _set_common_metadata(resource, resource_type, name, description, resource_date):
    """Populate the fields that CodeSystem and ValueSet resources share.

    ``name`` is the snake_case terminology name from the descriptions table;
    it is turned into a ``mimic-kebab-case`` id and a ``MimicTitleCase`` name.
    """
    resource.id = "mimic-" + name.replace('_', '-')
    resource.url = f'{_TRM_BASE_URL}/{resource_type}/{resource.id}'
    resource.version = _TRM_VERSION
    resource.language = 'en'
    resource.name = "Mimic" + name.title().replace('_', '')
    resource.title = resource.name
    resource.date = resource_date
    resource.publisher = _TRM_PUBLISHER
    resource.description = description


def _write_resource(output_dir, resource_type, resource):
    """Write ``resource`` to ``<output_dir>/<resource_type>-<id>.json``."""
    path = os.path.join(output_dir, f'{resource_type}-{resource.id}.json')
    with open(path, 'w', encoding='utf-8') as outfile:
        # Round-trip through json.loads so the file is re-serialised indented.
        json.dump(json.loads(resource.json()), outfile, indent=4)


def _to_concept(_system, code, display=None):
    """Convert one ``(system, code[, display])`` row into a concept dict.

    Raises:
        ValueError: if the code is empty or the ``'*'`` wildcard, which is
            only meaningful as the sole row of a system.
    """
    if not code or code == '*':
        raise ValueError(f'Invalid concept code: {code}')
    return dict(code=code, display=display) if display else dict(code=code)


def _to_include(system, concepts):
    """Build one ``compose.include`` entry from all rows of one system.

    A single row whose code is ``'*'`` means "the whole code system", so the
    entry carries no explicit concept list.
    """
    if len(concepts) == 1 and concepts[0][1] == '*':
        return dict(system=system)
    return dict(system=system, concept=[_to_concept(*row) for row in concepts])


def _build_includes(rows):
    """Group value set rows by system (first column) into include entries.

    Rows are collected in a dict rather than with ``itertools.groupby``:
    ``groupby`` only merges *adjacent* equal keys, so an unsorted query result
    would yield several include entries for the same system. Dict grouping
    keeps the first-seen order of systems and needs no sort.
    """
    rows_by_system = {}
    for row in rows:
        rows_by_system.setdefault(row[0], []).append(row)
    return [_to_include(system, concepts) for system, concepts in rows_by_system.items()]


def _export_codesystems(con, output_dir, resource_date):
    """Export every code system listed in ``fhir_trm.cs_descriptions``."""
    cs_descriptions = pd.read_sql_query("SELECT * FROM fhir_trm.cs_descriptions", con)

    codesystems = list(cs_descriptions.codesystem)
    click.echo(f"Exporting codesystems: {codesystems}")

    for codesystem in codesystems:
        click.echo(codesystem)
        description = cs_descriptions[cs_descriptions['codesystem'] == codesystem]['description'].iloc[0]

        cs = CodeSystem(status=_FHIR_STATUS, content=_FHIR_CONTENT)
        _set_common_metadata(cs, 'CodeSystem', codesystem, description, resource_date)
        cs.valueSet = f'{_TRM_BASE_URL}/ValueSet/{cs.id}'

        # Generate code/display combos from the fhir_trm tables.
        # NOTE: the table name is interpolated into the SQL text; it comes from
        # the cs_descriptions table itself, not from user input.
        df_codesystem = pd.read_sql_query(f"SELECT * FROM fhir_trm.cs_{codesystem};", con)
        has_display = 'display' in df_codesystem.columns
        concepts = []
        for _, row in df_codesystem.iterrows():
            concept = {'code': row['code']}
            if has_display:
                concept['display'] = row['display']
            concepts.append(concept)
        cs.concept = concepts

        click.echo(f"CodeSystem: {codesystem} has {len(cs.concept)} concepts.")
        _write_resource(output_dir, 'CodeSystem', cs)


def _export_valuesets(con, output_dir, resource_date):
    """Export every value set listed in ``fhir_trm.vs_descriptions``."""
    vs_descriptions = pd.read_sql_query("SELECT * FROM fhir_trm.vs_descriptions;", con)

    valuesets = list(vs_descriptions.valueset)
    click.echo(f"Exporting valuesets: {valuesets}")

    for valueset in valuesets:
        click.echo(valueset)
        description = vs_descriptions[vs_descriptions['valueset'] == valueset]['description'].iloc[0]

        vs = ValueSet(status=_FHIR_STATUS)
        _set_common_metadata(vs, 'ValueSet', valueset, description, resource_date)

        if valueset in valueset_coded:
            click.echo('coded valueset')
            # Generate code/display combos from the fhir_trm tables; the first
            # column of each row is the code system the concept belongs to.
            df_valueset = pd.read_sql_query(f"SELECT * FROM fhir_trm.vs_{valueset};", con)
            rows = df_valueset.itertuples(index=False, name=None)
            vs.compose = {'include': _build_includes(rows)}
        else:
            # Non-coded value sets include the like-named code system in full.
            whole_system = {'system': f'{_TRM_BASE_URL}/CodeSystem/{vs.id}'}
            vs.compose = {'include': [whole_system]}

        _write_resource(output_dir, 'ValueSet', vs)


@click.command()
@click.argument('output-dir', type=click.Path())
@click.option('--db-name', help='Database name')
@click.option('--db-user', help='SQL username')
@click.option('--db-pass', help='SQL password')
@click.option('--db-host', help='Host', default='localhost')
@click.option('--date', help='Date to use for the FHIR resource date instead of the current datetime')
def pslq_export_trm(output_dir, db_name, db_user, db_pass, db_host, date) -> None:
    """
    Exports terminology from postgresql
    \f
    Reads the ``fhir_trm`` schema and writes one ``CodeSystem-<id>.json`` per
    row of ``cs_descriptions`` and one ``ValueSet-<id>.json`` per row of
    ``vs_descriptions`` into ``output_dir``.

    Args:
        output_dir: Destination directory; created if it does not exist.
        db_name: Database name passed to ``psycopg2.connect``.
        db_user: Database user passed to ``psycopg2.connect``.
        db_pass: Database password passed to ``psycopg2.connect``.
        db_host: Database host passed to ``psycopg2.connect``.
        date: Value for every resource's ``date`` field. When omitted, the
            current local time with its actual UTC offset is used.
    """
    if date is None:
        # Use the machine's real UTC offset instead of a hard-coded "-04:00",
        # which mislabels the timestamp anywhere outside that offset.
        resource_date = datetime.now().astimezone().isoformat(timespec='seconds')
    else:
        resource_date = date

    os.makedirs(output_dir, exist_ok=True)

    # Connect to database; always release the connection, even on failure.
    con = psycopg2.connect(dbname=db_name, user=db_user, password=db_pass, host=db_host)
    try:
        _export_codesystems(con, output_dir, resource_date)
        _export_valuesets(con, output_dir, resource_date)
    finally:
        con.close()


# Invoke the click command only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    pslq_export_trm()
2 changes: 1 addition & 1 deletion mimic-profiles
Submodule mimic-profiles updated 47 files
+70 −0 .github/workflows/deploy.yml
+5 −1 .gitignore
+2 −0 _gencontinuous.sh
+2 −0 _genonce.sh
+3 −0 _updatePublisher.sh
+1 −0 input/resources/CodeSystem-mimic-admission-class.json
+1 −0 input/resources/CodeSystem-mimic-admission-type.json
+7 −7 input/resources/CodeSystem-mimic-bodysite.json
+8 −12 input/resources/CodeSystem-mimic-d-labitems.json
+1,441 −2,481 input/resources/CodeSystem-mimic-diagnosis-icd10.json
+1,059 −1,395 input/resources/CodeSystem-mimic-diagnosis-icd9.json
+4 −120 input/resources/CodeSystem-mimic-hcpcs-cd.json
+33 −0 input/resources/CodeSystem-mimic-identifier-type.json
+20 −0 input/resources/CodeSystem-mimic-lab-flags.json
+21 −49 input/resources/CodeSystem-mimic-medication-etc.json
+37 −97 input/resources/CodeSystem-mimic-medication-formulary-drug-cd.json
+59 −308 input/resources/CodeSystem-mimic-medication-gsn.json
+9 −9 input/resources/CodeSystem-mimic-medication-icu.json
+849 −1,530 input/resources/CodeSystem-mimic-medication-name.json
+71 −107 input/resources/CodeSystem-mimic-medication-ndc.json
+2 −2 input/resources/CodeSystem-mimic-medication-poe-iv.json
+28 −28 input/resources/CodeSystem-mimic-medication-route.json
+362 −398 input/resources/CodeSystem-mimic-medication-site.json
+29 −0 input/resources/CodeSystem-mimic-microbiology-interpretation.json
+0 −20 input/resources/CodeSystem-mimic-microbiology-organism.json
+0 −4 input/resources/CodeSystem-mimic-microbiology-test.json
+79 −79 input/resources/CodeSystem-mimic-observation-category.json
+14 −14 input/resources/CodeSystem-mimic-procedure-category.json
+94 −902 input/resources/CodeSystem-mimic-procedure-icd10.json
+42 −86 input/resources/CodeSystem-mimic-procedure-icd9.json
+502 −1,036 input/resources/CodeSystem-mimic-units.json
+370 −370 input/resources/ValueSet-mimic-datetimeevents-d-items.json
+20 −0 input/resources/ValueSet-mimic-identifier-type.json
+20 −0 input/resources/ValueSet-mimic-lab-flags.json
+20 −0 input/resources/ValueSet-mimic-microbiology-interpretation.json
+152 −152 input/resources/ValueSet-mimic-outputevents-d-items.json
+334 −334 input/resources/ValueSet-mimic-procedureevents-d-items.json
+3 −3 template-kindlab/package/.index.json
+4 −4 template-kindlab/package/content/.index.json
+4 −4 template-kindlab/package/content/assets/.index.json
+4 −4 template-kindlab/package/content/assets/css/.index.json
+4 −4 template-kindlab/package/content/assets/images/.index.json
+4 −4 template-kindlab/package/includes/.index.json
+4 −4 template-kindlab/package/var/.index.json
+4 −4 template-kindlab/package/var/lib/.fhir/.index.json
+4 −4 template-kindlab/package/var/lib/.fhir/packages/.index.json
+4 −4 template-kindlab/package/var/lib/.index.json
1 change: 0 additions & 1 deletion sql/codesystem/cs-descriptions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ INSERT INTO fhir_trm.cs_descriptions(codesystem, description)
VALUES
('admission_class', 'The admission class for MIMIC')
, ('admission_type', 'The admission type for MIMIC')
, ('admission_type_icu', 'The admission type for ICU encounters in MIMIC')
, ('admit_source', 'The admission source for MIMIC')
, ('bodysite', 'The bodysite codes for MIMIC')
, ('chartevents_d_items', 'The chartevents item codes used in the ICU for MIMIC')
Expand Down
2 changes: 1 addition & 1 deletion sql/codesystem/cs-diagnosis-icd10.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

DROP TABLE IF EXISTS fhir_trm.cs_diagnosis_icd10;
CREATE TABLE fhir_trm.cs_diagnosis_icd10(
code VARCHAR NOT NULL,
code VARCHAR PRIMARY KEY,
display VARCHAR NOT NULL
);

Expand Down
2 changes: 1 addition & 1 deletion sql/codesystem/cs-diagnosis-icd9.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

DROP TABLE IF EXISTS fhir_trm.cs_diagnosis_icd9;
CREATE TABLE fhir_trm.cs_diagnosis_icd9(
code VARCHAR NOT NULL,
code VARCHAR PRIMARY KEY,
display VARCHAR NOT NULL
);

Expand Down
23 changes: 14 additions & 9 deletions sql/codesystem/cs-medication-etc.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,20 @@

DROP TABLE IF EXISTS fhir_trm.cs_medication_etc;
CREATE TABLE fhir_trm.cs_medication_etc(
code VARCHAR NOT NULL
code VARCHAR PRIMARY KEY
, display VARCHAR NOT NULL
);

INSERT INTO fhir_trm.cs_medication_etc
SELECT
etccode AS code
, MAX(etcdescription) AS display -- grab one description
FROM mimiciv_ed.medrecon
WHERE
etccode IS NOT NULL
GROUP BY etccode
WITH cs_medrecon AS (
SELECT
NULLIF(TRIM(etccode), '') AS cs_code
, etcdescription AS cs_display
FROM mimiciv_ed.medrecon
)
INSERT INTO fhir_trm.cs_medication_etc SELECT
TRIM(cs_code) AS code
, MAX(cs_display) AS display -- grab one description
FROM cs_medrecon
WHERE cs_code IS NOT NULL
GROUP BY cs_code

12 changes: 8 additions & 4 deletions sql/codesystem/cs-units.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ CREATE TABLE fhir_trm.cs_units(


WITH mimic_units AS (
SELECT DISTINCT TRIM(REGEXP_REPLACE(dose_due_unit, '\s+', ' ', 'g')) AS unit FROM mimiciv_hosp.emar_detail
-- Medication Administration hospital units
SELECT DISTINCT TRIM(dose_given_unit) AS unit FROM mimiciv_hosp.emar_detail
UNION
SELECT DISTINCT TRIM(infusion_rate_unit) AS unit FROM mimiciv_hosp.emar_detail
UNION
UNION
SELECT DISTINCT TRIM(product_unit) AS unit FROM mimiciv_hosp.emar_detail
UNION

-- Medication Administration ICU units
SELECT DISTINCT TRIM(amountuom) AS unit FROM mimiciv_icu.inputevents
Expand All @@ -32,10 +35,11 @@ WITH mimic_units AS (
UNION

-- Prescription units
SELECT DISTINCT TRIM(dose_unit_rx) AS unit FROM mimiciv_hosp.prescriptions p
SELECT DISTINCT TRIM(dose_unit_rx) AS unit FROM mimiciv_hosp.prescriptions p

)
INSERT INTO fhir_trm.cs_units
SELECT unit
SELECT DISTINCT unit
FROM mimic_units
WHERE
unit IS NOT NULL
Expand Down
2 changes: 0 additions & 2 deletions sql/codesystem/vs-descriptions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@ INSERT INTO fhir_trm.vs_descriptions(valueset, description)
VALUES
('admission_class', 'The admission class for MIMIC')
, ('admission_type', 'The admission type for MIMIC')
, ('admission_type_icu', 'The admission type for ICU encounters in MIMIC')
, ('admit_source', 'The admission source for MIMIC')
, ('bodysite', 'The bodysite codes for MIMIC')
, ('chartevents_d_items', 'The item codes for chartevents used in MIMIC')
, ('d_items', 'The item codes used throughout the ICU in MIMIC')
, ('d_labitems', 'The lab item codes used in MIMIC')
, ('datetimeevents_d_items', 'The datetime item codes used in MIMIC')
, ('diagnosis_icd', 'The diagnosis ICD9 and ICD10 codes for MIMIC')
Expand Down
Loading

0 comments on commit 16d1cda

Please sign in to comment.