-
Notifications
You must be signed in to change notification settings - Fork 75
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #658 from OHDSI/ICDs_to_SNOMED_refresh_05_2022
ICDs scripts update
- Loading branch information
Showing
28 changed files
with
1,603 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
|
||
/************************************************************************** | ||
* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts | ||
* Date: 2021 | ||
**************************************************************************/ | ||
|
||
-- Staging table for the manually reviewed mapping file (refresh_lookup_done.csv).
-- The table is dropped and recreated on every run, so it always starts empty.
-- No TRUNCATE is issued between DROP and CREATE: at that point the table does not
-- exist and TRUNCATE would raise "relation does not exist", aborting the script.
DROP TABLE IF EXISTS refresh_lookup_done;

CREATE TABLE refresh_lookup_done (
    id                   SERIAL PRIMARY KEY, -- surrogate row id, generated on load
    icd_code             VARCHAR,            -- source code being mapped
    icd_name             VARCHAR,            -- source code name
    repl_by_relationship VARCHAR,            -- relationship_id of the mapping
    repl_by_id           INT,                -- target concept id
    repl_by_code         VARCHAR,            -- target concept code
    repl_by_name         VARCHAR,            -- target concept name
    repl_by_domain       VARCHAR,            -- target concept domain
    repl_by_vocabulary   VARCHAR             -- target concept vocabulary
);

-- QA: inspect the table (empty right after creation, populated after the csv upload).
SELECT id,
       icd_code,
       icd_name,
       repl_by_relationship,
       repl_by_id,
       repl_by_code,
       repl_by_name,
       repl_by_domain,
       repl_by_vocabulary
FROM refresh_lookup_done;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
/************************************************************************** | ||
* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
* Authors: Irina Zherko, Darina Ivakhnenko, Dmitry Dymshyts | ||
* Date: 2021 | ||
**************************************************************************/ | ||
-- Snapshot concept_relationship_manual into a table whose name carries today's date:
-- concept_relationship_manual_backup_YYYY_MM_DD.
-- NOTE: plain CREATE TABLE is used, so a second run on the same day fails because
-- the backup table already exists.
DO
$backup_crm$
DECLARE
    backup_suffix text := TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD');
BEGIN
    -- %I quotes the generated table name as an identifier
    EXECUTE format(
        'create table %I as select * from concept_relationship_manual',
        'concept_relationship_manual_backup_' || backup_suffix
    );
END
$backup_crm$;
|
||
-- Snapshot concept_manual into a table whose name carries today's date:
-- concept_manual_backup_YYYY_MM_DD.
-- NOTE: plain CREATE TABLE is used, so a second run on the same day fails because
-- the backup table already exists.
DO
$backup_cm$
DECLARE
    backup_suffix text := TO_CHAR(CURRENT_DATE, 'YYYY_MM_DD');
BEGIN
    -- %I quotes the generated table name as an identifier
    EXECUTE format(
        'create table %I as select * from concept_manual',
        'concept_manual_backup_' || backup_suffix
    );
END
$backup_cm$;
--Backup without new NON-translated codes - concept_manual_backup_2022_08_16 | ||
--SELECT*FROM concept_manual_backup_2022_08_16; | ||
-- QA: de-duplicated listing of the current concept_manual contents.
SELECT DISTINCT cm.*
FROM concept_manual AS cm;
|
||
|
||
|
||
-- Reset dev_cim10.concept_relationship_manual to the contents of the 2022_05_18 backup.
-- NOTE(review): schema name and backup date are hard-coded; this wipes the working
-- table before reloading it, so confirm the backup table is the intended one first.
TRUNCATE TABLE dev_cim10.concept_relationship_manual;

INSERT INTO dev_cim10.concept_relationship_manual
SELECT snap.*
FROM dev_cim10.concept_relationship_manual_backup_2022_05_18 AS snap;
|
||
-- Deprecate currently valid manual mappings that the refresh file no longer confirms.
-- QA hint: replace "UPDATE ... SET ..." with
--   SELECT * FROM concept_relationship_manual crm
-- to preview the rows that would be touched.
UPDATE concept_relationship_manual AS crm
SET valid_end_date = CURRENT_DATE,
    invalid_reason = 'D'
WHERE
    -- only rows that are still valid, so an earlier deprecation date is preserved
    crm.invalid_reason IS NULL
    -- only source codes present in the manual file of the current refresh
    AND EXISTS (
        SELECT 1
        FROM refresh_lookup_done AS src
        WHERE src.icd_code = crm.concept_code_1
    )
    -- keep the mapping if the manual file contains the very same one:
    -- same source code, target code, target vocabulary and relationship
    AND NOT EXISTS (
        SELECT 1
        FROM refresh_lookup_done AS rl
        WHERE (rl.icd_code, rl.repl_by_code, rl.repl_by_vocabulary, rl.repl_by_relationship)
            = (crm.concept_code_1, crm.concept_code_2, crm.vocabulary_id_2, crm.relationship_id)
    )
;
|
||
-- Re-activate deprecated manual mappings that the refresh file lists again.
-- QA hint: replace "UPDATE ... SET ..." with
--   SELECT * FROM concept_relationship_manual crm
-- to preview the rows that would be touched.
UPDATE concept_relationship_manual AS crm
SET valid_end_date = to_date('20991231','yyyymmdd'),
    invalid_reason = NULL
WHERE
    -- only rows that are currently deprecated
    crm.invalid_reason = 'D'
    -- only source codes present in the manual file of the current refresh
    AND EXISTS (
        SELECT 1
        FROM refresh_lookup_done AS src
        WHERE src.icd_code = crm.concept_code_1
    )
    -- the manual file must contain the very same mapping:
    -- same source code, target code, target vocabulary and relationship
    AND EXISTS (
        SELECT 1
        FROM refresh_lookup_done AS rl
        WHERE (rl.icd_code, rl.repl_by_code, rl.repl_by_vocabulary, rl.repl_by_relationship)
            = (crm.concept_code_1, crm.concept_code_2, crm.vocabulary_id_2, crm.relationship_id)
    )
;
|
||
-- Insert mappings from the current manual file that are not yet present in
-- concept_relationship_manual (valid or deprecated).
WITH mapping AS (
    -- all codes from the manual file together with their target concepts
    SELECT DISTINCT
        icd_code                       AS concept_code_1,
        repl_by_code                   AS concept_code_2,
        'CIM10'                        AS vocabulary_id_1,  -- current vocabulary
        repl_by_vocabulary             AS vocabulary_id_2,
        repl_by_relationship           AS relationship_id,
        CURRENT_DATE                   AS valid_start_date, -- date of the refresh
        to_date('20991231','yyyymmdd') AS valid_end_date,
        NULL                           AS invalid_reason    -- new mappings are valid
    FROM refresh_lookup_done
    WHERE repl_by_id <> 0 -- only codes mapped to a concept; also skips NULL repl_by_id
)
INSERT INTO concept_relationship_manual (
    concept_code_1,
    concept_code_2,
    vocabulary_id_1,
    vocabulary_id_2,
    relationship_id,
    valid_start_date,
    valid_end_date,
    invalid_reason
)
SELECT m.concept_code_1,
       m.concept_code_2,
       m.vocabulary_id_1,
       m.vocabulary_id_2,
       m.relationship_id,
       m.valid_start_date,
       m.valid_end_date,
       m.invalid_reason
FROM mapping AS m
-- Skip a mapping when the same source code / target code / vocabularies / relationship
-- is already stored. NOT EXISTS is used instead of a multi-column NOT IN because
-- NOT IN evaluates to NULL (row silently filtered out) as soon as a compared column
-- is NULL, which can suppress inserts that should happen.
WHERE NOT EXISTS (
    SELECT 1
    FROM concept_relationship_manual AS crm
    WHERE crm.concept_code_1 = m.concept_code_1
      AND crm.concept_code_2 = m.concept_code_2
      AND crm.vocabulary_id_1 = m.vocabulary_id_1
      AND crm.vocabulary_id_2 = m.vocabulary_id_2
      AND crm.relationship_id = m.relationship_id
)
;

-- QA: review the resulting manual relationships.
SELECT * FROM concept_relationship_manual;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
-- Manual mapping QA.
-- Workflow:
--   * Run the ICD10 refresh.
--   * Run and check ICD10CM.
--   1. Upload refresh_lookup_done.
--   2. Run this script.
--   * Compare the number of rows in the uploaded csv with the script output.
--   * Drop the flagged rows from the G-drive file.
--   * Manually assess the quality of the mappings tagged 'discr'.

WITH unmapped_in_icd10 AS (
    -- rows of the manual file whose code has no valid 'Maps to' / 'Maps to value'
    -- target reachable through an ICD10 concept with the same code
    SELECT DISTINCT
        r.id,
        r.icd_code
    FROM refresh_lookup_done AS r
    LEFT JOIN devv5.concept AS c
        ON TRIM(LOWER(r.icd_code)) = TRIM(LOWER(c.concept_code))
        AND c.vocabulary_id = 'ICD10'
    LEFT JOIN devv5.concept_relationship AS cr
        ON c.concept_id = cr.concept_id_1
        AND cr.relationship_id IN ('Maps to', 'Maps to value')
        AND cr.invalid_reason IS NULL
    LEFT JOIN devv5.concept AS cc
        ON cr.concept_id_2 = cc.concept_id
    WHERE cc.concept_id IS NULL
),
flagged AS (
    -- every row of the manual file; flag = 'drop' when the mapping will come from ICD10
    SELECT DISTINCT
        b.id,
        b.icd_code,
        b.icd_name,
        b.repl_by_relationship,
        b.repl_by_id,
        b.repl_by_code,
        b.repl_by_name,
        b.repl_by_domain,
        b.repl_by_vocabulary,
        CASE WHEN a.icd_code IS NULL THEN 'drop' END AS flag
    FROM refresh_lookup_done AS b
    LEFT JOIN unmapped_in_icd10 AS a
        ON a.id = b.id
),
compared AS (
    -- dicrep = 'discr' when dev_icd10cm.refresh_lookup_done maps the same code with the
    -- same relationship to a different target (possible mistargeting when a code
    -- exists in several ICD10-like vocabularies)
    SELECT DISTINCT
        f.*,
        CASE WHEN cm.repl_by_id <> f.repl_by_id THEN 'discr' END AS dicrep
    FROM flagged AS f
    LEFT JOIN dev_icd10cm.refresh_lookup_done AS cm
        ON f.icd_code = cm.icd_code
        AND f.repl_by_relationship = cm.repl_by_relationship
)
-- one output row per manual-file row; repl_by_id is intentionally emitted twice
-- to keep the original output layout
SELECT
    id,
    icd_code,
    repl_by_id,
    flag,
    string_agg(DISTINCT dicrep, 'X') AS dicrep,
    icd_name,
    repl_by_relationship,
    repl_by_id,
    repl_by_code,
    repl_by_name,
    repl_by_domain,
    repl_by_vocabulary
FROM compared
GROUP BY
    id,
    icd_code,
    icd_name,
    repl_by_relationship,
    repl_by_id,
    repl_by_code,
    repl_by_name,
    repl_by_domain,
    repl_by_vocabulary,
    flag
ORDER BY id
;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
### STEP 6 of the refresh: work with manual staging tables (skip this step if implementing on the Pallas vocabulary server) | ||
6.1.Extract the [respective csv file](https://drive.google.com/file/d/1mwDnNYb7fWHLm7lgNHwoUyMfWuJ2GfLb/view?usp=sharing) into the concept_manual table. The file was generated using the query: | ||
```sql | ||
SELECT concept_name, | ||
domain_id, | ||
vocabulary_id, | ||
concept_class_id, | ||
standard_concept, | ||
concept_code, | ||
valid_start_date, | ||
valid_end_date, | ||
invalid_reason | ||
FROM concept_manual | ||
ORDER BY vocabulary_id, concept_code, invalid_reason, valid_start_date, valid_end_date, concept_name; | ||
-- TODO: add new codes with translation here | ||
``` | ||
6.2.Extract the [respective csv file](https://drive.google.com/file/d/1C9qVJwR369y9Jk02iS-qK45Gn5iGGRJr/view?usp=sharing) into the concept_synonym_manual table. The file was generated using the query: | ||
```sql | ||
SELECT synonym_name, | ||
synonym_concept_code, | ||
synonym_vocabulary_id, | ||
language_concept_id | ||
FROM concept_synonym_manual | ||
ORDER BY synonym_vocabulary_id, synonym_concept_code, language_concept_id, synonym_name; | ||
``` | ||
6.3.Extract the [respective csv file](https://drive.google.com/drive/u/0/folders/1_nY1eDu0RfXmvaipJ1tEGZldnGuOwi2B) into the concept_relationship_manual table. The file was generated using the query: | ||
```sql | ||
SELECT concept_code_1, | ||
concept_code_2, | ||
vocabulary_id_1, | ||
vocabulary_id_2, | ||
relationship_id, | ||
valid_start_date, | ||
valid_end_date, | ||
invalid_reason | ||
FROM concept_relationship_manual | ||
ORDER BY vocabulary_id_1, vocabulary_id_2, relationship_id, concept_code_1, concept_code_2, invalid_reason, valid_start_date, valid_end_date; | ||
``` | ||
##### csv format: | ||
- delimiter: ',' | ||
- encoding: 'UTF8' | ||
- header: ON | ||
- decimal symbol: '.' | ||
- quote escape: with backslash \ | ||
- quote always: FALSE | ||
- NULL string: empty | ||
|
||
### STEP 8 of the refresh: solving problems which are identified during the first load_stage run | ||
8.1. Run [mapping_refresh.sql]. Table refresh_lookup will be created. It contains the list of mappings to outdated, deprecated or updated Standard concepts, as well as automatically improved mappings. | ||
8.2. Download this table and open it in a spreadsheet editor. Columns icd_ represent ICD10CM concepts with uncertain mapping, columns current_ refer to the mapping that currently exists in concept_relationship_stage, and columns repl_by_ suggest an automatically created mapping. The reason a concept appears in this table is given in the column reason (e.g., 'improve_map', 'without mapping'). | ||
8.3. Perform manual review and mapping. Note, if you think that current mapping is better than suggested replacement, delete rows with these concepts from Excel table. Add column repl_by_relationship and put there necessary relationship_id following the recommendations described below. Then, delete current_ and reason columns. | ||
8.4. Save table as refresh_lookup_done.csv and upload it into your schema using script [create_manual_table.sql] | ||
8.5. Run [manual_mapping_qa.sql] to check whether refresh mapping meets the ICD10CM logic | ||
8.6. If everything is OK, deprecate old mappings for the ICD10CM codes of interest and add fresh mappings to the concept_relationship_manual using the [crm_changes.sql] script | ||
|
||
### Recommendations for relationship_ids | ||
* **"Maps to"** is used for 1-to-1 FULL equivalent mapping only | ||
* **"Maps to" + "Maps to value"** is used for Observations and Measurements with results | ||
* **"Is a"** is a temporary relationship used for this check only and applicable for 1-to-1 PARTIAL equivalent AND 1-to-many mappings. | ||
Preserve a manual table with 'Is a' relationships, but change 'Is a' to 'Maps to' during the insertion into the concept_relationship_manual (e.g. using CASE WHEN). | ||
|
||
#### Required fields in a manual table | ||
- icd_code VARCHAR, | ||
- icd_name VARCHAR, | ||
- repl_by_relationship VARCHAR, | ||
- repl_by_id INT, | ||
- repl_by_code VARCHAR, | ||
- repl_by_name VARCHAR, | ||
- repl_by_domain VARCHAR, | ||
- repl_by_vocabulary VARCHAR |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.