Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Synonym sync: Test mondo.sssom.tsv filtering #706

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion tests/test_sync_synonym.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
# Paths to the input fixtures, configuration and outputs referenced by this test module.
INPUT_SOURCE_DB = IN_DIR / 'test_omim.db' # create via: sh run.sh make ../../tests/input/sync_synonym/test_omim.db
INPUT_MONDO_SYNONYMS = IN_DIR / 'mondo-synonyms-scope-type-xref.tsv' # create via: sh run.sh make ../../tests/input/sync_synonym/mondo-synonyms-scope-type-xref.tsv
INPUT_ONTO_SYNONYMS = IN_DIR / 'omim-synonyms-scope-type-xref.tsv' # create via: sh run.sh make ../../tests/input/sync_synonym/omim-synonyms-scope-type-xref.tsv
# NOTE(review): INPUT_EXCLUDED_SYNONYMS is assigned twice in a row, so only the second value
# ('mondo-exclusion-configs.yml') takes effect. This looks like the removed and added lines of a
# diff rendered together - confirm which file name is intended.
INPUT_EXCLUDED_SYNONYMS = CONFIG_DIR / 'mondo-excluded-values.yml'
INPUT_EXCLUDED_SYNONYMS = CONFIG_DIR / 'mondo-exclusion-configs.yml'
# Mapping table read with tab separation by the case assertions below.
INPUT_MAPPINGS = IN_DIR / 'test_mondo.sssom.tsv'
INPUT_SOURCE_METADATA = META_DIR / 'omim.yml'
OUTPUT_ADDED = OUT_DIR / 'omim.synonyms.added.robot.tsv'
Expand Down Expand Up @@ -153,8 +153,11 @@ def _assert_in_no_template(self, case: Dict[str, str]):

def _common_case_assertions(self, cases: List[Dict[str, str]], template: str):
"""Run common assertions for each individual ROBOT template case.

For each case, `_assert_only_in_correct_template` is called and the case's
(mondo_id, source_id) pair is required to appear among the (subject_id, object_id)
pairs read from INPUT_MAPPINGS. Afterwards, the number of cases is compared with the
number of rows in the template's DataFrame, less one row for the ROBOT subheader.

:param cases: Test cases; the 'mondo_id' and 'source_id' keys of each are read here.
:param template: Key used to look up the template's DataFrame in `self.df_lookup`.
"""
# Tab-separated mappings file; text from '#' onward is ignored by the parser.
mappings_df = pd.read_csv(INPUT_MAPPINGS, sep='\t', comment='#')
# Set of (subject_id, object_id) tuples for fast membership checks.
mapping_pairs_set = set(mappings_df[['subject_id', 'object_id']].apply(tuple, axis=1))
for case in cases:
self._assert_only_in_correct_template(case, template)
# Every case must be backed by a row in the mappings file.
self.assertIn((case['mondo_id'], case['source_id']), mapping_pairs_set)
results: pd.DataFrame = self.df_lookup[template]
# -1 accounts for the ROBOT subheader
self.assertEqual(len(cases), len(results) - 1, f'Got a different number of rows in template: {template}.')
Expand Down Expand Up @@ -312,3 +315,18 @@ def test_confirmed(self):
'synonym': 'Unmapped: Synonym exists in 1 source and 1 Mondo term, but no mapping',
'source_id': 'OMIM:999999',
})

def test_real_sssom_filter(self):
    """Test real outputs and ensure all cases have mondo.sssom.tsv (exact) mappings.

    Unlike most other tests in the suite, this tests local, real synonym sync outputs and real mondo.sssom.tsv.
    Both files are only present after a local build. When either one is missing the test is reported as
    skipped, rather than silently passing without having checked anything.
    """
    sssom_path = ONTO_DIR / 'tmp' / 'mondo.sssom.tsv'
    combined_cases_path = ONTO_DIR / 'reports' / 'sync-synonym' / 'synonym_sync_combined_cases.robot.tsv'
    missing = [str(path) for path in (sssom_path, combined_cases_path) if not os.path.exists(path)]
    if missing:
        # skipTest() raises, so nothing below runs; the runner shows the test as skipped.
        self.skipTest('Real synonym sync files not found: ' + ', '.join(missing))

    mappings_df = pd.read_csv(sssom_path, sep='\t', comment='#')
    combined_df = pd.read_csv(combined_cases_path, sep='\t')
    # zip() over the two columns builds the pairs directly. DataFrame.apply(tuple, axis=1) is slow
    # row-wise and returns a DataFrame (not a Series of tuples) when the frame is empty.
    mapping_pairs_set = set(zip(mappings_df['subject_id'], mappings_df['object_id']))
    # NOTE(review): if this ROBOT template file carries a subheader row, that row is compared as a
    # case too and will not be found in the mappings - confirm whether it needs to be excluded.
    n_mapped = sum(
        pair in mapping_pairs_set
        for pair in zip(combined_df['mondo_id'], combined_df['source_id']))
    self.assertEqual(len(combined_df), n_mapped, 'Not all cases have mondo.sssom.tsv mappings.')