Skip to content

Commit

Permalink
Flexible matching for constellation assignments; added additional site for H3 motif; classify GISAID-exclusive strains
Browse files Browse the repository at this point in the history
  • Loading branch information
btski committed Oct 24, 2024
1 parent 0baf6c5 commit 7af7378
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ src/.orientdb_history
build/
dist/
octofludb.egg-info/
readme.txt
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
v1.0.2 [2023-11-03]
===================

* Make constellation determination more robust with partial, case-insensitive matching
* Correct H3 motif by adding missing site (159)
* Fix fetch-unclassified-swine.rq to include GISAID strains

v1.0.1 [2022-02-16]
===================

Expand Down
4 changes: 2 additions & 2 deletions octofludb/data/fetch-unclassified-swine.rq
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
PREFIX f: <https://flu-crew.org/term/>

SELECT DISTINCT ?gb ?seq
SELECT DISTINCT ?seqid ?seq
WHERE {
?sid f:host "swine" .
?sid f:has_segment ?gid .
?gid f:genbank_id ?gb .
?gid f:seqid ?seqid .
?gid f:dnaseq ?seq .
MINUS {
?gid f:clade ?clade .
Expand Down
9 changes: 7 additions & 2 deletions octofludb/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def _make_constellations(rows: List[Tuple[str, str, str]]) -> List[Tuple[str, st
segment_lookup = dict(PB2=0, PB1=1, PA=2, NP=3, M=4, MP=4, NS=5)

clade_lookup = dict(
pdm="P", LAIV="V", TX98="V", TRIG="T", humanSeasonal="H", classicalSwine="C", avian="A"
pdm="P", LAIV="V", TRIG="T", humanSeasonal="H", classicalSwine="C", avian="A"
)

const: Dict[str, List[str]] = dict()
Expand All @@ -91,6 +91,11 @@ def _make_constellations(rows: List[Tuple[str, str, str]]) -> List[Tuple[str, st

if clade in clade_lookup:
char = clade_lookup[clade]
elif "-like" in clade.lower():
log(
f"{bad('WARNING:')} internal gene clade includes '-like' label ({clade}), assigning constellation character 'X'"
)
char = "X"
else:
# add flexible matching
for c_lookup, c_letter in clade_lookup.items():
Expand All @@ -102,7 +107,7 @@ def _make_constellations(rows: List[Tuple[str, str, str]]) -> List[Tuple[str, st
break
else:
log(
f"{bad('WARNING:')} expected internal gene clade to be one of 'pdm', 'LAIV', 'TX98', 'TRIG', 'classicalSwine', 'humanSeasonal', or 'avian'. Found clade {clade}, assigning constellation character 'X'"
f"{bad('WARNING:')} expected internal gene clade to be one of 'pdm', 'LAIV', 'TRIG', 'classicalSwine', 'humanSeasonal', or 'avian'. Found clade {clade}, assigning constellation character 'X'"
)
char = "X"

Expand Down
2 changes: 1 addition & 1 deletion octofludb/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def upload_motifs(url: str, repo: str) -> List[str]:
# find h3 motifs
h3_motif_table = script.findMotifs(
os.path.join(os.path.dirname(__file__), "data", "get-h3-swine.rq"),
["h3_motif=145,155,156,158,189"],
["h3_motif=145,155,156,158,159,189"],
"H3",
url=url,
repo_name=repo,
Expand Down
2 changes: 1 addition & 1 deletion octofludb/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "1.0.1"
__version__ = "1.0.2"

0 comments on commit 7af7378

Please sign in to comment.