Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: gnomad_re should accept all nucleotide characters for ref/alt #277

Merged
merged 1 commit into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/ga4gh/vrs/extras/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ class Translator:
"""

beacon_re = re.compile(r"(?P<chr>[^-]+)\s*:\s*(?P<pos>\d+)\s*(?P<ref>\w+)\s*>\s*(?P<alt>\w+)")
- gnomad_re = re.compile(r"(?P<chr>[^-]+)-(?P<pos>\d+)-(?P<ref>[ACGTN]+)-(?P<alt>[ACGTN]+|\*|\.)", re.IGNORECASE)
+ gnomad_re = re.compile(
+ r"(?P<chr>[^-]+)-(?P<pos>\d+)-(?P<ref>[ACGTURYKMSWBDHVN]+)-(?P<alt>[ACGTURYKMSWBDHVN]+)",
+ re.IGNORECASE
+ )
hgvs_re = re.compile(r"[^:]+:[cgnpr]\.")
spdi_re = re.compile(r"(?P<ac>[^:]+):(?P<pos>\d+):(?P<del_len_or_seq>\w*):(?P<ins_seq>\w*)")

Expand Down
155 changes: 128 additions & 27 deletions tests/extras/cassettes/test_from_gnomad.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.1
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:19?start=44908821&end=44908822
response:
Expand All @@ -23,7 +23,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 16 Jan 2023 16:32:42 GMT
- Tue, 07 Nov 2023 23:05:59 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -39,33 +39,134 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.1
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/GRCh38:19
uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:17?start=83129586&end=83129598
response:
body:
string: "{\n \"added\": \"2016-08-24T08:19:02Z\",\n \"aliases\": [\n \"Ensembl:19\",\n
\ \"ensembl:19\",\n \"GRCh38:19\",\n \"GRCh38:chr19\",\n \"GRCh38.p1:19\",\n
\ \"GRCh38.p1:chr19\",\n \"GRCh38.p10:19\",\n \"GRCh38.p10:chr19\",\n
\ \"GRCh38.p11:19\",\n \"GRCh38.p11:chr19\",\n \"GRCh38.p12:19\",\n
\ \"GRCh38.p12:chr19\",\n \"GRCh38.p2:19\",\n \"GRCh38.p2:chr19\",\n
\ \"GRCh38.p3:19\",\n \"GRCh38.p3:chr19\",\n \"GRCh38.p4:19\",\n \"GRCh38.p4:chr19\",\n
\ \"GRCh38.p5:19\",\n \"GRCh38.p5:chr19\",\n \"GRCh38.p6:19\",\n \"GRCh38.p6:chr19\",\n
\ \"GRCh38.p7:19\",\n \"GRCh38.p7:chr19\",\n \"GRCh38.p8:19\",\n \"GRCh38.p8:chr19\",\n
\ \"GRCh38.p9:19\",\n \"GRCh38.p9:chr19\",\n \"MD5:b0eba2c7bb5c953d1e06a508b5e487de\",\n
\ \"NCBI:NC_000019.10\",\n \"refseq:NC_000019.10\",\n \"SEGUID:AHxM5/L8jIX08UhBBkKXkiO5rhY\",\n
\ \"SHA1:007c4ce7f2fc8c85f4f148410642979223b9ae16\",\n \"VMC:GS_IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n
\ \"sha512t24u:IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\",\n \"ga4gh:SQ.IIB53T8CNeJJdUqzn9V_JnRtQadwWCbl\"\n
\ ],\n \"alphabet\": \"ACGNT\",\n \"length\": 58617616\n}\n"
string: GTTGWCACATGA
headers:
Connection:
- close
Content-Length:
- '1035'
- '12'
Content-Type:
- text/plain; charset=utf-8
Date:
- Tue, 07 Nov 2023 23:05:59 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/GRCh38:17
response:
body:
string: "{\n \"added\": \"2016-08-27T23:52:54Z\",\n \"aliases\": [\n \"GRCh38:17\",\n
\ \"GRCh38:chr17\",\n \"GRCh38.p1:17\",\n \"GRCh38.p1:chr17\",\n \"GRCh38.p10:17\",\n
\ \"GRCh38.p10:chr17\",\n \"GRCh38.p11:17\",\n \"GRCh38.p11:chr17\",\n
\ \"GRCh38.p12:17\",\n \"GRCh38.p12:chr17\",\n \"GRCh38.p2:17\",\n
\ \"GRCh38.p2:chr17\",\n \"GRCh38.p3:17\",\n \"GRCh38.p3:chr17\",\n
\ \"GRCh38.p4:17\",\n \"GRCh38.p4:chr17\",\n \"GRCh38.p5:17\",\n \"GRCh38.p5:chr17\",\n
\ \"GRCh38.p6:17\",\n \"GRCh38.p6:chr17\",\n \"GRCh38.p7:17\",\n \"GRCh38.p7:chr17\",\n
\ \"GRCh38.p8:17\",\n \"GRCh38.p8:chr17\",\n \"GRCh38.p9:17\",\n \"GRCh38.p9:chr17\",\n
\ \"MD5:f9a0fb01553adb183568e3eb9d8626db\",\n \"NCBI:NC_000017.11\",\n
\ \"refseq:NC_000017.11\",\n \"SEGUID:s2Skupj8o6wdjf0aPrgOipAr67Q\",\n
\ \"SHA1:b364a4ba98fca3ac1d8dfd1a3eb80e8a902bebb4\",\n \"VMC:GS_dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7\",\n
\ \"sha512t24u:dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7\",\n \"ga4gh:SQ.dLZ15tNO1Ur0IcGjwc3Sdi_0A6Yf4zm7\"\n
\ ],\n \"alphabet\": \"ACGKNRSTWY\",\n \"length\": 83257441\n}\n"
headers:
Connection:
- close
Content-Length:
- '1004'
Content-Type:
- application/json
Date:
- Tue, 07 Nov 2023 23:05:59 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:7?start=1&end=17
response:
body:
string: NNNNNNNNNNNNNNNN
headers:
Connection:
- close
Content-Length:
- '16'
Content-Type:
- text/plain; charset=utf-8
Date:
- Tue, 07 Nov 2023 23:05:59 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
code: 200
message: OK
- request:
body: null
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
User-Agent:
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/GRCh38:7
response:
body:
string: "{\n \"added\": \"2016-08-27T21:23:35Z\",\n \"aliases\": [\n \"GRCh38:7\",\n
\ \"GRCh38:chr7\",\n \"GRCh38.p1:7\",\n \"GRCh38.p1:chr7\",\n \"GRCh38.p10:7\",\n
\ \"GRCh38.p10:chr7\",\n \"GRCh38.p11:7\",\n \"GRCh38.p11:chr7\",\n
\ \"GRCh38.p12:7\",\n \"GRCh38.p12:chr7\",\n \"GRCh38.p2:7\",\n \"GRCh38.p2:chr7\",\n
\ \"GRCh38.p3:7\",\n \"GRCh38.p3:chr7\",\n \"GRCh38.p4:7\",\n \"GRCh38.p4:chr7\",\n
\ \"GRCh38.p5:7\",\n \"GRCh38.p5:chr7\",\n \"GRCh38.p6:7\",\n \"GRCh38.p6:chr7\",\n
\ \"GRCh38.p7:7\",\n \"GRCh38.p7:chr7\",\n \"GRCh38.p8:7\",\n \"GRCh38.p8:chr7\",\n
\ \"GRCh38.p9:7\",\n \"GRCh38.p9:chr7\",\n \"MD5:cc044cc2256a1141212660fb07b6171e\",\n
\ \"NCBI:NC_000007.14\",\n \"refseq:NC_000007.14\",\n \"SEGUID:4+JjCcBVhPCr8vdIhUKFycPv8bY\",\n
\ \"SHA1:e3e26309c05584f0abf2f748854285c9c3eff1b6\",\n \"VMC:GS_F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul\",\n
\ \"sha512t24u:F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul\",\n \"ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul\"\n
\ ],\n \"alphabet\": \"ACGNRSTY\",\n \"length\": 159345973\n}\n"
headers:
Connection:
- close
Content-Length:
- '977'
Content-Type:
- application/json
Date:
- Mon, 16 Jan 2023 16:32:42 GMT
- Tue, 07 Nov 2023 23:05:59 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -81,7 +182,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.1
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:13?start=32936731&end=32936732
response:
Expand All @@ -95,7 +196,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 16 Jan 2023 16:32:42 GMT
- Tue, 07 Nov 2023 23:05:59 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -111,7 +212,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.1
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:13?start=32936731&end=32936732
response:
Expand All @@ -125,7 +226,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 16 Jan 2023 16:32:42 GMT
- Tue, 07 Nov 2023 23:05:59 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -141,7 +242,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.1
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/sequence/GRCh38:13?start=32936731&end=32936732
response:
Expand All @@ -155,7 +256,7 @@ interactions:
Content-Type:
- text/plain; charset=utf-8
Date:
- Mon, 16 Jan 2023 16:32:42 GMT
- Tue, 07 Nov 2023 23:05:59 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand All @@ -171,7 +272,7 @@ interactions:
Connection:
- keep-alive
User-Agent:
- python-requests/2.28.1
- python-requests/2.31.0
method: GET
uri: http://localhost:5000/seqrepo/1/metadata/GRCh38:13
response:
Expand All @@ -197,7 +298,7 @@ interactions:
Content-Type:
- application/json
Date:
- Mon, 16 Jan 2023 16:32:42 GMT
- Tue, 07 Nov 2023 23:05:59 GMT
Server:
- Werkzeug/2.2.2 Python/3.10.4
status:
Expand Down
8 changes: 8 additions & 0 deletions tests/extras/test_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ def test_from_beacon(tlr):
def test_from_gnomad(tlr):
assert tlr._from_gnomad(snv_inputs["gnomad"]).as_dict() == snv_output

+ assert tlr._from_gnomad("17-83129587-GTTGWCACATGA-G")
+
+ # Test valid characters
+ assert tlr._from_gnomad(
+ "7-2-ACGTURYKMSWBDHVN-ACGTURYKMSWBDHVN",
+ require_validation=False
+ )
+
# Invalid input. Ref does not match regex
assert not tlr._from_gnomad("13-32936732-helloworld-C")

Expand Down
2 changes: 1 addition & 1 deletion tests/extras/test_vcf_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,4 @@ def test_get_vrs_object_invalid_input(vcf_annotator, caplog):

# No ALT
vcf_annotator._get_vrs_object("7-140753336-A-.", {}, [], "GRCh38")
- assert "ValidationError when translating 7-140753336-A-. from gnomad" in caplog.text
+ assert "None was returned when translating 7-140753336-A-. from gnomad" in caplog.text