Skip to content

Commit

Permalink
ingest: Remove use of ncov-ingest geolocation rules (#75)
Browse files Browse the repository at this point in the history
Remove the use of the ncov-ingest geolocation rules since Augur
now uses the built-in geolocation rules by default.

Depends on the release of
<nextstrain/augur#1745>

Part of a larger migration:
nextstrain/public#17
  • Loading branch information
j23414 authored Mar 5, 2025
1 parent de2ad8a commit 188cda2
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 28 deletions.
4 changes: 0 additions & 4 deletions ingest/defaults/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,6 @@ ncbi_datasets_fields:

# Config parameters related to the curate pipeline
curate:
# URL pointing to the public generalized geolocation rules
# For the Nextstrain team, this is currently
# "https://raw.githubusercontent.com/nextstrain/ncov-ingest/master/source-data/gisaid_geoLocationRules.tsv"
geolocation_rules_url: "https://raw.githubusercontent.com/nextstrain/ncov-ingest/master/source-data/gisaid_geoLocationRules.tsv"
# The path to the local geolocation rules within the pathogen repo
# The path should be relative to the ingest directory.
local_geolocation_rules: "defaults/geolocation-rules.tsv"
Expand Down
26 changes: 2 additions & 24 deletions ingest/rules/curate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,6 @@ Parameters are expected to be defined in `config.curate`.
"""


rule fetch_general_geolocation_rules:
output:
general_geolocation_rules="data/general-geolocation-rules.tsv",
params:
geolocation_rules_url=config["curate"]["geolocation_rules_url"],
shell:
"""
curl -fsSL --output {output.general_geolocation_rules} {params.geolocation_rules_url}
"""


rule concat_geolocation_rules:
input:
general_geolocation_rules="data/general-geolocation-rules.tsv",
local_geolocation_rules=config["curate"]["local_geolocation_rules"],
output:
all_geolocation_rules="data/all-geolocation-rules.tsv",
shell:
"""
cat {input.general_geolocation_rules} {input.local_geolocation_rules} >> {output.all_geolocation_rules}
"""

def format_field_map(field_map: dict[str, str]) -> str:
"""
Format dict to `"key1"="value1" "key2"="value2"...` for use in shell commands.
Expand All @@ -44,7 +22,7 @@ def format_field_map(field_map: dict[str, str]) -> str:
rule curate:
input:
sequences_ndjson="data/genbank.ndjson",
all_geolocation_rules="data/all-geolocation-rules.tsv",
geolocation_rules=config["curate"]["local_geolocation_rules"],
annotations=config["curate"]["annotations"],
manual_mapping="defaults/host_hostgenus_hosttype_map.tsv",
output:
Expand Down Expand Up @@ -93,7 +71,7 @@ rule curate:
--default-value {params.authors_default_value:q} \
--abbr-authors-field {params.abbr_authors_field} \
| augur curate apply-geolocation-rules \
--geolocation-rules {input.all_geolocation_rules} \
--geolocation-rules {input.geolocation_rules} \
| ./scripts/transform-state-names \
| ./scripts/post_process_metadata.py \
| ./scripts/transform-new-fields \
Expand Down

0 comments on commit 188cda2

Please sign in to comment.