diff --git a/defaults/parameters.yaml b/defaults/parameters.yaml index 9359b43a6..fce6baac2 100644 --- a/defaults/parameters.yaml +++ b/defaults/parameters.yaml @@ -137,8 +137,21 @@ ancestral: # Frequencies settings frequencies: - # min_date is set by default to 1 year before present - # but can be explicitly set if desired + # default settings that can be over-ridden for specific builds + default: + + # min_date is set by default to 1 year before present + min_date: "1Y" + + # max_date is set by default to present date - recent_days_to_censor + + # KDE bandwidths in proportion of a year to use per strain. + # using 1M bandwidth by default + narrow_bandwidth: 0.0833 + + + # settings that can be over-ridden across all builds, but not for specific builds + recent_days_to_censor: 0 # Number of weeks between pivots pivot_interval: 1 @@ -146,9 +159,7 @@ frequencies: # Measure pivots in weeks pivot_interval_units: "weeks" - # KDE bandwidths in proportion of a year to use per strain. - # using 15 day bandwidth - narrow_bandwidth: 0.041 + # Weight of KDE that uses wide bandwidth proportion_wide: 0.0 # Diffusion frequency settings diff --git a/docs/src/reference/workflow-config-file.rst b/docs/src/reference/workflow-config-file.rst index 23f9ed789..bc3aa17a6 100644 --- a/docs/src/reference/workflow-config-file.rst +++ b/docs/src/reference/workflow-config-file.rst @@ -983,13 +983,30 @@ columns frequencies ----------- -- Valid attributes: +- type: object +- description: Parameters for specifying tip frequency calculations via ``augur frequencies`` +- examples: + +.. code:: yaml + + frequencies: + pivot_interval_units: "weeks" + default: + min_date: "6M" + narrow_bandwidth: 0.038 + global_1m: + min_date: "1M" + narrow_bandwidth: 0.019 + global_2020_to_2022: + min_date: "2020-01-01" + max_date: "2022-01-01" + narrow_bandwidth: 0.076 + +Each named frequencies configuration (``default`` or build-named) supports specification of ``min_date``, ``max_date`` and ``narrow_bandwidth``. 
Other parameters can only be specified across all builds. .. contents:: :local: -.. _min_date-1: - min_date ~~~~~~~~ diff --git a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml index 41363741a..7345d6010 100644 --- a/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid-21L/builds.yaml @@ -896,115 +896,62 @@ traits: # narrow_bandwidth = 0.019 or 7 days for "1m" and "2m" # narrow_bandwidth = 0.038 or 14 days for "6m" and "all-time" frequencies: + default: + min_date: "2020-01-01" + narrow_bandwidth: 0.038 global_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 global_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 global_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - global_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 africa_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 africa_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 africa_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - africa_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 asia_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 asia_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 asia_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - asia_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 europe_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 europe_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 europe_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - europe_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 north-america_1m: min_date: "1M" narrow_bandwidth: 0.019 - 
recent_days_to_censor: 7 north-america_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 north-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - north-america_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 oceania_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 oceania_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 oceania_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - oceania_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 south-america_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 south-america_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 south-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - south-america_all-time: - min_date: "2022-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 diff --git a/nextstrain_profiles/nextstrain-gisaid/builds.yaml b/nextstrain_profiles/nextstrain-gisaid/builds.yaml index ab07dcc63..65e4cec89 100644 --- a/nextstrain_profiles/nextstrain-gisaid/builds.yaml +++ b/nextstrain_profiles/nextstrain-gisaid/builds.yaml @@ -887,119 +887,62 @@ traits: # narrow_bandwidth = 0.019 or 7 days for "1m" and "2m" # narrow_bandwidth = 0.038 or 14 days for "6m" and "all-time" frequencies: - reference: + default: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 global_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 global_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 global_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - global_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 africa_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 africa_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 
africa_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - africa_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 asia_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 asia_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 asia_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - asia_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 europe_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 europe_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 europe_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - europe_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 north-america_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 north-america_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 north-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - north-america_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 oceania_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 oceania_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 oceania_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - oceania_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 south-america_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 south-america_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 south-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - south-america_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 diff --git a/nextstrain_profiles/nextstrain-open/builds.yaml b/nextstrain_profiles/nextstrain-open/builds.yaml index 
e39f59da7..5e9d9755f 100644 --- a/nextstrain_profiles/nextstrain-open/builds.yaml +++ b/nextstrain_profiles/nextstrain-open/builds.yaml @@ -892,119 +892,62 @@ traits: # narrow_bandwidth = 0.019 or 7 days for "1m" and "2m" # narrow_bandwidth = 0.038 or 14 days for "6m" and "all-time" frequencies: - reference: + default: min_date: "2020-01-01" narrow_bandwidth: 0.038 - recent_days_to_censor: 7 global_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 global_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 global_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - global_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 africa_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 africa_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 africa_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - africa_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 asia_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 asia_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 asia_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - asia_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 europe_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 europe_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 europe_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - europe_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 north-america_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 north-america_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 north-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - north-america_all-time: - min_date: 
"2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 oceania_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 oceania_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 oceania_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - oceania_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 south-america_1m: min_date: "1M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 south-america_2m: min_date: "2M" narrow_bandwidth: 0.019 - recent_days_to_censor: 7 south-america_6m: min_date: "6M" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 - south-america_all-time: - min_date: "2020-01-01" - narrow_bandwidth: 0.038 - recent_days_to_censor: 7 diff --git a/workflow/snakemake_rules/common.smk b/workflow/snakemake_rules/common.smk index 0c2713950..290716ec1 100644 --- a/workflow/snakemake_rules/common.smk +++ b/workflow/snakemake_rules/common.smk @@ -183,8 +183,8 @@ def _get_sampling_bias_correction_for_wildcards(wildcards): def _get_min_date_for_frequencies(wildcards): if wildcards.build_name in config["frequencies"] and "min_date" in config["frequencies"][wildcards.build_name]: return config["frequencies"][wildcards.build_name]["min_date"] - elif "frequencies" in config and "min_date" in config["frequencies"]: - return config["frequencies"]["min_date"] + elif "frequencies" in config and "min_date" in config["frequencies"]["default"]: + return config["frequencies"]["default"]["min_date"] else: # If not explicitly specified, default to 1 year back from the present min_date_cutoff = datetime.date.today() - datetime.timedelta(weeks=52) @@ -195,8 +195,8 @@ def _get_min_date_for_frequencies(wildcards): def _get_max_date_for_frequencies(wildcards): if wildcards.build_name in config["frequencies"] and "max_date" in config["frequencies"][wildcards.build_name]: return config["frequencies"][wildcards.build_name]["max_date"] - elif "frequencies" in config and "max_date" in 
config["frequencies"]: - return config["frequencies"]["max_date"] + elif "frequencies" in config and "max_date" in config["frequencies"]["default"]: + return config["frequencies"]["default"]["max_date"] else: # Allow users to censor the N most recent days to minimize effects of # uneven recent sampling. @@ -207,6 +207,17 @@ def _get_max_date_for_frequencies(wildcards): datetime.date.today() - offset ) +def _get_narrow_bandwidth_for_wildcards(wildcards): + # check if builds.yaml contains frequencies:{build_name}:narrow_bandwidth + if wildcards.build_name in config["frequencies"] and 'narrow_bandwidth' in config["frequencies"][wildcards.build_name]: + return config["frequencies"][wildcards.build_name]["narrow_bandwidth"] + # check if parameters.yaml contains frequencies:default:narrow_bandwidth + elif "frequencies" in config and "narrow_bandwidth" in config["frequencies"]["default"]: + return config["frequencies"]["default"]["narrow_bandwidth"] + # else return augur frequencies default value + else: + return 0.0833 + def _get_upload_inputs(wildcards): # Do whatever the configuration says if it has opinions. if "upload" in config: diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk index 2bd018d25..10c19542f 100644 --- a/workflow/snakemake_rules/main_workflow.smk +++ b/workflow/snakemake_rules/main_workflow.smk @@ -1164,7 +1164,7 @@ rule tip_frequencies: max_date = _get_max_date_for_frequencies, pivot_interval = config["frequencies"]["pivot_interval"], pivot_interval_units = config["frequencies"]["pivot_interval_units"], - narrow_bandwidth = config["frequencies"]["narrow_bandwidth"], + narrow_bandwidth = _get_narrow_bandwidth_for_wildcards, proportion_wide = config["frequencies"]["proportion_wide"] resources: # Memory use scales primarily with the size of the metadata file.