Skip to content

Commit

Permalink
Move grp filtering before counts
Browse files (browse the repository at this point in the history)
  • Loading branch information
mike-w-wilson committed Jun 20, 2024
1 parent 8ff42ee commit c831c42
Showing 1 changed file with 11 additions and 13 deletions.
24 changes: 11 additions & 13 deletions gnomad_qc/v4/analyses/grpmax_comps.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ def version_stats(
t_ht = filter_to_threshold(
p_ht, threshold, version=version, eur_filter=eur_filter
)
t_ht = t_ht.filter(
hl.literal(DIVERSE_GRPS).contains(t_ht.grpmax_ga)
)
t_ht = t_ht.checkpoint(
f"gs://gnomad-tmp-4day/grpmax_comps_{version}_{grp_id}_{threshold}.ht",
overwrite=True,
Expand All @@ -146,10 +149,6 @@ def version_stats(
t_ht.count() / t_variants * 100,
)

t_ht = t_ht.filter(
hl.literal(DIVERSE_GRPS).contains(t_ht.grpmax_ga)
)

# For each diverse genetic ancestry group, aggregate the number of
# variants where that group is grpmax
counts_by_thresholds[threshold] = t_ht.aggregate(
Expand Down Expand Up @@ -282,15 +281,14 @@ def main(args):
msg = ""
ht = process_consequences(ht, has_polyphen=False)

if csq_terms:
logger.info(
"Filtering to keep only %s ...",
(
str(csq_terms) + " variants"
if csq_terms and not non_syn_only
else "non-synonymous variants"
),
)
logger.info(
"Filtering to keep only %s ...",
(
str(csq_terms) + " variants"
if not non_syn_only
else "non-synonymous variants"
),
)
if args.canonical:
vep_csq_expr = ht.vep.worst_csq_for_variant_canonical
msg += " on canonical transcripts"
Expand Down

0 comments on commit c831c42

Please sign in to comment.