Skip to content

Commit

Permalink
Fix single ranking criterion bug (#128)
Browse files (browse the repository at this point in the history)
* fix bug

* another small fix
  • Loading branch information
willdumm authored Apr 12, 2024
1 parent e2910b7 commit 4741dfd
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 10 deletions.
43 changes: 34 additions & 9 deletions gctree/branching_processes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1204,7 +1204,7 @@ def filter_trees( # noqa: C901
nz_coeff_bplikelihood,
nz_coeff_isotype_pars,
nz_coeff_context,
nz_coeff_alleles,
_,
) = [val != 0 for val in coeffs]
coeff_bplikelihood, coeff_isotype_pars, coeff_context, coeff_alleles = coeffs

Expand Down Expand Up @@ -1245,13 +1245,33 @@ def filter_trees( # noqa: C901
splits=[] if chain_split is None else [chain_split],
)
dag_filters.append((mut_funcs, coeff_context))
if nz_coeff_alleles:
allele_funcs = _allele_dagfuncs()
dag_filters.append((allele_funcs, coeff_alleles))

# add allele funcs no matter what, for logging
allele_funcs = _allele_dagfuncs()
dag_filters.append((allele_funcs, coeff_alleles))
# add 0-returning functions so dagfuncs return tuples, even if allele funcs are the only ones used for filtering
dag_filters.append(
(
hdag.utils.HistoryDagFilter(
hdag.utils.AddFuncDict(
{
"start_func": lambda n: 0,
"edge_weight_func": lambda n1, n2: 0,
"accum_func": lambda ls: 0,
},
name="",
),
min,
ordering_name="",
),
0,
)
)

combined_dag_filter = functools.reduce(
lambda x, y: x + y, (dag_filter for dag_filter, _ in dag_filters)
)

if ranking_coeffs:
if len(ranking_coeffs) != 3:
raise ValueError(
Expand Down Expand Up @@ -1298,6 +1318,7 @@ def linear_combinator(weighttuple):
+ " + ".join(
str(coeff) + "(" + fl.weight_funcs.name + ")"
for fl, coeff in dag_filters
if coeff != 0
)
)
else:
Expand All @@ -1308,6 +1329,7 @@ def linear_combinator(weighttuple):
ranking_dag_filter.ordering_names,
ranking_dag_filter.weight_funcs.names,
)
if ord_name != ""
)
if verbose:
print(ranking_description)
Expand All @@ -1324,16 +1346,17 @@ def reformat(field, n=10):
print("\n" + title + ":", file=file)
statstring = "\t".join(
tuple(
reformat(dfilter.weight_funcs.name, n=14)
for dfilter, _ in dag_filters
reformat(dfilter.weight_funcs.name, n=15)
for dfilter, _ in dag_filters[:-1]
)
)
print(
f"tree \t{statstring}" + ("\ttreescore" if show_score else ""),
file=file,
)
for j, best_weighttuple in enumerate(statlist, 1):
statstring = "\t".join(reformat(it) for it in best_weighttuple)
# ignore always-0 entry at end:
statstring = "\t".join(reformat(it) for it in best_weighttuple[:-1])
print(
f"{j:<10}\t{statstring}"
+ (
Expand Down Expand Up @@ -1396,7 +1419,9 @@ def reformat(field, n=10):
minfunckey = ranking_dag_filter.optimal_func
dag_ls.sort(key=minfunckey)

df = pd.DataFrame(dag_ls, columns=combined_dag_filter.weight_funcs.names)
df = pd.DataFrame(
dag_ls, columns=combined_dag_filter.weight_funcs.names
).drop(columns=[""])
df.to_csv(outbase + ".tree_stats.csv")
df["set"] = ["all_trees"] * len(df)
bestdf = pd.DataFrame(
Expand All @@ -1405,7 +1430,7 @@ def reformat(field, n=10):
bestdf["set"] = ["best_tree"]
toplot_df = pd.concat([df, bestdf], ignore_index=True)
pplot = sns.pairplot(
toplot_df.drop(["Alleles"], errors="ignore"),
toplot_df.drop(columns=["Alleles", ""], errors="ignore"),
hue="set",
diag_kind="hist",
)
Expand Down
5 changes: 4 additions & 1 deletion tests/smalltest.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ mkdir -p tests/smalltest_output
wget -O HS5F_Mutability.csv https://bitbucket.org/kleinstein/shazam/raw/ba4b30fc6791e2cfd5712e9024803c53b136e664/data-raw/HS5F_Mutability.csv
wget -O HS5F_Substitution.csv https://bitbucket.org/kleinstein/shazam/raw/ba4b30fc6791e2cfd5712e9024803c53b136e664/data-raw/HS5F_Substitution.csv

gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel

gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel --idmapfile tests/idmap.txt --isotype_mapfile tests/isotypemap.txt --mutability HS5F_Mutability.csv --substitution HS5F_Substitution.csv --ranking_coeffs 0 1 0 --use_old_mut_parsimony --branching_process_ranking_coeff 0

gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel --idmapfile tests/idmap.txt --isotype_mapfile tests/isotypemap.txt --mutability HS5F_Mutability.csv --substitution HS5F_Substitution.csv --ranking_coeffs 1 1 0 --use_old_mut_parsimony --branching_process_ranking_coeff 0

gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel --idmapfile tests/idmap.txt --isotype_mapfile tests/isotypemap.txt --mutability HS5F_Mutability.csv --substitution HS5F_Substitution.csv --ranking_coeffs .01 -1 0 --branching_process_ranking_coeff -1 --summarize_forest --tree_stats

gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel

gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel --idmapfile tests/idmap.txt --isotype_mapfile tests/isotypemap.txt

Expand Down

0 comments on commit 4741dfd

Please sign in to comment.