Fix single ranking criterion bug (#128)

* fix bug
* another small fix
matsengrp · Apr 12, 2024 · 4741dfd · 4741dfd
1 parent e2910b7
commit 4741dfd
Show file tree

Hide file tree

Showing 2 changed files with 38 additions and 10 deletions.
diff --git a/gctree/branching_processes.py b/gctree/branching_processes.py
@@ -1204,7 +1204,7 @@ def filter_trees(  # noqa: C901
             nz_coeff_bplikelihood,
             nz_coeff_isotype_pars,
             nz_coeff_context,
-            nz_coeff_alleles,
+            _,
         ) = [val != 0 for val in coeffs]
         coeff_bplikelihood, coeff_isotype_pars, coeff_context, coeff_alleles = coeffs
 
@@ -1245,13 +1245,33 @@ def filter_trees(  # noqa: C901
                     splits=[] if chain_split is None else [chain_split],
                 )
             dag_filters.append((mut_funcs, coeff_context))
-        if nz_coeff_alleles:
-            allele_funcs = _allele_dagfuncs()
-            dag_filters.append((allele_funcs, coeff_alleles))
+
+        # add allele funcs no matter what, for logging
+        allele_funcs = _allele_dagfuncs()
+        dag_filters.append((allele_funcs, coeff_alleles))
+        # add 0-returning functions so dagfuncs return tuples, even if allele funcs are the only ones used for filtering
+        dag_filters.append(
+            (
+                hdag.utils.HistoryDagFilter(
+                    hdag.utils.AddFuncDict(
+                        {
+                            "start_func": lambda n: 0,
+                            "edge_weight_func": lambda n1, n2: 0,
+                            "accum_func": lambda ls: 0,
+                        },
+                        name="",
+                    ),
+                    min,
+                    ordering_name="",
+                ),
+                0,
+            )
+        )
 
         combined_dag_filter = functools.reduce(
             lambda x, y: x + y, (dag_filter for dag_filter, _ in dag_filters)
         )
+
         if ranking_coeffs:
             if len(ranking_coeffs) != 3:
                 raise ValueError(
@@ -1298,6 +1318,7 @@ def linear_combinator(weighttuple):
                 + " + ".join(
                     str(coeff) + "(" + fl.weight_funcs.name + ")"
                     for fl, coeff in dag_filters
+                    if coeff != 0
                 )
             )
         else:
@@ -1308,6 +1329,7 @@ def linear_combinator(weighttuple):
                     ranking_dag_filter.ordering_names,
                     ranking_dag_filter.weight_funcs.names,
                 )
+                if ord_name != ""
             )
         if verbose:
             print(ranking_description)
@@ -1324,16 +1346,17 @@ def reformat(field, n=10):
             print("\n" + title + ":", file=file)
             statstring = "\t".join(
                 tuple(
-                    reformat(dfilter.weight_funcs.name, n=14)
-                    for dfilter, _ in dag_filters
+                    reformat(dfilter.weight_funcs.name, n=15)
+                    for dfilter, _ in dag_filters[:-1]
                 )
             )
             print(
                 f"tree     \t{statstring}" + ("\ttreescore" if show_score else ""),
                 file=file,
             )
             for j, best_weighttuple in enumerate(statlist, 1):
-                statstring = "\t".join(reformat(it) for it in best_weighttuple)
+                # ignore always-0 entry at end:
+                statstring = "\t".join(reformat(it) for it in best_weighttuple[:-1])
                 print(
                     f"{j:<10}\t{statstring}"
                     + (
@@ -1396,7 +1419,9 @@ def reformat(field, n=10):
                 minfunckey = ranking_dag_filter.optimal_func
             dag_ls.sort(key=minfunckey)
 
-            df = pd.DataFrame(dag_ls, columns=combined_dag_filter.weight_funcs.names)
+            df = pd.DataFrame(
+                dag_ls, columns=combined_dag_filter.weight_funcs.names
+            ).drop(columns=[""])
             df.to_csv(outbase + ".tree_stats.csv")
             df["set"] = ["all_trees"] * len(df)
             bestdf = pd.DataFrame(
@@ -1405,7 +1430,7 @@ def reformat(field, n=10):
             bestdf["set"] = ["best_tree"]
             toplot_df = pd.concat([df, bestdf], ignore_index=True)
             pplot = sns.pairplot(
-                toplot_df.drop(["Alleles"], errors="ignore"),
+                toplot_df.drop(columns=["Alleles", ""], errors="ignore"),
                 hue="set",
                 diag_kind="hist",
             )

diff --git a/tests/smalltest.sh b/tests/smalltest.sh
@@ -8,11 +8,14 @@ mkdir -p tests/smalltest_output
 wget -O HS5F_Mutability.csv https://bitbucket.org/kleinstein/shazam/raw/ba4b30fc6791e2cfd5712e9024803c53b136e664/data-raw/HS5F_Mutability.csv
 wget -O HS5F_Substitution.csv https://bitbucket.org/kleinstein/shazam/raw/ba4b30fc6791e2cfd5712e9024803c53b136e664/data-raw/HS5F_Substitution.csv
 
+gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer  --root GL --frame 1 --verbose --idlabel
+
+gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel --idmapfile tests/idmap.txt --isotype_mapfile tests/isotypemap.txt --mutability HS5F_Mutability.csv --substitution HS5F_Substitution.csv --ranking_coeffs 0 1 0 --use_old_mut_parsimony --branching_process_ranking_coeff 0 
+
 gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel --idmapfile tests/idmap.txt --isotype_mapfile tests/isotypemap.txt --mutability HS5F_Mutability.csv --substitution HS5F_Substitution.csv --ranking_coeffs 1 1 0 --use_old_mut_parsimony --branching_process_ranking_coeff 0 
 
 gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel --idmapfile tests/idmap.txt --isotype_mapfile tests/isotypemap.txt --mutability HS5F_Mutability.csv --substitution HS5F_Substitution.csv --ranking_coeffs .01 -1 0 --branching_process_ranking_coeff -1 --summarize_forest --tree_stats
 
-gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer  --root GL --frame 1 --verbose --idlabel
 
 gctree infer tests/small_outfile tests/abundances.csv --outbase tests/smalltest_output/gctree.infer --root GL --frame 1 --verbose --idlabel --idmapfile tests/idmap.txt --isotype_mapfile tests/isotypemap.txt