Merge branch 'develop' into remove-hardcoded-paths/rnaseq

HelikarLab · Dec 9, 2024 · 82044bd
2 parents 7226e36 + 0422633
commit 82044bd
Show file tree

Hide file tree

Showing 13 changed files with 1,874 additions and 657 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,8 +1,6 @@
-# See https://pre-commit.com for more information
-# See https://pre-commit.com/hooks.html for more hooks
 repos:
--   repo: https://github.com/opensource-nepal/commitlint
-    rev: v1.2.0
+  - repo: https://github.com/commitizen-tools/commitizen
+    rev: master
     hooks:
-    -   id: commitlint
-        name: Commit Lint
+      - id: commitizen
+        stages: [ commit-msg ]
diff --git a/main/COMO.ipynb b/main/COMO.ipynb
diff --git a/main/como/merge_xomics.py b/main/como/merge_xomics.py
@@ -16,7 +16,7 @@
 
 from como import proteomics_gen, return_placeholder_data
 from como.combine_distributions import _combine_zscores
-from como.custom_types import RNASeqPreparationMethod
+from como.custom_types import RNAPrepMethod
 from como.project import Config
 from como.utils import split_gene_expression_data
 
@@ -93,7 +93,7 @@ def __post_init__(self):
             raise ValueError("Adjust method must be either 'progressive', 'regressive', 'flat', or 'custom'")
 
 
-def _load_rnaseq_tests(filename, context_name, prep_method: RNASeqPreparationMethod) -> tuple[str, pd.DataFrame]:
+def _load_rnaseq_tests(filename, context_name, prep_method: RNAPrepMethod) -> tuple[str, pd.DataFrame]:
     """Load rnaseq results.
 
     Returns a dictionary of test (context, context, cell, etc ) names and rnaseq expression data
@@ -112,11 +112,11 @@ def load_dummy_dict():
         raise FileNotFoundError(f"Error: Config file not found at {inquiry_full_path}")
 
     match prep_method:
-        case RNASeqPreparationMethod.TOTAL:
+        case RNAPrepMethod.TOTAL:
             filename = f"rnaseq_total_{context_name}.csv"
-        case RNASeqPreparationMethod.MRNA:
+        case RNAPrepMethod.MRNA:
             filename = f"rnaseq_mrna_{context_name}.csv"
-        case RNASeqPreparationMethod.SCRNA:
+        case RNAPrepMethod.SCRNA:
             filename = f"rnaseq_scrna_{context_name}.csv"
         case _:
             raise ValueError(
@@ -344,15 +344,9 @@ async def _merge_xomics(
     config = Config()
     logger.info(f"Merging data for {context_name}")
     # load data for each source if it exists. IF not load an empty dummy dataset
-    trnaseq = _load_rnaseq_tests(
-        filename=trnaseq_file, context_name=context_name, prep_method=RNASeqPreparationMethod.TOTAL
-    )
-    mrnaseq = _load_rnaseq_tests(
-        filename=mrnaseq_file, context_name=context_name, prep_method=RNASeqPreparationMethod.MRNA
-    )
-    scrnaseq = _load_rnaseq_tests(
-        filename=scrnaseq_file, context_name=context_name, prep_method=RNASeqPreparationMethod.SCRNA
-    )
+    trnaseq = _load_rnaseq_tests(filename=trnaseq_file, context_name=context_name, prep_method=RNAPrepMethod.TOTAL)
+    mrnaseq = _load_rnaseq_tests(filename=mrnaseq_file, context_name=context_name, prep_method=RNAPrepMethod.MRNA)
+    scrnaseq = _load_rnaseq_tests(filename=scrnaseq_file, context_name=context_name, prep_method=RNAPrepMethod.SCRNA)
     proteomics = proteomics_gen.load_proteomics_tests(filename=proteomics_file, context_name=context_name)
 
     expression_list = []

diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py
@@ -9,7 +9,7 @@
 from loguru import logger
 
 from como import Config
-from como.custom_types import RNASeqPreparationMethod
+from como.custom_types import RNAPrepMethod
 from como.rnaseq import FilteringTechnique, save_rnaseq_tests
 
 
@@ -22,11 +22,11 @@ class _Arguments:
     high_batch_ratio: float
     filtering_technique: FilteringTechnique
     minimum_cutoff: int | str
-    library_prep: RNASeqPreparationMethod
+    library_prep: RNAPrepMethod
     taxon: Taxon
 
     def __post_init__(self):
-        self.library_prep = RNASeqPreparationMethod.from_string(str(self.library_prep))
+        self.library_prep = RNAPrepMethod.from_string(str(self.library_prep))
         self.filtering_technique = FilteringTechnique.from_string(str(self.filtering_technique))
 
         if self.minimum_cutoff is None:
@@ -46,7 +46,7 @@ async def _handle_context_batch(
     batch_ratio_high: float,
     technique: FilteringTechnique,
     cut_off: int | float | str,
-    prep: RNASeqPreparationMethod,
+    prep: RNAPrepMethod,
     taxon: Taxon,
 ) -> None:
     """Iterate through each context type and create rnaseq expression file.
@@ -81,9 +81,9 @@ async def _handle_context_batch(
         rnaseq_input_filepath = (
             config.data_dir / "data_matrices" / context_name / f"gene_counts_matrix_{prep.value}_{context_name}"
         )
-        if prep == RNASeqPreparationMethod.SCRNA:
+        if prep == RNAPrepMethod.SCRNA:
             rnaseq_input_filepath = rnaseq_input_filepath.with_suffix(".h5ad")
-        elif prep in {RNASeqPreparationMethod.TOTAL, RNASeqPreparationMethod.MRNA}:
+        elif prep in {RNAPrepMethod.TOTAL, RNAPrepMethod.MRNA}:
             rnaseq_input_filepath = rnaseq_input_filepath.with_suffix(".csv")
 
         if not rnaseq_input_filepath.exists():
@@ -117,7 +117,7 @@ async def _handle_context_batch(
 async def rnaseq_gen(
     # config_filepath: Path,
     config_filename: str,
-    prep: RNASeqPreparationMethod,
+    prep: RNAPrepMethod,
     taxon_id: int | str | Taxon,
     replicate_ratio: float = 0.5,
     high_replicate_ratio: float = 1.0,