Skip to content

Commit

Permalink
fix: push new utils.py file
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshLoecker committed Oct 31, 2024
1 parent 6e3f27b commit aaecc11
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 35 deletions.
27 changes: 2 additions & 25 deletions main/como/__init__.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,11 @@
from enum import Enum

import pandas as pd

from como.como_utilities import stringlist_to_list
from como.project import Config
from como.utils import stringlist_to_list

__all__ = ["stringlist_to_list", "Config", "RNASeqPreparationMethod"]
__all__ = ["stringlist_to_list", "Config"]
__version__ = "1.10.0"


class RNASeqPreparationMethod(Enum):
TOTAL = "total"
MRNA = "mrna"
SCRNA = "scrna"

@staticmethod
def from_string(value: str) -> "RNASeqPreparationMethod":
match_value = "".join(c for c in value if c.isascii()).lower()

match match_value:
case "total" | "trna":
return RNASeqPreparationMethod.TOTAL
case "mrna":
return RNASeqPreparationMethod.MRNA
case "scrna":
return RNASeqPreparationMethod.SCRNA
case _:
possible_values = [t.value for t in RNASeqPreparationMethod]
raise ValueError(f"Filtering technique must be one of {possible_values}; got: {value}")


def return_placeholder_data() -> pd.DataFrame:
    """Build a single-row, zero-filled placeholder table.

    The frame has one row labelled ``0`` on an index named
    ``entrez_gene_id`` and two columns, ``expressed`` and ``top``,
    both holding ``0``.
    """
    placeholder_index = pd.Index(data=[0], name="entrez_gene_id")
    placeholder_columns = ["expressed", "top"]
    return pd.DataFrame(data=0, index=placeholder_index, columns=placeholder_columns)
5 changes: 1 addition & 4 deletions main/como/cluster_rnaseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
import rpy2_api
from como_utilities import stringlist_to_list
from project import Config

# read and translate R functions
configs = Config()
r_file_path = Path(configs.code_dir, "rscripts", "cluster_samples.R")
from utils import stringlist_to_list


def main() -> None:
Expand Down
1 change: 1 addition & 0 deletions main/como/create_context_specific_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from como.como_utilities import Compartments, split_gene_expression_data, stringlist_to_list
from como.project import Config
from como.utils import Compartments, split_gene_expression_data, stringlist_to_list


class Algorithm(Enum):
Expand Down
22 changes: 22 additions & 0 deletions main/como/custom_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from enum import Enum


class RNASeqPreparationMethod(Enum):
TOTAL = "total"
MRNA = "mrna"
SCRNA = "scrna"

@staticmethod
def from_string(value: str) -> "RNASeqPreparationMethod":
match_value = "".join(c for c in value if c.isascii()).lower()

match match_value:
case "total" | "trna":
return RNASeqPreparationMethod.TOTAL
case "mrna":
return RNASeqPreparationMethod.MRNA
case "scrna":
return RNASeqPreparationMethod.SCRNA
case _:
possible_values = [t.value for t in RNASeqPreparationMethod]
raise ValueError(f"Filtering technique must be one of {possible_values}; got: {value}")
1 change: 1 addition & 0 deletions main/como/merge_xomics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from como.combine_distributions import combine_zscores_main
from como.como_utilities import split_gene_expression_data
from como.project import Config
from como.utils import split_gene_expression_data


class _MergedHeaderNames:
Expand Down
2 changes: 1 addition & 1 deletion main/como/rnaseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from scipy.stats import norm
from sklearn.neighbors import KernelDensity

from como import RNASeqPreparationMethod
from como.custom_types import RNASeqPreparationMethod
from como.project import Config


Expand Down
6 changes: 4 additions & 2 deletions main/como/rnaseq_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@

sys.path.insert(0, Path(__file__).parent.parent.as_posix())


import argparse
import asyncio
from dataclasses import dataclass
from typing import Optional

import pandas as pd
from fast_bioservices import Taxon
from loguru import logger

from como import Config, RNASeqPreparationMethod
from como import Config
from como.custom_types import RNASeqPreparationMethod
from como.rnaseq import FilteringTechnique, save_rnaseq_tests


Expand Down
3 changes: 3 additions & 0 deletions main/como/rnaseq_preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,19 @@
sys.path.insert(0, Path(__file__).parent.parent.as_posix())

import argparse
import asyncio
import re
from dataclasses import dataclass, field
from typing import Literal, Optional, Union

import numpy as np
import pandas as pd
import scanpy as sc
from fast_bioservices import BioDBNet, Input, Output, Taxon
from loguru import logger

from como import Config, stringlist_to_list
from como.utils import _format_cohersion


@dataclass
Expand Down
23 changes: 20 additions & 3 deletions main/como/utils.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import contextlib
import io
import sre_constants
import sys
from pathlib import Path
from typing import Iterator

import aiofiles
import pandas as pd
import scanpy as sc
from fast_bioservices import BioDBNet, Output, Taxon

__all__ = ["Compartments", "stringlist_to_list", "split_gene_expression_data", "suppress_stdout"]
Expand Down Expand Up @@ -128,7 +132,20 @@ def suppress_stdout() -> Iterator[None]:
sys.stdout = sys.__stdout__


def _format_cohersion(biodbnet: BioDBNet, *, requested_output: Output, input_values: list[str], taxon: Taxon) -> pd.DataFrame:
cohersion = biodbnet.dbFind(values=input_values, output_db=requested_output, taxon=taxon).drop(columns=["Input Type"])
cohersion.columns = pd.Index(["input_value", requested_output.value.replace(" ", "_").lower()])
async def _format_cohersion(biodbnet: BioDBNet, *, requested_output: Output | list[Output], input_values: list[str], taxon: Taxon) -> pd.DataFrame:
    """Look up ``input_values`` through ``biodbnet`` and normalise the column names.

    A single ``Output`` is treated as a one-element list. The ``"Input Type"``
    column of the lookup result is dropped, and the remaining columns are
    renamed to ``input_value`` followed by one name per requested output
    (its ``value``, lowercased, with spaces replaced by underscores).
    """
    if isinstance(requested_output, Output):
        outputs = [requested_output]
    else:
        outputs = requested_output

    lookup = await biodbnet.db_find(values=input_values, output_db=outputs, taxon=taxon)
    cohersion = lookup.drop(columns=["Input Type"])

    # NOTE(review): the positional rename assumes the remaining columns are the
    # input followed by the outputs in the order requested; confirm against db_find.
    output_columns = [output.value.replace(" ", "_").lower() for output in outputs]
    cohersion.columns = pd.Index(["input_value", *output_columns])
    return cohersion


async def _async_read_csv(path: Path, **kwargs) -> pd.DataFrame:
    """Read the text file at ``path`` asynchronously and parse it as CSV.

    The whole file is read into memory in text mode and then handed to
    ``pandas.read_csv`` together with any ``kwargs``.
    """
    async with aiofiles.open(path, "r") as handle:
        text = await handle.read()
    buffer = io.StringIO(text)
    return pd.read_csv(buffer, **kwargs)


async def _async_read_excel(path: Path, **kwargs) -> pd.DataFrame:
    """Read the workbook at ``path`` asynchronously and parse it with pandas.

    The whole file is read into memory in binary mode and then handed to
    ``pandas.read_excel`` together with any ``kwargs``.

    Args:
        path: Location of the Excel file.
        **kwargs: Forwarded unchanged to ``pandas.read_excel``.

    Returns:
        Whatever ``pandas.read_excel`` returns for the given ``kwargs``.
    """
    async with aiofiles.open(path, "rb") as f:
        content = await f.read()
    # Excel workbooks are binary data: pass the raw bytes through a BytesIO.
    # Decoding them to text first fails on real workbooks and is not a
    # buffer type read_excel can parse.
    return pd.read_excel(io.BytesIO(content), **kwargs)

0 comments on commit aaecc11

Please sign in to comment.