From 4ce9d8c84168c35535de0d2c109c8cf8ff4a2204 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Thu, 5 Dec 2024 07:54:52 -0600 Subject: [PATCH 01/91] feat: initial work to remove hardcoded filepaths Signed-off-by: Josh Loecker --- main/como/custom_types.py | 11 +- main/como/rnaseq_preprocess.py | 678 +++++++++++++++++++-------------- main/como/utils.py | 57 +-- 3 files changed, 427 insertions(+), 319 deletions(-) diff --git a/main/como/custom_types.py b/main/como/custom_types.py index a4fcd19b..0f464203 100644 --- a/main/como/custom_types.py +++ b/main/como/custom_types.py @@ -1,4 +1,9 @@ +from __future__ import annotations + from enum import Enum +from pathlib import Path + +from fast_bioservices import Taxon class RNASeqPreparationMethod(Enum): @@ -7,7 +12,7 @@ class RNASeqPreparationMethod(Enum): SCRNA = "scrna" @staticmethod - def from_string(value: str) -> "RNASeqPreparationMethod": + def from_string(value: str) -> RNASeqPreparationMethod: """Build a preparation method object from a string.""" match_value = "".join(c for c in value if c.isascii()).lower() @@ -21,3 +26,7 @@ def from_string(value: str) -> "RNASeqPreparationMethod": case _: possible_values = [t.value for t in RNASeqPreparationMethod] raise ValueError(f"Filtering technique must be one of {possible_values}; got: {value}") + + +type_taxon = Taxon | int | str +type_path = str | Path diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index c522f0f1..ee02948c 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -4,27 +4,36 @@ import asyncio import re from dataclasses import dataclass, field +from io import StringIO from pathlib import Path -from typing import Literal +from typing import Literal, NamedTuple +import aiofiles import numpy as np import pandas as pd import scanpy as sc -from fast_bioservices import Input, Taxon from fast_bioservices.biothings.mygene import MyGene from loguru import logger -from como import Config, stringlist_to_list 
-from como.utils import convert_gene_data +from como.custom_types import type_path, type_taxon +from como.utils import _listify, convert_gene_data +type_rna = Literal["total", "mrna"] -@dataclass -class _Arguments: + +class _Arguments(NamedTuple): context_names: list[str] - taxon_id: Taxon | int | str - mode: Literal["create", "provide"] - input_format: str - provided_matrix_fname: str = None + mode: list[Literal["create", "provide"]] + taxon_id: list[str] + input_como_dirpath: list[Path] | None + input_matrix_filepath: list[Path] | None + output_gene_info_filepath: Path | None + output_count_matrices_dir: list[Path] | None + output_trna_config_filepath: Path | None + output_mrna_config_filepath: Path | None + output_trna_count_matrix: list[Path] | None + output_mrna_count_matrix: list[Path] | None + cache: bool @dataclass @@ -41,27 +50,28 @@ def num_genes(self): return len(self.count_matrix) @classmethod - def build_from_tab(cls, filepath: Path) -> _STARinformation: + async def build_from_tab(cls, filepath: Path) -> _STARinformation: if filepath.suffix != ".tab": raise ValueError(f"Building STAR information requires a '.tab' file; received: '{filepath}'") - with filepath.open("r") as i_stream: - num_unmapped = [int(i) for i in next(i_stream).rstrip("\n").split("\t")[1:]] - num_multimapping = [int(i) for i in next(i_stream).rstrip("\n").split("\t")[1:]] - num_no_feature = [int(i) for i in next(i_stream).rstrip("\n").split("\t")[1:]] - num_ambiguous = [int(i) for i in next(i_stream).rstrip("\n").split("\t")[1:]] - - df = pd.read_csv( - filepath, - sep="\t", - skiprows=4, - names=[ - "ensembl_gene_id", - "unstranded_rna_counts", - "first_read_transcription_strand", - "second_read_transcription_strand", - ], - ) - # Remove NA values + + async with aiofiles.open(filepath) as i_stream: + unmapped, multimapping, no_feature, ambiguous = await asyncio.gather( + *[i_stream.readline(), i_stream.readline(), i_stream.readline(), i_stream.readline()] + ) + num_unmapped = 
[int(i) for i in unmapped.rstrip("\n").split("\t")[1:]] + num_multimapping = [int(i) for i in multimapping.rstrip("\n").split("\t")[1:]] + num_no_feature = [int(i) for i in no_feature.rstrip("\n").split("\t")[1:]] + num_ambiguous = [int(i) for i in ambiguous.rstrip("\n").split("\t")[1:]] + remainder = await i_stream.read() + + string_io = StringIO(remainder) + df = pd.read_csv(string_io, sep="\t", header=None) + df.columns = [ + "ensembl_gene_id", + "unstranded_rna_counts", + "first_read_transcription_strand", + "second_read_transcription_strand", + ] df = df[~df["ensembl_gene_id"].isna()] return _STARinformation( num_unmapped=num_unmapped, @@ -77,7 +87,7 @@ def build_from_tab(cls, filepath: Path) -> _STARinformation: class _StudyMetrics: study_name: str count_files: list[Path] - strandedness_files: list[Path] + strand_files: list[Path] __sample_names: list[str] = field(default_factory=list) __num_samples: int = 0 @@ -93,10 +103,10 @@ def __post_init__(self): self.__num_samples = len(self.count_files) self.__sample_names = [f.stem for f in self.count_files] - if len(self.count_files) != len(self.strandedness_files): + if len(self.count_files) != len(self.strand_files): raise ValueError( f"Unequal number of count files and strand files for study '{self.study_name}'. " - f"Found {len(self.count_files)} count files and {len(self.strandedness_files)} strand files." + f"Found {len(self.count_files)} count files and {len(self.strand_files)} strand files." ) if self.num_samples != len(self.count_files): @@ -105,44 +115,36 @@ def __post_init__(self): f"Found {self.num_samples} samples and {len(self.count_files)} count files." ) - if self.num_samples != len(self.strandedness_files): + if self.num_samples != len(self.strand_files): raise ValueError( f"Unequal number of samples and strand files for study '{self.study_name}'. " - f"Found {self.num_samples} samples and {len(self.strandedness_files)} strand files." 
+ f"Found {self.num_samples} samples and {len(self.strand_files)} strand files." ) if self.__num_samples == 1: raise ValueError(f"Only one sample exists for study {self.study_name}. Provide at least two samples") self.count_files.sort() - self.strandedness_files.sort() + self.strand_files.sort() self.__sample_names.sort() -def _context_from_filepath(file: Path) -> str: - return file.stem.split("_S")[0] - - def _sample_name_from_filepath(file: Path) -> str: return re.search(r".+_S\d+R\d+", file.stem).group() def _organize_gene_counts_files(data_dir: Path) -> list[_StudyMetrics]: - root_gene_count_dir = Path(data_dir, "geneCounts") - root_strandedness_dir = Path(data_dir, "strandedness") + gene_count_dir = Path(data_dir, "geneCounts").resolve() + strand_dir = Path(data_dir, "strandedness").resolve() - gene_counts_directories: list[Path] = sorted( - [p for p in Path(root_gene_count_dir).glob("*") if not p.name.startswith(".")] - ) - strandedness_directories: list[Path] = sorted( - [p for p in Path(root_strandedness_dir).glob("*") if not p.name.startswith(".")] - ) + gene_counts_directories: list[Path] = sorted([p for p in gene_count_dir.glob("*") if not p.name.startswith(".")]) + strandedness_directories: list[Path] = sorted([p for p in strand_dir.glob("*") if not p.name.startswith(".")]) if len(gene_counts_directories) != len(strandedness_directories): raise ValueError( f"Unequal number of gene count directories and strandedness directories. " f"Found {len(gene_counts_directories)} gene count directories and {len(strandedness_directories)} strandedness directories." 
# noqa: E501 - f"\nGene count directory: {root_gene_count_dir}\nStrandedness directory: {root_strandedness_dir}" + f"\nGene count directory: {gene_count_dir}\nStrandedness directory: {strand_dir}" ) # For each study, collect gene count files, fragment files, insert size files, layouts, and strandedness information @@ -157,16 +159,16 @@ def _organize_gene_counts_files(data_dir: Path) -> list[_StudyMetrics]: _StudyMetrics( study_name=gene_dir.stem, count_files=list(gene_dir.glob("*.tab")), - strandedness_files=list(strand_dir.glob("*.txt")), + strand_files=list(strand_dir.glob("*.txt")), ) ) return study_metrics -def _process_first_multirun_sample(strand_file: Path, all_counts_files: list[Path]): +async def _process_first_multirun_sample(strand_file: Path, all_counts_files: list[Path]): sample_count = pd.DataFrame() for file in all_counts_files: - star_information = _STARinformation.build_from_tab(file) + star_information = await _STARinformation.build_from_tab(file) strand_information = strand_file.read_text().rstrip("\n").lower() if strand_information not in ("none", "first_read_transcription_strand", "second_read_transcription_strand"): @@ -196,8 +198,8 @@ def _process_first_multirun_sample(strand_file: Path, all_counts_files: list[Pat return count_sums -def _process_standard_replicate(counts_file: Path, strand_file: Path, sample_name: str): - star_information = _STARinformation.build_from_tab(counts_file) +async def _process_standard_replicate(counts_file: Path, strand_file: Path, sample_name: str): + star_information = await _STARinformation.build_from_tab(counts_file) strand_information = strand_file.read_text().rstrip("\n").lower() if strand_information not in ("none", "first_read_transcription_strand", "second_read_transcription_strand"): @@ -214,7 +216,7 @@ def _process_standard_replicate(counts_file: Path, strand_file: Path, sample_nam return sample_count -def _prepare_sample_counts( +async def _prepare_sample_counts( sample_name: str, counts_file: Path, 
strand_file: Path, @@ -222,27 +224,27 @@ def _prepare_sample_counts( ) -> pd.DataFrame | Literal["SKIP"]: # Test if the counts_file is the first run in a multi-run smaple if re.search(r"R\d+r1", counts_file.as_posix()): - return _process_first_multirun_sample(strand_file=strand_file, all_counts_files=all_counts_files) + return await _process_first_multirun_sample(strand_file=strand_file, all_counts_files=all_counts_files) elif re.search(r"R\d+r\d+", counts_file.as_posix()): return "SKIP" else: - return _process_standard_replicate(counts_file, strand_file, sample_name) + return await _process_standard_replicate(counts_file, strand_file, sample_name) async def _create_sample_counts_matrix(metrics: _StudyMetrics) -> pd.DataFrame: adjusted_index = 0 - counts: pd.DataFrame | Literal["SKIP"] = _prepare_sample_counts( + counts: pd.DataFrame | Literal["SKIP"] = await _prepare_sample_counts( sample_name=metrics.sample_names[0], counts_file=metrics.count_files[0], - strand_file=metrics.strandedness_files[0], + strand_file=metrics.strand_files[0], all_counts_files=metrics.count_files, ) for i in range(1, metrics.num_samples): - new_counts = _prepare_sample_counts( + new_counts = await _prepare_sample_counts( sample_name=metrics.sample_names[i], counts_file=metrics.count_files[i], - strand_file=metrics.strandedness_files[i], + strand_file=metrics.strand_files[i], all_counts_files=metrics.count_files, ) if isinstance(new_counts, str) and new_counts == "SKIP": @@ -260,35 +262,38 @@ async def _create_sample_counts_matrix(metrics: _StudyMetrics) -> pd.DataFrame: return counts -async def _create_counts_matrix(context_name: str, config: Config): - """Create a counts matrix by reading gene counts table(s).""" - data_dir = config.data_dir / "COMO_input" / context_name - matrix_output_dir = config.data_dir / "data_matrices" / context_name - - study_metrics = _organize_gene_counts_files(data_dir=data_dir) - final_matrix: pd.DataFrame = pd.DataFrame() +async def _write_counts_matrix( + 
*, + config_df: pd.DataFrame, + como_context_dir: Path, + output_counts_matrix_filepath: Path, + rna_type: type_rna, +) -> pd.DataFrame: + """Create a counts matrix file by reading gene counts table(s).""" + study_metrics = _organize_gene_counts_files(data_dir=como_context_dir) + counts: list[pd.DataFrame] = await asyncio.gather( + *[_create_sample_counts_matrix(metric) for metric in study_metrics] + ) + final_matrix = pd.DataFrame() + for count in counts: + final_matrix = count if final_matrix.empty else pd.merge(final_matrix, count, on="ensembl_gene_id", how="outer") - for metric in study_metrics: - counts: pd.DataFrame = await _create_sample_counts_matrix(metric) - final_matrix = ( - counts if final_matrix.empty else pd.merge(final_matrix, counts, on="ensembl_gene_id", how="outer") - ) + rna_specific_sample_names = config_df.loc[config_df["library_prep"] == rna_type, "sample_name"].tolist() + final_matrix = final_matrix[["ensembl_gene_id", *rna_specific_sample_names]] - output_filename = matrix_output_dir / f"gene_counts_matrix_full_{data_dir.stem}.csv" - output_filename.parent.mkdir(parents=True, exist_ok=True) - final_matrix.to_csv(output_filename, index=False) - logger.success(f"Wrote gene count matrix for '{data_dir.stem}' at '{output_filename}'") + output_counts_matrix_filepath.parent.mkdir(parents=True, exist_ok=True) + final_matrix.to_csv(output_counts_matrix_filepath, index=False) + logger.success(f"Wrote gene count matrix at '{output_counts_matrix_filepath}'") + return final_matrix -async def _create_config_df(context_name: str) -> pd.DataFrame: # noqa: C901 +async def _create_config_df(context_name: str, /, como_input_dir: Path) -> pd.DataFrame: # noqa: C901 """Create configuration sheet. The configuration file created is based on the gene counts matrix. 
If using zFPKM normalization technique, mean fragment lengths will be fetched """ - config = Config() - gene_counts_files = list(Path(config.data_dir, "COMO_input", context_name, "geneCounts").rglob("*.tab")) - + gene_counts_files = list(Path(como_input_dir, context_name, "geneCounts").rglob("*.tab")) sample_names: list[str] = [] fragment_lengths: list[int | float] = [] layouts: list[str] = [] @@ -329,19 +334,12 @@ async def _create_config_df(context_name: str) -> pd.DataFrame: # noqa: C901 r_label = re.findall(r"r\d{1,3}", r.as_posix())[0] R_label = re.findall(r"R\d{1,3}", r.as_posix())[0] # noqa: N806 frag_filename = "".join([context_name, "_", study_number, R_label, r_label, "_fragment_size.txt"]) - frag_files.append( - config.data_dir / "COMO_input" / context_name / "fragmentSizes" / study_number / frag_filename - ) - - context_path = config.data_dir / "COMO_input" / context_name + frag_files.append(como_input_dir / context_name / "fragmentSizes" / study_number / frag_filename) + context_path = como_input_dir / context_name layout_files: list[Path] = list((context_path / "layouts").rglob(f"{context_name}_{label}_layout.txt")) - strand_files: list[Path] = list( - (context_path / "strandedness").rglob(f"{context_name}_{label}_strandedness.txt") - ) - frag_files: list[Path] = list( - (context_path / "fragmentSizes").rglob(f"{context_name}_{label}_fragment_size.txt") - ) + strand_files: list[Path] = list((context_path / "strandedness").rglob(f"{context_name}_{label}_strandedness.txt")) # fmt: skip # noqa: E501 + frag_files: list[Path] = list((context_path / "fragmentSizes").rglob(f"{context_name}_{label}_fragment_size.txt")) # fmt: skip # noqa: E501 prep_files: list[Path] = list((context_path / "prepMethods").rglob(f"{context_name}_{label}_prep_method.txt")) layout = "UNKNOWN" @@ -351,7 +349,7 @@ async def _create_config_df(context_name: str) -> pd.DataFrame: # noqa: C901 f"this should be defined by user if using zFPKM or rnaseq_gen.py will not run" ) elif 
len(layout_files) == 1: - with layout_files[0].open("w") as file: + with layout_files[0].open("r") as file: layout = file.read().strip() elif len(layout_files) > 1: raise ValueError( @@ -367,7 +365,7 @@ async def _create_config_df(context_name: str) -> pd.DataFrame: # noqa: C901 f"infer the strandedness when writing the counts matrix" ) elif len(strand_files) == 1: - with strand_files[0].open("w") as file: + with strand_files[0].open("r") as file: strand = file.read().strip() elif len(strand_files) > 1: raise ValueError( @@ -379,7 +377,7 @@ async def _create_config_df(context_name: str) -> pd.DataFrame: # noqa: C901 if len(prep_files) == 0: logger.warning(f"No prep file found for {label}, assuming 'total' as in Total RNA library preparation") elif len(prep_files) == 1: - with prep_files[0].open("w") as file: + with prep_files[0].open("r") as file: prep = file.read().strip().lower() if prep not in ["total", "mrna"]: raise ValueError(f"Prep method must be either 'total' or 'mrna' for {label}") @@ -441,55 +439,29 @@ async def _create_config_df(context_name: str) -> pd.DataFrame: # noqa: C901 return out_df -def _split_config_df(df): - """Split a config dataframe to two. - - One for Total RNA library prep, one for mRNA - """ - df_t = df[df["library_prep"] == "total"] - df_m = df[df["library_prep"] == "mrna"] - - return df_t, df_m - - -def _split_counts_matrices( - count_matrix_all: Path, df_total: pd.DataFrame, df_mrna: pd.DataFrame -) -> tuple[pd.DataFrame, pd.DataFrame]: - """Split a counts-matrix dataframe to two. 
- - One for Total RNA library prep, one for mRNA - """ - logger.info(f"Reading gene count matrix file at '{count_matrix_all}'") - matrix_all = pd.read_csv(count_matrix_all) - matrix_total = matrix_all[ - ["ensembl_gene_id"] + [n for n in matrix_all.columns if n in df_total["sample_name"].tolist()] - ] - matrix_mrna = matrix_all[ - ["ensembl_gene_id"] + [n for n in matrix_all.columns if n in df_mrna["sample_name"].tolist()] - ] - - return matrix_total, matrix_mrna - - async def _create_gene_info_file( *, - matrix_files: list[Path], - taxon_id, - config: Config, + counts_matrix: pd.DataFrame | Path, + output_filepath: Path, + taxon_id: type_taxon, cache: bool, ): """Create gene info file for specified context by reading first column in its count matrix file.""" logger.info("Fetching gene info") - genes = set() - for file in matrix_files: - data: pd.DataFrame | sc.AnnData = pd.read_csv(file) if file.suffix == ".csv" else sc.read_h5ad(file) - input_values = data.iloc[:, 0].tolist() if isinstance(data, pd.DataFrame) else data.var_names.tolist() - conversion = await convert_gene_data(input_values, taxon_id) - genes.update(conversion["entrez_gene_id"].astype(str).tolist()) + + data: pd.DataFrame | sc.AnnData = ( + (pd.read_csv(counts_matrix) if counts_matrix.suffix == ".csv" else sc.read_h5ad(counts_matrix)) + if isinstance(counts_matrix, Path) + else counts_matrix + ) + + input_values = data.iloc[:, 0].tolist() if isinstance(data, pd.DataFrame) else data.var_names.tolist() + conversion = await convert_gene_data(input_values, taxon_id) + genes = conversion["entrez_gene_id"].astype(str).tolist() mygene = MyGene(cache=cache) gene_data = await mygene.query( - items=list(genes), + items=genes, taxon=taxon_id, scopes="entrezgene", ) @@ -521,123 +493,222 @@ async def _create_gene_info_file( ] gene_info["size"] = gene_info["end_position"].astype(int) - gene_info["start_position"].astype(int) gene_info.drop(columns=["start_position", "end_position"], inplace=True) - 
output_filepath = config.data_dir / "gene_info.csv" gene_info.to_csv(output_filepath, index=False) logger.success(f"Gene Info file written at '{output_filepath}'") -async def _handle_context_batch( # noqa: C901 +async def _create_matrix_file( + context_name: str, + taxon_id: type_taxon, + output_gene_info_filepath: Path, + output_config_filepath: Path, + output_gene_matrix_filepath: Path, + como_dirpath: type_path, + rna_type: type_rna, + cache: bool, +) -> None: + como_context_dir = como_dirpath / context_name + config_df = await _create_config_df(context_name, como_input_dir=como_dirpath) + counts_matrix = await _write_counts_matrix( + config_df=config_df, + como_context_dir=como_context_dir, + output_counts_matrix_filepath=output_gene_matrix_filepath, + rna_type=rna_type, + ) + with pd.ExcelWriter(output_config_filepath) as writer: + subset_config = config_df[config_df["library_prep"] == rna_type] + subset_config.to_excel(writer, sheet_name=context_name, header=True, index=False) + + await _create_gene_info_file( + counts_matrix=counts_matrix, + output_filepath=output_gene_info_filepath, + taxon_id=taxon_id, + cache=cache, + ) + + +async def _process_items( context_names: list[str], - mode: Literal["create", "provide"], - taxon_id, - provided_matrix_file, - config: Config, + mode: list[Literal["create", "provide"]], + taxon_id: list[str], + input_como_dirpath: list[Path] | None, + input_matrix_filepath: list[Path] | None, + output_gene_info_filepath: Path, + output_trna_config_filepath: Path | None, + output_mrna_config_filepath: Path | None, + output_trna_count_matrix: list[Path] | None, + output_mrna_count_matrix: list[Path] | None, cache: bool, ): - """Handle iteration through each context type and create appropriate files.""" - trnaseq_config_filename = config.config_dir / "trnaseq_data_inputs_auto.xlsx" - mrnaseq_config_filename = config.config_dir / "mrnaseq_data_inputs_auto.xlsx" - - using_trna = False # turn on when any total set is found to prevent 
writer from being init multiple times or empty - using_mrna = False # turn on when any mrna set is found to prevent writer from being init multiple times or empty - - logger.success(f"Found {len(context_names)} contexts to process: {', '.join(context_names)}") - - tmatrix_files: list[Path] = [] - mmatrix_files: list[Path] = [] - match mode: - case "create": - for context_name in context_names: - context_name = context_name.strip(" ") - logger.info(f"Processing {context_name}") - gene_output_dir = config.result_dir / context_name - matrix_output_dir = config.data_dir / "data_matrices" / context_name - - gene_output_dir.parent.mkdir(parents=True, exist_ok=True) - matrix_output_dir.parent.mkdir(parents=True, exist_ok=True) - - logger.info(f"Gene info output directory is '{gene_output_dir}'") - - await _create_counts_matrix(context_name, config=config) - # TODO: warn user or remove samples that are all 0 to prevent density plot error in zFPKM - config_df = await _create_config_df(context_name) - trna_df, mrna_df = _split_config_df(config_df) - - matrix_path_total = matrix_output_dir / f"gene_counts_matrix_total_{context_name}.csv" - if not trna_df.empty: - if not using_trna: - using_trna = True - twriter = pd.ExcelWriter(trnaseq_config_filename) - - tmatrix_files.append(matrix_path_total) - trna_df.to_excel(twriter, sheet_name=context_name, header=True, index=False) - - matrix_path_mrna = matrix_output_dir / f"gene_counts_matrix_mrna_{context_name}.csv" - if not mrna_df.empty: - if not using_mrna: - using_mrna = True - mwriter = pd.ExcelWriter(mrnaseq_config_filename) - - mmatrix_files.append(matrix_path_mrna) - mrna_df.to_excel(mwriter, sheet_name=context_name, header=True, index=False) - - matrix_path_all = matrix_output_dir / f"gene_counts_matrix_full_{context_name}.csv" - trna_matrix, mrna_matrix = _split_counts_matrices(matrix_path_all, trna_df, mrna_df) - if len(trna_matrix.columns) >= 1: - trna_matrix.to_csv(matrix_path_total, header=True, index=False) - if 
len(mrna_matrix.columns) >= 1: - mrna_matrix.to_csv(matrix_path_mrna, header=True, index=False) - - if using_trna: - twriter.close() - if using_mrna: - mwriter.close() - - await _create_gene_info_file( - matrix_files=tmatrix_files + mmatrix_files, - taxon_id=taxon_id, - config=config, - cache=cache, + tasks = [] + for i, m in enumerate(mode): + if m == "create" and output_trna_config_filepath: + tasks.append( + asyncio.create_task( + _create_matrix_file( + context_name=context_names[i], + taxon_id=taxon_id[i], + output_gene_matrix_filepath=output_trna_count_matrix[i], + como_dirpath=input_como_dirpath[i], + output_gene_info_filepath=output_gene_info_filepath, + output_config_filepath=output_trna_config_filepath, + rna_type="total", + cache=cache, + ) + ) ) - case "provide": - matrix_files: list[Path] = [Path(p) for p in stringlist_to_list(provided_matrix_file)] - await _create_gene_info_file( - matrix_files=matrix_files, - taxon_id=taxon_id, - config=config, - cache=cache, + + if m == "create" and output_mrna_config_filepath: + tasks.append( + asyncio.create_task( + _create_matrix_file( + context_name=context_names[i], + taxon_id=taxon_id[i], + output_gene_matrix_filepath=output_mrna_count_matrix[i], + como_dirpath=input_como_dirpath[i], + output_gene_info_filepath=output_gene_info_filepath, + output_config_filepath=output_mrna_config_filepath, + rna_type="mrna", + cache=cache, + ) + ) ) - case _: - raise ValueError("'--mode' must be either 'create' or 'provide'") + if m == "provide": + tasks.append( + asyncio.create_task( + _create_gene_info_file( + counts_matrix=input_matrix_filepath[i], + output_filepath=output_gene_info_filepath, + taxon_id=taxon_id[i], + cache=cache, + ) + ) + ) + await asyncio.gather(*tasks) -async def rnaseq_preprocess( - context_names: list[str], - mode: Literal["create", "provide"], - taxon_id: int | str, - input_format: Input | str, - matrix_file: str | Path | None = None, - config: Config = None, + +def _validate_matrix_output_args( + 
output_count_matrices_dirpath: list, + output_trna_count_matrix_filepath: list, + output_mrna_count_matrix_filepath: list, +): + def _raise(): + raise ValueError( + "output_count_matrices_dirpath OR " + "(output_trna_count_matrix_filepath AND output_mrna_count_matrix_filepath) can be provided" + ) + + # output_count_matrices_dir OR (output_trna_count_matrix AND output_mrna_count_matrix) can be provided + # Check this condition is satisfied + if output_count_matrices_dirpath and (output_trna_count_matrix_filepath or output_mrna_count_matrix_filepath): + _raise() + if output_trna_count_matrix_filepath and not output_mrna_count_matrix_filepath: + _raise() + if not output_trna_count_matrix_filepath and output_mrna_count_matrix_filepath: + _raise() + + +async def rnaseq_preprocess( # noqa: C901 + context_names: str | list[str], + mode: Literal["create", "provide"] | list[Literal["create", "provide"]], + taxon_id: type_taxon | list[type_taxon], + input_como_dirpath: type_path | list[type_path] | None = None, + input_matrix_filepath: type_path | list[type_path] | None = None, + output_gene_info_filepath: Path | None = None, + output_trna_config_filepath: Path | None = None, + output_mrna_config_filepath: Path | None = None, + output_count_matrices_dirpath: list[Path] | None = None, + output_trna_count_matrix: list[Path] | None = None, + output_mrna_count_matrix: list[Path] | None = None, cache: bool = True, ) -> None: """Preprocesses RNA-seq data for downstream analysis. 
Fetches additional gene information from a provided matrix or gene counts, or optionally creates this matrix using gene count files obtained using STAR aligner + + :param context_names: The context/cell type being processed + :param mode: The mode of operation + :param taxon_id: The NCBI taxonomy ID + :param output_gene_info_filepath: Path to the output gene information CSV file + :param output_trna_config_filepath: Path to the output tRNA config file (if in "create" mode) + :param output_mrna_config_filepath: Path to the output mRNA config file (if in "create" mode) + :param output_count_matrices_dirpath: The path to write all created count matrices + :param output_trna_count_matrix: The path to write total RNA count matrices + :param output_mrna_count_matrix: The path to write messenger RNA count matrices + :param input_como_dirpath: If in "create" mode, the input path(s) to the COMO_input directory of the current context + i.e., the directory containing "fragmentSizes", "geneCounts", "insertSizeMetrics", etc. 
directories + :param input_matrix_filepath: If in "provide" mode, the path(s) to the count matrices to be processed + :param cache: Should HTTP requests be cached """ - config = Config() if config is None else config - if mode not in {"create", "provide"}: - raise ValueError("mode must be either 'create' or 'provide'") + context_names = _listify(context_names) + mode = _listify(mode) + taxon_id = _listify(taxon_id) + output_count_matrices_dirpath: list[Path] = [Path(i) for i in _listify(output_count_matrices_dirpath)] if output_count_matrices_dirpath else [] # fmt: skip # noqa: E501 + output_trna_count_matrix: list[Path] = [Path(i) for i in _listify(output_trna_count_matrix)] if output_trna_count_matrix else [] # fmt: skip # noqa: E501 + output_mrna_count_matrix: list[Path] = [Path(i) for i in _listify(output_mrna_count_matrix)] if output_mrna_count_matrix else [] # fmt: skip # noqa: E501 + input_como_dirpath: list[Path] = [Path(i) for i in _listify(input_como_dirpath)] if input_como_dirpath else [] + input_matrix_filepath: list[Path] = ( + [Path(i) for i in _listify(input_matrix_filepath)] if input_matrix_filepath else [] + ) - if not isinstance(taxon_id, int) and taxon_id not in ["human", "mouse"]: - raise ValueError("taxon_id must be either an integer, or accepted string ('mouse', 'human')") + _validate_matrix_output_args( + output_count_matrices_dirpath=output_count_matrices_dirpath, + output_trna_count_matrix_filepath=output_trna_count_matrix, + output_mrna_count_matrix_filepath=output_mrna_count_matrix, + ) + + if len(input_como_dirpath) == 0 and len(input_matrix_filepath) == 0: + raise ValueError("Either 'como_input_dirpath' or 'input_matrix_filepath' must be provided.") + + if not any({output_trna_config_filepath, output_mrna_config_filepath}): + raise ValueError("Either 'output_trna_config_filepath' or 'output_mrna_config_filepath' must be provided.") + if output_trna_config_filepath and output_trna_config_filepath.suffix not in {".xlsx", ".xls"}: + raise 
ValueError("output_trna_config_filepath must be an Excel file.") + if output_mrna_config_filepath and output_mrna_config_filepath.suffix not in {".xlsx", ".xls"}: + raise ValueError("output_mrna_config_filepath must be an Excel file.") + + if not all(m in {"create", "provide"} for m in mode): + raise ValueError(f"Invalid mode(s): {', '.join(m for m in mode if m not in {'create', 'provide'})}") + + if not all(t.isdigit() or isinstance(t, int) or t in {"human", "mouse"} for t in taxon_id): + raise ValueError("Invalid taxon_id(s). Must be integer, 'human', or 'mouse'.") + + if not (len(context_names) == len(mode) == len(taxon_id) == len(input_como_dirpath or input_matrix_filepath)): + raise ValueError( + "context_names, mode, taxon_id, and (como or matrix) input must be the same length.\n" + f"context_names: {len(context_names)}\n" + f"mode: {len(mode)}\n" + f"taxon_id: {len(taxon_id)}\n" + f"como_input_dirpath or matrix_filepath: {len(input_como_dirpath or input_matrix_filepath)}" + ) - await _handle_context_batch( + for path in input_como_dirpath: + if not path.exists(): + raise ValueError(f"COMO input directory does not exist: {path}") + if not path.is_dir(): + raise ValueError(f"COMO input directory must be a directory: {path}") + + for path in input_matrix_filepath: + if not path.exists(): + raise ValueError(f"Input matrix file does not exist: {path}") + if path.suffix not in {".csv", ".h5ad"}: + raise ValueError(f"Input matrix file must be a .csv or .h5ad file: {path}") + if not path.is_file(): + raise ValueError(f"Input matrix file must be a file: {path}") + + await _process_items( context_names=context_names, mode=mode, taxon_id=taxon_id, - provided_matrix_file=Path(matrix_file if matrix_file is not None else "").as_posix(), - config=config, + output_gene_info_filepath=output_gene_info_filepath, + output_trna_config_filepath=output_trna_config_filepath, + output_mrna_config_filepath=output_mrna_config_filepath, + 
output_trna_count_matrix=output_trna_count_matrix, + output_mrna_count_matrix=output_mrna_count_matrix, + input_como_dirpath=input_como_dirpath, + input_matrix_filepath=input_matrix_filepath, cache=cache, ) @@ -645,97 +716,120 @@ async def rnaseq_preprocess( def _parse_args(): parser = argparse.ArgumentParser( prog="rnaseq_preprocess.py", - description=""" - Fetches additional gene information from a provided matrix or gene counts, or optionally creates this - matrix using gene count files obtained using STAR aligner. Creation of counts matrix from STAR aligner - output requires that the 'COMO_input' folder exists and is correctly structured according to the - normalization technique being used. A correctly structured folder can be made using our Snakemake-based - alignment pipeline at: - https://github.com/HelikarLab/FastqToGeneCounts""", - epilog=""" - For additional help, please post questions/issues in the MADRID GitHub repo at - https://github.com/HelikarLab/MADRID or email babessell@gmail.com""", + description="Fetches additional gene information from a provided matrix or gene counts, " + "or optionally creates this matrix using gene count files obtained using STAR aligner. " + "Creation of counts matrix from STAR aligner output requires that the 'COMO_input' " + "folder exists and is correctly structured according to the normalization technique being used. " + "A correctly structured folder can be made using our Snakemake-based alignment pipeline at:" + "https://github.com/HelikarLab/FastqToGeneCounts", + epilog="For additional help, please post questions/issues in the MADRID GitHub repo at" + "https://github.com/HelikarLab/COMO", ) parser.add_argument( - "-n", "--context-names", + required=True, type=str, - nargs="+", + nargs="*", + help="Tissue/cell name of models to generate. 
These names should correspond to the folders" + "in 'COMO_input/' if creating count matrix files, or to" + "'work/data/data_matrices//gene_counts_matrix_.csv' if supplying" + "the count matrix as an imported .csv file. If making multiple models in a batch, then" + "use the format: 'context1 context2 context3'", + ) + parser.add_argument( + "--mode", + type=str, + nargs="*", required=True, - dest="context_names", - help="""Tissue/cell name of models to generate. These names should correspond to the folders - in 'COMO_input/' if creating count matrix files, or to - 'work/data/data_matrices//gene_counts_matrix_.csv' if supplying - the count matrix as an imported .csv file. If making multiple models in a batch, then - use the format: "context1 context2 context3". """, + help="Mode of rnaseq_preprocess.py, either 'create' or 'provide'", ) parser.add_argument( - "-i", "--taxon-id", required=False, - default=9606, - dest="taxon_id", + nargs="*", + type=str, + default="9606", help="BioDbNet taxon ID number, also accepts 'human', or 'mouse'", ) parser.add_argument( - "--input-format", - required=True, - dest="input_format", - help="The data input format, such as Ensembl, Entrez, etc.", + "--output-gene-info-filepath", + required=False, + type=Path, + help="The location to write gene information", ) parser.add_argument( - "--mode", + "--output-count-matrices-dir", + required=False, type=str, - required=True, - dest="mode", - choices={"create", "provide"}, - help="Mode of rnaseq_preprocess.py, either 'make' or 'provide'", + help="All count matrix files can be placed in a single directory " + "if they should not be saved to specific locations", + ) + parser.add_argument( + "--output-trna-count-matrix", + required=False, + type=str, + help="The location to save total RNA count matrices", + ) + parser.add_argument( + "--output-mrna-count-matrix", + required=False, + type=str, + help="The location to save messenger RNA count matrices", ) parser.add_argument( - "--matrix", + 
"--output-trna-config-filepath", required=False, - dest="provided_matrix_fname", default=None, - help="Name of provided counts matrix in /work/data/data_matrices//.csv", + type=Path, + help="The location to save TRNA config file", ) - - parsed = parser.parse_args() - parsed.context_names = stringlist_to_list(parsed.context_names) - - if parsed.mode == "provide" and parsed.provided_matrix_fname is None: - raise ValueError("If provide_matrix is True, then provided_matrix_fname must be provided") - - # handle species alternative ids - taxon_id = str(parsed.taxon_id) - if taxon_id.isdigit(): - try: - parsed.taxon_id = Taxon.from_int(int(taxon_id)) - except ValueError: - parsed.taxon_id = int(taxon_id) - else: - if taxon_id.upper() in {"HUMAN", "HOMO SAPIENS"}: - parsed.taxon_id = Taxon.HOMO_SAPIENS - elif taxon_id.upper() in {"MOUSE", "MUS MUSCULUS"}: - parsed.taxon_id = Taxon.MUS_MUSCULUS - else: - raise ValueError( - f"Taxon id (--taxon-id) is invalid; accepts 'human', 'mouse', or an integer value; provided: {taxon_id}" - ) - - args = _Arguments(**vars(parsed)) - return args + parser.add_argument( + "--output-mrna-config-filepath", + required=False, + default=None, + type=Path, + help="The location to save MRNA config file", + ) + parser.add_argument( + "--input-como-dirpath", + nargs="*", + required=False, + default=None, + type=str, + help="Path to COMO input directory", + ) + parser.add_argument( + "--input-matrix-filepath", + required=False, + nargs="*", + default=None, + type=str, + help="Path to input matrix file", + ) + parser.add_argument( + "--cache", + required=False, + type=bool, + default=True, + help="Cache files for faster processing", + ) + return _Arguments(**vars(parser.parse_args())) if __name__ == "__main__": args: _Arguments = _parse_args() - taxon_id_value = args.taxon_id.value if isinstance(args.taxon_id, Taxon) else args.taxon_id - asyncio.run( rnaseq_preprocess( context_names=args.context_names, mode=args.mode, - taxon_id=taxon_id_value, - 
matrix_file=args.provided_matrix_fname, - input_format=args.input_format, + taxon_id=args.taxon_id, + output_gene_info_filepath=args.output_gene_info_filepath, + output_trna_count_matrix=args.output_trna_count_matrix, + output_mrna_count_matrix=args.output_mrna_count_matrix, + output_trna_config_filepath=args.output_trna_config_filepath, + output_mrna_config_filepath=args.output_mrna_config_filepath, + input_como_dirpath=args.input_como_dirpath, + input_matrix_filepath=args.input_matrix_filepath, + cache=args.cache, ) ) diff --git a/main/como/utils.py b/main/como/utils.py index 9668a34d..a359de0f 100644 --- a/main/como/utils.py +++ b/main/como/utils.py @@ -115,32 +115,32 @@ def stringlist_to_list(stringlist: str | list[str]) -> list[str]: :param stringlist: The "string list" gathered from the command line. Example input: "['mat', 'xml', 'json']" """ - if isinstance(stringlist, str): - if stringlist.startswith("[") and stringlist.endswith("]"): - # Remove any brackets from the first and last items; replace quotation marks and commas with nothing - new_list: list[str] = stringlist.strip("[]").replace("'", "").replace(" ", "").split(",") - - # Show a warning if more than one item is present in the list (this means we are using the old method) - logger.critical( - "DeprecationWarning: Please use the new method of providing context names, " - "i.e. --output-filetypes 'type1 type2 type3'." - ) - logger.critical( - "If you are using COMO, this can be done by setting the 'context_names' variable to a " - "simple string separated by spaces. Here are a few examples!" - ) - logger.critical("context_names = 'cellType1 cellType2 cellType3'") - logger.critical("output_filetypes = 'output1 output2 output3'") - logger.critical( - "\nYour current method of passing context names will be removed in the future. 
" - "Update your variables above accordingly!\n\n" - ) - - else: - new_list: list[str] = stringlist.split(" ") - - return new_list - return stringlist + if isinstance(stringlist, list): + return stringlist + + if not (stringlist.startswith("[") and stringlist.endswith("]")): + return stringlist.split(" ") + + # Remove any brackets from the first and last items; replace quotation marks and commas with nothing + new_list: list[str] = stringlist.strip("[]").replace("'", "").replace(" ", "").split(",") + + # Show a warning if more than one item is present in the list (this means we are using the old method) + logger.critical( + "DeprecationWarning: Please use the new method of providing context names, " + "i.e. --output-filetypes 'type1 type2 type3'." + ) + logger.critical( + "If you are using COMO, this can be done by setting the 'context_names' variable to a " + "simple string separated by spaces. Here are a few examples!" + ) + logger.critical("context_names = 'cellType1 cellType2 cellType3'") + logger.critical("output_filetypes = 'output1 output2 output3'") + logger.critical( + "\nYour current method of passing context names will be removed in the future. 
" + "Update your variables above accordingly!\n\n" + ) + + return new_list def split_gene_expression_data(expression_data: pd.DataFrame, recon_algorithm: Algorithm | None = None): @@ -258,3 +258,8 @@ async def convert_gene_data(values: list[str], taxon_id: int | str | Taxon) -> p return await gene_id_to_ensembl_and_gene_symbol(ids=values, taxon=taxon_id) else: raise ValueError("Gene data must be of the same type (i.e., all Ensembl, Entrez, or Gene Symbols)") + + +def _listify(value): + """Convert items into a list.""" + return [value] if not isinstance(value, list) else value From 90c43f35e3ed5dbaf1a67f378d33e6f6f7db3131 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:18:59 +0000 Subject: [PATCH 02/91] chore(deps): bump astral-sh/setup-uv from 3 to 4 Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 3 to 4. - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/v3...v4) --- updated-dependencies: - dependency-name: astral-sh/setup-uv dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/continuous_integration.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 8109f213..7bd2c871 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -14,7 +14,7 @@ jobs: uses: actions/checkout@v4 - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 - name: Create Virtual Environment run: uv venv From bfcafa3cba8815ab932266148bb5bf3a5e509cb0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:19:03 +0000 Subject: [PATCH 03/91] chore(deps): bump astral-sh/ruff-action from 1 to 2 Bumps [astral-sh/ruff-action](https://github.com/astral-sh/ruff-action) from 1 to 2. - [Release notes](https://github.com/astral-sh/ruff-action/releases) - [Changelog](https://github.com/astral-sh/ruff-action/blob/main/release-drafter.yml) - [Commits](https://github.com/astral-sh/ruff-action/compare/v1...v2) --- updated-dependencies: - dependency-name: astral-sh/ruff-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/continuous_integration.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/continuous_integration.yml b/.github/workflows/continuous_integration.yml index 8109f213..869f587d 100644 --- a/.github/workflows/continuous_integration.yml +++ b/.github/workflows/continuous_integration.yml @@ -26,17 +26,17 @@ jobs: run: uv run jupyter nbconvert --clear-output --inplace "main/COMO.ipynb" - name: Format Python Imports - uses: astral-sh/ruff-action@v1 + uses: astral-sh/ruff-action@v2 with: args: "check --fix --select I" - name: Format code - uses: astral-sh/ruff-action@v1 + uses: astral-sh/ruff-action@v2 with: args: "format" - name: Format Notebook - uses: astral-sh/ruff-action@v1 + uses: astral-sh/ruff-action@v2 with: args: "format main/COMO.ipynb" @@ -54,7 +54,7 @@ jobs: uses: actions/checkout@v4 - name: Check Lint - uses: astral-sh/ruff-action@v1 + uses: astral-sh/ruff-action@v2 with: args: "check --no-fix --verbose" From 3e81a3de65cd40d2db137b95c4275e7da14fc9f4 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Fri, 6 Dec 2024 23:14:57 -0600 Subject: [PATCH 04/91] refactor: allow formatting of jupyter notebooks Signed-off-by: Josh Loecker --- ruff.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ruff.toml b/ruff.toml index 9591bda3..455091e3 100644 --- a/ruff.toml +++ b/ruff.toml @@ -1,6 +1,6 @@ line-length = 120 -extend-include = ["docs/**/*.py", "tests/**/*.py"] -exclude = ["__init__.py", "main/COMO.ipynb"] +extend-include = ["docs/**/*.py", "tests/**/*.py", "**/*.ipynb"] +exclude = ["__init__.py"] [format] quote-style = "double" From 476ca849247a1eb2b954dff0f2bea39ed4033286 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Fri, 6 Dec 2024 23:37:01 -0600 Subject: [PATCH 05/91] refactor: rename --- main/como/custom_types.py | 32 ------------------------- main/como/types.py | 49 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 
32 deletions(-) delete mode 100644 main/como/custom_types.py create mode 100644 main/como/types.py diff --git a/main/como/custom_types.py b/main/como/custom_types.py deleted file mode 100644 index 0f464203..00000000 --- a/main/como/custom_types.py +++ /dev/null @@ -1,32 +0,0 @@ -from __future__ import annotations - -from enum import Enum -from pathlib import Path - -from fast_bioservices import Taxon - - -class RNASeqPreparationMethod(Enum): - TOTAL = "total" - MRNA = "mrna" - SCRNA = "scrna" - - @staticmethod - def from_string(value: str) -> RNASeqPreparationMethod: - """Build a preparation method object from a string.""" - match_value = "".join(c for c in value if c.isascii()).lower() - - match match_value: - case "total" | "trna": - return RNASeqPreparationMethod.TOTAL - case "mrna": - return RNASeqPreparationMethod.MRNA - case "scrna": - return RNASeqPreparationMethod.SCRNA - case _: - possible_values = [t.value for t in RNASeqPreparationMethod] - raise ValueError(f"Filtering technique must be one of {possible_values}; got: {value}") - - -type_taxon = Taxon | int | str -type_path = str | Path diff --git a/main/como/types.py b/main/como/types.py new file mode 100644 index 00000000..8e657f7f --- /dev/null +++ b/main/como/types.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from abc import ABC +from enum import Enum +from pathlib import Path + +from pydantic import BaseModel, ConfigDict +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class _BaseModel(BaseModel, ABC): + model_config = ConfigDict() + + +class _BaseArguments(BaseSettings, ABC): + model_config = SettingsConfigDict( + cli_parse_args=True, + cli_kebab_case=True, + nested_model_default_partial_update=True, + case_sensitive=True, + cli_avoid_json=True, + cli_enforce_required=True, + ) + + +class RNAPrepMethod(Enum): + TOTAL = "total" + MRNA = "mrna" + SCRNA = "scrna" + + @staticmethod + def from_string(value: str) -> RNAPrepMethod: + """Build a preparation method object 
from a string.""" + match_value = "".join(c for c in value if c.isascii()).lower() + + match match_value: + case "total" | "trna": + return RNAPrepMethod.TOTAL + case "mrna": + return RNAPrepMethod.MRNA + case "scrna": + return RNAPrepMethod.SCRNA + case _: + possible_values = [t.value for t in RNAPrepMethod] + raise ValueError(f"Filtering technique must be one of {possible_values}; got: {value}") + + +type_path = str | Path +type_rna = Literal["total", "polya"] From 97160e705f34de1801ede4d147acdad2ee730a49 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 00:28:42 -0600 Subject: [PATCH 06/91] refactor: import required libraries --- main/como/rnaseq_preprocess.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index ee02948c..67092955 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -1,23 +1,23 @@ from __future__ import annotations -import argparse import asyncio +import contextlib import re +import sys from dataclasses import dataclass, field -from io import StringIO +from io import StringIO, TextIOWrapper +from itertools import chain from pathlib import Path -from typing import Literal, NamedTuple +from typing import Literal import aiofiles import numpy as np import pandas as pd import scanpy as sc from fast_bioservices.biothings.mygene import MyGene +from fast_bioservices.pipeline import ensembl_to_gene_id_and_symbol, gene_symbol_to_ensembl_and_gene_id from loguru import logger -from como.custom_types import type_path, type_taxon -from como.utils import _listify, convert_gene_data - type_rna = Literal["total", "mrna"] @@ -34,6 +34,7 @@ class _Arguments(NamedTuple): output_trna_count_matrix: list[Path] | None output_mrna_count_matrix: list[Path] | None cache: bool +from como.types import RNAPrepMethod, type_path, type_rna @dataclass From 0cacd94c29d523af2dbc6e7b0d26646e606fb020 Mon Sep 17 00:00:00 2001 From: Josh 
Loecker Date: Sat, 7 Dec 2024 00:29:26 -0600 Subject: [PATCH 07/91] refactor: remove unused arguments dataclass --- main/como/rnaseq_preprocess.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 67092955..43ddc42c 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -18,23 +18,8 @@ from fast_bioservices.pipeline import ensembl_to_gene_id_and_symbol, gene_symbol_to_ensembl_and_gene_id from loguru import logger -type_rna = Literal["total", "mrna"] - - -class _Arguments(NamedTuple): - context_names: list[str] - mode: list[Literal["create", "provide"]] - taxon_id: list[str] - input_como_dirpath: list[Path] | None - input_matrix_filepath: list[Path] | None - output_gene_info_filepath: Path | None - output_count_matrices_dir: list[Path] | None - output_trna_config_filepath: Path | None - output_mrna_config_filepath: Path | None - output_trna_count_matrix: list[Path] | None - output_mrna_count_matrix: list[Path] | None - cache: bool from como.types import RNAPrepMethod, type_path, type_rna +from como.utils import _listify @dataclass From 9b45c1d5925d0acff0cf62c5c89a826b9aff44ac Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 00:51:20 -0600 Subject: [PATCH 08/91] refactor: remove unused cli argument parsing Signed-off-by: Josh Loecker --- main/como/rnaseq_preprocess.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 43ddc42c..f4231d5b 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -32,7 +32,7 @@ class _STARinformation: count_matrix: pd.DataFrame @property - def num_genes(self): + def num_genes(self) -> int: return len(self.count_matrix) @classmethod @@ -120,8 +120,8 @@ def _sample_name_from_filepath(file: Path) -> str: def _organize_gene_counts_files(data_dir: Path) -> list[_StudyMetrics]: - 
gene_count_dir = Path(data_dir, "geneCounts").resolve() - strand_dir = Path(data_dir, "strandedness").resolve() + gene_count_dir = Path(data_dir, "geneCounts") + strand_dir = Path(data_dir, "strandedness") gene_counts_directories: list[Path] = sorted([p for p in gene_count_dir.glob("*") if not p.name.startswith(".")]) strandedness_directories: list[Path] = sorted([p for p in strand_dir.glob("*") if not p.name.startswith(".")]) From 31f59a7039c97a4cf9260647e8bd8fde859fd83e Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 00:54:13 -0600 Subject: [PATCH 09/91] fix: get correct column name Signed-off-by: Josh Loecker --- main/como/rnaseq_preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index f4231d5b..687af089 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -166,7 +166,7 @@ async def _process_first_multirun_sample(strand_file: Path, all_counts_files: li if strand_information == "none": strand_information = "unstranded_rna_counts" - run_counts = star_information.count_matrix[["gene_id", strand_information]] + run_counts = star_information.count_matrix[["ensembl_gene_id", strand_information]] run_counts.columns = pd.Index(["ensembl_gene_id", "counts"]) if sample_count.empty: sample_count = run_counts From 8462ab7d00a5295d2091e273425d66a446d9f7ab Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 00:54:38 -0600 Subject: [PATCH 10/91] refactor: pythonic approach to merging dataframes Signed-off-by: Josh Loecker --- main/como/rnaseq_preprocess.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 687af089..87417020 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -168,11 +168,11 @@ async def _process_first_multirun_sample(strand_file: Path, all_counts_files: li run_counts = 
star_information.count_matrix[["ensembl_gene_id", strand_information]] run_counts.columns = pd.Index(["ensembl_gene_id", "counts"]) - if sample_count.empty: - sample_count = run_counts - else: - # Merge to take all items from both data frames - sample_count = sample_count.merge(run_counts, on="ensembl_gene_id", how="outer") + sample_count = ( + run_counts + if sample_count.empty + else sample_count.merge(run_counts, on=["ensembl_gene_id", "counts"], how="outer") + ) # Set na values to 0 sample_count = sample_count.fillna(value="0") From caea939355a1391d12b102def9e8834f0a07e343 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 01:02:33 -0600 Subject: [PATCH 11/91] style: variable renaming Signed-off-by: Josh Loecker --- main/como/merge_xomics.py | 10 +++++----- main/como/rnaseq_preprocess.py | 6 +++--- main/como/types.py | 1 + 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/main/como/merge_xomics.py b/main/como/merge_xomics.py index 7079d456..3111ec66 100644 --- a/main/como/merge_xomics.py +++ b/main/como/merge_xomics.py @@ -16,7 +16,7 @@ from como import proteomics_gen, return_placeholder_data from como.combine_distributions import _combine_zscores -from como.custom_types import RNASeqPreparationMethod +from como.custom_types import RNAPrepMethod from como.project import Config from como.utils import split_gene_expression_data @@ -93,7 +93,7 @@ def __post_init__(self): raise ValueError("Adjust method must be either 'progressive', 'regressive', 'flat', or 'custom'") -def _load_rnaseq_tests(filename, context_name, prep_method: RNASeqPreparationMethod) -> tuple[str, pd.DataFrame]: +def _load_rnaseq_tests(filename, context_name, prep_method: RNAPrepMethod) -> tuple[str, pd.DataFrame]: """Load rnaseq results. 
Returns a dictionary of test (context, context, cell, etc ) names and rnaseq expression data @@ -112,11 +112,11 @@ def load_dummy_dict(): raise FileNotFoundError(f"Error: Config file not found at {inquiry_full_path}") match prep_method: - case RNASeqPreparationMethod.TOTAL: + case RNAPrepMethod.TOTAL: filename = f"rnaseq_total_{context_name}.csv" - case RNASeqPreparationMethod.MRNA: + case RNAPrepMethod.MRNA: filename = f"rnaseq_mrna_{context_name}.csv" - case RNASeqPreparationMethod.SCRNA: + case RNAPrepMethod.SCRNA: filename = f"rnaseq_scrna_{context_name}.csv" case _: raise ValueError( diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 87417020..3d31e050 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -253,7 +253,7 @@ async def _write_counts_matrix( config_df: pd.DataFrame, como_context_dir: Path, output_counts_matrix_filepath: Path, - rna_type: type_rna, + rna: type_rna, ) -> pd.DataFrame: """Create a counts matrix file by reading gene counts table(s).""" study_metrics = _organize_gene_counts_files(data_dir=como_context_dir) @@ -264,12 +264,12 @@ async def _write_counts_matrix( for count in counts: final_matrix = count if final_matrix.empty else pd.merge(final_matrix, count, on="ensembl_gene_id", how="outer") - rna_specific_sample_names = config_df.loc[config_df["library_prep"] == rna_type, "sample_name"].tolist() + rna_specific_sample_names = config_df.loc[config_df["library_prep"] == rna, "sample_name"].tolist() final_matrix = final_matrix[["ensembl_gene_id", *rna_specific_sample_names]] output_counts_matrix_filepath.parent.mkdir(parents=True, exist_ok=True) final_matrix.to_csv(output_counts_matrix_filepath, index=False) - logger.success(f"Wrote gene count matrix at '{output_counts_matrix_filepath}'") + logger.success(f"Wrote gene count matrix for '{rna}' RNA at '{output_counts_matrix_filepath}'") return final_matrix diff --git a/main/como/types.py b/main/como/types.py index 
8e657f7f..ebe44ecf 100644 --- a/main/como/types.py +++ b/main/como/types.py @@ -3,6 +3,7 @@ from abc import ABC from enum import Enum from pathlib import Path +from typing import Literal from pydantic import BaseModel, ConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict From 43e1a2e0202c4d923c2c96adf4198b0d4640a68e Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 01:02:47 -0600 Subject: [PATCH 12/91] style: ruff formatting Signed-off-by: Josh Loecker --- main/como/merge_xomics.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/main/como/merge_xomics.py b/main/como/merge_xomics.py index 3111ec66..8dc2a6fd 100644 --- a/main/como/merge_xomics.py +++ b/main/como/merge_xomics.py @@ -344,15 +344,9 @@ async def _merge_xomics( config = Config() logger.info(f"Merging data for {context_name}") # load data for each source if it exists. IF not load an empty dummy dataset - trnaseq = _load_rnaseq_tests( - filename=trnaseq_file, context_name=context_name, prep_method=RNASeqPreparationMethod.TOTAL - ) - mrnaseq = _load_rnaseq_tests( - filename=mrnaseq_file, context_name=context_name, prep_method=RNASeqPreparationMethod.MRNA - ) - scrnaseq = _load_rnaseq_tests( - filename=scrnaseq_file, context_name=context_name, prep_method=RNASeqPreparationMethod.SCRNA - ) + trnaseq = _load_rnaseq_tests(filename=trnaseq_file, context_name=context_name, prep_method=RNAPrepMethod.TOTAL) + mrnaseq = _load_rnaseq_tests(filename=mrnaseq_file, context_name=context_name, prep_method=RNAPrepMethod.MRNA) + scrnaseq = _load_rnaseq_tests(filename=scrnaseq_file, context_name=context_name, prep_method=RNAPrepMethod.SCRNA) proteomics = proteomics_gen.load_proteomics_tests(filename=proteomics_file, context_name=context_name) expression_list = [] From 963d4816cdf791d4bc14e98be983c6e2ff753283 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 01:46:32 -0600 Subject: [PATCH 13/91] =?UTF-8?q?=E2=9C=A8=20feat:=20added=20gitmoji?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 16d682c0..d72921b9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,8 +1,11 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks repos: -- repo: https://github.com/opensource-nepal/commitlint - rev: v1.2.0 + - repo: https://github.com/ljnsn/cz-conventional-gitmoji + rev: v0.6.1 hooks: - - id: commitlint - name: Commit Lint + - id: conventional-gitmoji +# - repo: https://github.com/commitizen-tools/commitizen +# rev: v3.29.0 +# hooks: +# - id: commitizen +# additional_dependencies: [cz-conventional-gitmoji] +# stages: [commit-msg] From c389fd9e8531737afa70e059b3c65c0b0aa56605 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 01:48:52 -0600 Subject: [PATCH 14/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20remove?= =?UTF-8?q?=20commitizen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d72921b9..8a1635ef 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,9 +3,3 @@ repos: rev: v0.6.1 hooks: - id: conventional-gitmoji -# - repo: https://github.com/commitizen-tools/commitizen -# rev: v3.29.0 -# hooks: -# - id: commitizen -# additional_dependencies: [cz-conventional-gitmoji] -# stages: [commit-msg] From 993b4a8651e25adb5e750122fe6a1a1b3a76b662 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 01:52:00 -0600 Subject: [PATCH 15/91] =?UTF-8?q?=F0=9F=8E=A8=20style:=20change=20variable?= =?UTF-8?q?=20names?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
main/como/rnaseq.py | 10 +++++----- main/como/rnaseq_gen.py | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/main/como/rnaseq.py b/main/como/rnaseq.py index 1c504d97..d67658e6 100644 --- a/main/como/rnaseq.py +++ b/main/como/rnaseq.py @@ -27,7 +27,7 @@ from scipy.signal import find_peaks from sklearn.neighbors import KernelDensity -from como.custom_types import RNASeqPreparationMethod +from como.custom_types import RNAPrepMethod from como.migrations import gene_info_migrations from como.project import Config from como.utils import convert_gene_data @@ -528,7 +528,7 @@ def cpm_filter( context_name: str, metrics: NamedMetrics, filtering_options: _FilteringOptions, - prep: RNASeqPreparationMethod, + prep: RNAPrepMethod, ) -> NamedMetrics: """Apply Counts Per Million (CPM) filtering to the count matrix for a given sample.""" config = Config() @@ -660,7 +660,7 @@ def filter_counts( metrics: NamedMetrics, technique: FilteringTechnique, filtering_options: _FilteringOptions, - prep: RNASeqPreparationMethod, + prep: RNAPrepMethod, ) -> NamedMetrics: """Filter the count matrix based on the specified technique.""" match technique: @@ -684,7 +684,7 @@ async def save_rnaseq_tests( config_filepath: Path, gene_info_filepath: Path, output_filepath: Path, - prep: RNASeqPreparationMethod, + prep: RNAPrepMethod, taxon_id: Taxon, replicate_ratio: float, batch_ratio: float, @@ -702,7 +702,7 @@ async def save_rnaseq_tests( high_batch_ratio=high_batch_ratio, ) - if prep == RNASeqPreparationMethod.SCRNA: + if prep == RNAPrepMethod.SCRNA: technique = FilteringTechnique.umi logger.warning( "Single cell filtration does not normalize and assumes " diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 8f8ffa34..47aeb8ee 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -9,7 +9,7 @@ from loguru import logger from como import Config -from como.custom_types import RNASeqPreparationMethod +from como.custom_types import 
RNAPrepMethod from como.rnaseq import FilteringTechnique, save_rnaseq_tests @@ -22,11 +22,11 @@ class _Arguments: high_batch_ratio: float filtering_technique: FilteringTechnique minimum_cutoff: int | str - library_prep: RNASeqPreparationMethod + library_prep: RNAPrepMethod taxon: Taxon def __post_init__(self): - self.library_prep = RNASeqPreparationMethod.from_string(str(self.library_prep)) + self.library_prep = RNAPrepMethod.from_string(str(self.library_prep)) self.filtering_technique = FilteringTechnique.from_string(str(self.filtering_technique)) if self.minimum_cutoff is None: @@ -46,7 +46,7 @@ async def _handle_context_batch( batch_ratio_high: float, technique: FilteringTechnique, cut_off: int | float | str, - prep: RNASeqPreparationMethod, + prep: RNAPrepMethod, taxon: Taxon, ) -> None: """Iterate through each context type and create rnaseq expression file. @@ -81,9 +81,9 @@ async def _handle_context_batch( rnaseq_input_filepath = ( config.data_dir / "data_matrices" / context_name / f"gene_counts_matrix_{prep.value}_{context_name}" ) - if prep == RNASeqPreparationMethod.SCRNA: + if prep == RNAPrepMethod.SCRNA: rnaseq_input_filepath = rnaseq_input_filepath.with_suffix(".h5ad") - elif prep in {RNASeqPreparationMethod.TOTAL, RNASeqPreparationMethod.MRNA}: + elif prep in {RNAPrepMethod.TOTAL, RNAPrepMethod.MRNA}: rnaseq_input_filepath = rnaseq_input_filepath.with_suffix(".csv") if not rnaseq_input_filepath.exists(): @@ -117,7 +117,7 @@ async def _handle_context_batch( async def rnaseq_gen( # config_filepath: Path, config_filename: str, - prep: RNASeqPreparationMethod, + prep: RNAPrepMethod, taxon_id: int | str | Taxon, replicate_ratio: float = 0.5, high_replicate_ratio: float = 1.0, From c4f53bc75a83392fb4f76a58b49771bc13b95fd7 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 01:52:37 -0600 Subject: [PATCH 16/91] =?UTF-8?q?=F0=9F=8E=A8=20style:=20sort=20dependenci?= =?UTF-8?q?es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- pyproject.toml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2c6d42c5..8e271ec0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,22 +3,24 @@ name = "COMO" dynamic = ["version"] requires-python = ">=3.10,<3.13" dependencies = [ + "aiofiles>=24.1.0", + "aioftp>=0.23.1", + "cobamp", "cobra>=0.28.0", "fast-bioservices>=0.3.9", "gurobipy>=11.0", "kaleido==0.2.1", "loguru>=0.7.2", + "openpyxl>=3.1.5", "pandas>=1.3.5", "plotly>=5.24.1", + "pydantic-settings", + "pydantic>=2.10.3", "scanpy>=1.9.8", - "scipy>=1.7.3", "scikit-learn>=1.5.2", + "scipy>=1.7.3", "setuptools<60.0", - "openpyxl>=3.1.5", - "aiofiles>=24.1.0", - "aioftp>=0.23.1", "troppo", - "cobamp", ] [build-system] From 9dd5de6aef8487326249d4ea550d85c4102010eb Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 01:54:00 -0600 Subject: [PATCH 17/91] =?UTF-8?q?=E2=9E=95=20dep-add:=20add=20interactive?= =?UTF-8?q?=20optional=20dependencies?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 8e271ec0..4519ce67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,12 @@ dependencies = [ "troppo", ] +[project.optional-dependencies] +interactive = [ + "ipython>=8.0.0", + "jupyterlab>=4.3.2", +] + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" From ba0c445003e69e345f9f1a1504ddaec36c6f0369 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Sat, 7 Dec 2024 01:54:33 -0600 Subject: [PATCH 18/91] =?UTF-8?q?=E2=9E=95=20dep-add:=20add=20commitizen?= =?UTF-8?q?=20and=20conventional=20gitmoji?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 4519ce67..859ddcbe 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -53,8 +53,12 @@ dev-dependencies = [ "ruff>=0.8.0", "hypothesis>=6.122.1", "pytest-cov>=6.0.0", + "commitizen>=4.1.0", + "cz-conventional-gitmoji>=0.6.1", ] [tool.uv.sources] troppo = { git = "https://github.com/JoshLoecker/troppo", rev = "update_dependencies" } cobamp = { git = "https://github.com/JoshLoecker/cobamp", rev = "update_packages" } +pydantic-settings = { git = "https://github.com/pydantic/pydantic-settings" } + From c7d0576e2138fea281c330d3ae7d994a012e79b4 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:48:43 -0600 Subject: [PATCH 19/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20accept?= =?UTF-8?q?=20file=20paths=20instead=20of=20dataframes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 3d31e050..c6cfd271 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -427,7 +427,7 @@ async def _create_config_df(context_name: str, /, como_input_dir: Path) -> pd.Da async def _create_gene_info_file( *, - counts_matrix: pd.DataFrame | Path, + counts_matrix_filepaths: list[Path], output_filepath: Path, taxon_id: type_taxon, cache: bool, @@ -435,15 +435,17 @@ async def _create_gene_info_file( """Create gene info file for specified context by reading first column in its count matrix file.""" logger.info("Fetching gene info") - data: pd.DataFrame | sc.AnnData = ( - (pd.read_csv(counts_matrix) if counts_matrix.suffix == ".csv" else sc.read_h5ad(counts_matrix)) - if isinstance(counts_matrix, Path) - else counts_matrix - ) + async def read_counts(file: Path) -> list[str]: + data = await asyncio.to_thread(pd.read_csv if file.suffix == ".csv" else sc.read_h5ad, file) + conversion = ( + await 
ensembl_to_gene_id_and_symbol(ids=data["ensembl_gene_id"].tolist(), taxon=taxon) + if isinstance(data, pd.DataFrame) + else await gene_symbol_to_ensembl_and_gene_id(symbols=data.var_names.tolist(), taxon=taxon) + ) + return conversion["entrez_gene_id"].tolist() - input_values = data.iloc[:, 0].tolist() if isinstance(data, pd.DataFrame) else data.var_names.tolist() - conversion = await convert_gene_data(input_values, taxon_id) - genes = conversion["entrez_gene_id"].astype(str).tolist() + logger.info("Fetching gene info (this may take 1-5 minutes)") + genes = set(chain.from_iterable(await asyncio.gather(*[read_counts(f) for f in counts_matrix_filepaths]))) mygene = MyGene(cache=cache) gene_data = await mygene.query( From b18383a00bdbc915b8553e462bc31cba4cde9cfc Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:49:34 -0600 Subject: [PATCH 20/91] =?UTF-8?q?=F0=9F=93=9D=20docs:=20update=20documenta?= =?UTF-8?q?tion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index c6cfd271..58122de3 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -432,8 +432,10 @@ async def _create_gene_info_file( taxon_id: type_taxon, cache: bool, ): - """Create gene info file for specified context by reading first column in its count matrix file.""" - logger.info("Fetching gene info") + """Create a gene information file context. 
+ + The gene information file will be created by reading each matrix filepath in the provided list + """ async def read_counts(file: Path) -> list[str]: data = await asyncio.to_thread(pd.read_csv if file.suffix == ".csv" else sc.read_h5ad, file) @@ -616,19 +618,20 @@ async def rnaseq_preprocess( # noqa: C901 Fetches additional gene information from a provided matrix or gene counts, or optionally creates this matrix using gene count files obtained using STAR aligner - :param context_names: The context/cell type being processed - :param mode: The mode of operation - :param taxon_id: The NCBI taxonomy ID + :param context_name: The context/cell type being processed + :param taxon: The NCBI taxonomy ID :param output_gene_info_filepath: Path to the output gene information CSV file :param output_trna_config_filepath: Path to the output tRNA config file (if in "create" mode) - :param output_mrna_config_filepath: Path to the output mRNA config file (if in "create" mode) - :param output_count_matrices_dirpath: The path to write all created count matrices - :param output_trna_count_matrix: The path to write total RNA count matrices - :param output_mrna_count_matrix: The path to write messenger RNA count matrices - :param input_como_dirpath: If in "create" mode, the input path(s) to the COMO_input directory of the current context + :param output_polya_config_filepath: Path to the output mRNA config file (if in "create" mode) + :param output_trna_count_matrix_filepath: The path to write total RNA count matrices + :param output_polya_count_matrix_filepath: The path to write messenger RNA count matrices + :param como_context_dir: If in "create" mode, the input path(s) to the COMO_input directory of the current context i.e., the directory containing "fragmentSizes", "geneCounts", "insertSizeMetrics", etc. 
directories :param input_matrix_filepath: If in "provide" mode, the path(s) to the count matrices to be processed + :param preparation_method: The preparation method :param cache: Should HTTP requests be cached + :param log_level: The logging level + :param log_location: The logging location """ context_names = _listify(context_names) mode = _listify(mode) From 90a6dac1e9058c3929ce96accad476dc252d8daf Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:50:18 -0600 Subject: [PATCH 21/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20accept?= =?UTF-8?q?=20integers=20only=20for=20taxon=20IDs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 58122de3..48bdd3b3 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -450,11 +450,7 @@ async def read_counts(file: Path) -> list[str]: genes = set(chain.from_iterable(await asyncio.gather(*[read_counts(f) for f in counts_matrix_filepaths]))) mygene = MyGene(cache=cache) - gene_data = await mygene.query( - items=genes, - taxon=taxon_id, - scopes="entrezgene", - ) + gene_data = await mygene.query(items=list(genes), taxon=taxon, scopes="entrezgene") gene_info: pd.DataFrame = pd.DataFrame( data=None, columns=pd.Index(data=["ensembl_gene_id", "gene_symbol", "entrez_gene_id", "start_position", "end_position"]), From e321c3e1dc0cc7434b7437d17da6a7b638adca76 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:50:44 -0600 Subject: [PATCH 22/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20pythonic?= =?UTF-8?q?=20approach=20to=20calculating=20size?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git 
a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 48bdd3b3..e83ab0e8 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -462,11 +462,9 @@ async def read_counts(file: Path) -> list[str]: ensembl_ids = ensembl_ids[0] start_pos = data.get("genomic_pos.start", 0) + start_pos = sum(start_pos) / len(start_pos) if isinstance(start_pos, list) else start_pos end_pos = data.get("genomic_pos.end", 0) - if isinstance(start_pos, list): - start_pos = sum(start_pos) / len(start_pos) - if isinstance(end_pos, list): - end_pos = sum(end_pos) / len(end_pos) + end_pos = sum(end_pos) / len(end_pos) if isinstance(end_pos, list) else end_pos gene_info.at[i, "gene_symbol"] = data.get("symbol", "-") gene_info.at[i, "entrez_gene_id"] = data.get("entrezgene", "-") From e54309e260146c0919ad92d80a72715629765951 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:51:18 -0600 Subject: [PATCH 23/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20accept?= =?UTF-8?q?=20integer=20for=20taxon=20id?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index e83ab0e8..9df07cf4 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -429,7 +429,7 @@ async def _create_gene_info_file( *, counts_matrix_filepaths: list[Path], output_filepath: Path, - taxon_id: type_taxon, + taxon: int, cache: bool, ): """Create a gene information file context. 
@@ -473,10 +473,15 @@ async def read_counts(file: Path) -> list[str]: gene_info.at[i, "end_position"] = end_pos gene_info = gene_info[ - (gene_info["entrez_gene_id"] != "-") & (gene_info["ensembl_gene_id"] != "-") & (gene_info["gene_symbol"] != "-") + ( + (gene_info["entrez_gene_id"] != "-") + & (gene_info["ensembl_gene_id"] != "-") + & (gene_info["gene_symbol"] != "-") + ) ] gene_info["size"] = gene_info["end_position"].astype(int) - gene_info["start_position"].astype(int) gene_info.drop(columns=["start_position", "end_position"], inplace=True) + gene_info.sort_values(by="ensembl_gene_id", inplace=True) gene_info.to_csv(output_filepath, index=False) logger.success(f"Gene Info file written at '{output_filepath}'") From 6cc638f3b09e75667883b8fe0d925dffaaa2d3ae Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:52:11 -0600 Subject: [PATCH 24/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20more=20c?= =?UTF-8?q?oncise=20method=20of=20creating=20matrix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 9df07cf4..ade8e4d7 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -488,40 +488,23 @@ async def read_counts(file: Path) -> list[str]: async def _create_matrix_file( context_name: str, - taxon_id: type_taxon, - output_gene_info_filepath: Path, output_config_filepath: Path, - output_gene_matrix_filepath: Path, - como_dirpath: type_path, - rna_type: type_rna, - cache: bool, + como_context_dir: type_path, + output_counts_matrix_filepath: Path, + rna: type_rna, ) -> None: - como_context_dir = como_dirpath / context_name - config_df = await _create_config_df(context_name, como_input_dir=como_dirpath) - counts_matrix = await _write_counts_matrix( + config_df = await 
_create_config_df(context_name, como_input_dir=como_context_dir) + await _write_counts_matrix( config_df=config_df, como_context_dir=como_context_dir, - output_counts_matrix_filepath=output_gene_matrix_filepath, - rna_type=rna_type, + output_counts_matrix_filepath=output_counts_matrix_filepath, + rna=rna, ) with pd.ExcelWriter(output_config_filepath) as writer: - subset_config = config_df[config_df["library_prep"] == rna_type] + subset_config = config_df[config_df["library_prep"] == rna] subset_config.to_excel(writer, sheet_name=context_name, header=True, index=False) - await _create_gene_info_file( - counts_matrix=counts_matrix, - output_filepath=output_gene_info_filepath, - taxon_id=taxon_id, - cache=cache, - ) - -async def _process_items( - context_names: list[str], - mode: list[Literal["create", "provide"]], - taxon_id: list[str], - input_como_dirpath: list[Path] | None, - input_matrix_filepath: list[Path] | None, output_gene_info_filepath: Path, output_trna_config_filepath: Path | None, output_mrna_config_filepath: Path | None, From a348df736ca81c6d566e30df071452b24c6bcc85 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:52:46 -0600 Subject: [PATCH 25/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20remove?= =?UTF-8?q?=20command=20line=20interface?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 122 --------------------------------- 1 file changed, 122 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index ade8e4d7..47029316 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -684,125 +684,3 @@ async def rnaseq_preprocess( # noqa: C901 input_matrix_filepath=input_matrix_filepath, cache=cache, ) - - -def _parse_args(): - parser = argparse.ArgumentParser( - prog="rnaseq_preprocess.py", - description="Fetches additional gene information from a provided matrix or gene counts, " - "or optionally 
creates this matrix using gene count files obtained using STAR aligner. " - "Creation of counts matrix from STAR aligner output requires that the 'COMO_input' " - "folder exists and is correctly structured according to the normalization technique being used. " - "A correctly structured folder can be made using our Snakemake-based alignment pipeline at:" - "https://github.com/HelikarLab/FastqToGeneCounts", - epilog="For additional help, please post questions/issues in the MADRID GitHub repo at" - "https://github.com/HelikarLab/COMO", - ) - parser.add_argument( - "--context-names", - required=True, - type=str, - nargs="*", - help="Tissue/cell name of models to generate. These names should correspond to the folders" - "in 'COMO_input/' if creating count matrix files, or to" - "'work/data/data_matrices//gene_counts_matrix_.csv' if supplying" - "the count matrix as an imported .csv file. If making multiple models in a batch, then" - "use the format: 'context1 context2 context3'", - ) - parser.add_argument( - "--mode", - type=str, - nargs="*", - required=True, - help="Mode of rnaseq_preprocess.py, either 'create' or 'provide'", - ) - parser.add_argument( - "--taxon-id", - required=False, - nargs="*", - type=str, - default="9606", - help="BioDbNet taxon ID number, also accepts 'human', or 'mouse'", - ) - parser.add_argument( - "--output-gene-info-filepath", - required=False, - type=Path, - help="The location to write gene information", - ) - parser.add_argument( - "--output-count-matrices-dir", - required=False, - type=str, - help="All count matrix files can be placed in a single directory " - "if they should not be saved to specific locations", - ) - parser.add_argument( - "--output-trna-count-matrix", - required=False, - type=str, - help="The location to save total RNA count matrices", - ) - parser.add_argument( - "--output-mrna-count-matrix", - required=False, - type=str, - help="The location to save messenger RNA count matrices", - ) - parser.add_argument( - 
"--output-trna-config-filepath", - required=False, - default=None, - type=Path, - help="The location to save TRNA config file", - ) - parser.add_argument( - "--output-mrna-config-filepath", - required=False, - default=None, - type=Path, - help="The location to save MRNA config file", - ) - parser.add_argument( - "--input-como-dirpath", - nargs="*", - required=False, - default=None, - type=str, - help="Path to COMO input directory", - ) - parser.add_argument( - "--input-matrix-filepath", - required=False, - nargs="*", - default=None, - type=str, - help="Path to input matrix file", - ) - parser.add_argument( - "--cache", - required=False, - type=bool, - default=True, - help="Cache files for faster processing", - ) - return _Arguments(**vars(parser.parse_args())) - - -if __name__ == "__main__": - args: _Arguments = _parse_args() - asyncio.run( - rnaseq_preprocess( - context_names=args.context_names, - mode=args.mode, - taxon_id=args.taxon_id, - output_gene_info_filepath=args.output_gene_info_filepath, - output_trna_count_matrix=args.output_trna_count_matrix, - output_mrna_count_matrix=args.output_mrna_count_matrix, - output_trna_config_filepath=args.output_trna_config_filepath, - output_mrna_config_filepath=args.output_mrna_config_filepath, - input_como_dirpath=args.input_como_dirpath, - input_matrix_filepath=args.input_matrix_filepath, - cache=args.cache, - ) - ) From c26e12ecb34b56e18314f9f440f19c636922735b Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:53:27 -0600 Subject: [PATCH 26/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20define?= =?UTF-8?q?=20specific=20input/output=20file=20paths?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 97 +++++++++++++--------------------- 1 file changed, 38 insertions(+), 59 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 47029316..ae226f02 100644 --- 
a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -615,72 +615,51 @@ async def rnaseq_preprocess( # noqa: C901 :param log_level: The logging level :param log_location: The logging location """ - context_names = _listify(context_names) - mode = _listify(mode) - taxon_id = _listify(taxon_id) - output_count_matrices_dirpath: list[Path] = [Path(i) for i in _listify(output_count_matrices_dirpath)] if output_count_matrices_dirpath else [] # fmt: skip # noqa: E501 - output_trna_count_matrix: list[Path] = [Path(i) for i in _listify(output_trna_count_matrix)] if output_trna_count_matrix else [] # fmt: skip # noqa: E501 - output_mrna_count_matrix: list[Path] = [Path(i) for i in _listify(output_mrna_count_matrix)] if output_mrna_count_matrix else [] # fmt: skip # noqa: E501 - input_como_dirpath: list[Path] = [Path(i) for i in _listify(input_como_dirpath)] if input_como_dirpath else [] - input_matrix_filepath: list[Path] = ( - [Path(i) for i in _listify(input_matrix_filepath)] if input_matrix_filepath else [] - ) + with contextlib.suppress(ValueError): + logger.remove(0) + logger.add( + sink=log_location, + level=log_level, + format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{line} - {message}", # noqa: E501 + ) - _validate_matrix_output_args( - output_count_matrices_dirpath=output_count_matrices_dirpath, - output_trna_count_matrix_filepath=output_trna_count_matrix, - output_mrna_count_matrix_filepath=output_mrna_count_matrix, + output_gene_info_filepath = output_gene_info_filepath.resolve() + como_context_dir = como_context_dir.resolve() + input_matrix_filepath = [i.resolve() for i in _listify(input_matrix_filepath)] if input_matrix_filepath else None + output_trna_config_filepath = ( + output_trna_config_filepath.resolve() if output_trna_config_filepath else output_trna_config_filepath + ) + output_polya_config_filepath = ( + output_polya_config_filepath.resolve() if output_polya_config_filepath else output_polya_config_filepath + ) + 
output_trna_count_matrix_filepath = ( + output_trna_count_matrix_filepath.resolve() + if output_trna_count_matrix_filepath + else output_trna_count_matrix_filepath + ) + output_polya_count_matrix_filepath = ( + output_polya_count_matrix_filepath.resolve() + if output_polya_count_matrix_filepath + else output_polya_count_matrix_filepath ) - if len(input_como_dirpath) == 0 and len(input_matrix_filepath) == 0: - raise ValueError("Either 'como_input_dirpath' or 'input_matrix_filepath' must be provided.") - - if not any({output_trna_config_filepath, output_mrna_config_filepath}): - raise ValueError("Either 'output_trna_config_filepath' or 'output_mrna_config_filepath' must be provided.") - if output_trna_config_filepath and output_trna_config_filepath.suffix not in {".xlsx", ".xls"}: - raise ValueError("output_trna_config_filepath must be an Excel file.") - if output_mrna_config_filepath and output_mrna_config_filepath.suffix not in {".xlsx", ".xls"}: - raise ValueError("output_mrna_config_filepath must be an Excel file.") - - if not all(m in {"create", "provide"} for m in mode): - raise ValueError(f"Invalid mode(s): {', '.join(m for m in mode if m not in {'create', 'provide'})}") - - if not all(t.isdigit() or isinstance(t, int) or t in {"human", "mouse"} for t in taxon_id): - raise ValueError("Invalid taxon_id(s). 
Must be integer, 'human', or 'mouse'.") + input_matrix_filepath = _listify(input_matrix_filepath) + preparation_method = _listify(preparation_method) - if not (len(context_names) == len(mode) == len(taxon_id) == len(input_como_dirpath or input_matrix_filepath)): + if len(input_matrix_filepath) != len(preparation_method): raise ValueError( - "context_names, mode, taxon_id, and (como or matrix) input must be the same length.\n" - f"context_names: {len(context_names)}\n" - f"mode: {len(mode)}\n" - f"taxon_id: {len(taxon_id)}\n" - f"como_input_dirpath or matrix_filepath: {len(input_como_dirpath or input_matrix_filepath)}" + "input_matrix_filepath (--input-matrix-filepath) and " + "preparation_method (--preparation-method) must be the same length." ) - - for path in input_como_dirpath: - if not path.exists(): - raise ValueError(f"COMO input directory does not exist: {path}") - if not path.is_dir(): - raise ValueError(f"COMO input directory must be a directory: {path}") - - for path in input_matrix_filepath: - if not path.exists(): - raise ValueError(f"Input matrix file does not exist: {path}") - if path.suffix not in {".csv", ".h5ad"}: - raise ValueError(f"Input matrix file must be a .csv or .h5ad file: {path}") - if not path.is_file(): - raise ValueError(f"Input matrix file must be a file: {path}") - - await _process_items( - context_names=context_names, - mode=mode, - taxon_id=taxon_id, + await _process( + context_name=context_name, + taxon=taxon, + como_context_dir=como_context_dir, + input_matrix_filepath=input_matrix_filepath, output_gene_info_filepath=output_gene_info_filepath, output_trna_config_filepath=output_trna_config_filepath, - output_mrna_config_filepath=output_mrna_config_filepath, - output_trna_count_matrix=output_trna_count_matrix, - output_mrna_count_matrix=output_mrna_count_matrix, - input_como_dirpath=input_como_dirpath, - input_matrix_filepath=input_matrix_filepath, + output_mrna_config_filepath=output_polya_config_filepath, + 
output_trna_matrix_filepath=output_trna_count_matrix_filepath, + output_mrna_matrix_filepath=output_polya_count_matrix_filepath, cache=cache, ) From 8ee3f0c26ba887656a773906aac49cf2c21327eb Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:54:19 -0600 Subject: [PATCH 27/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20remove?= =?UTF-8?q?=20unnecessary=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 96 +++++++++++++--------------------- 1 file changed, 35 insertions(+), 61 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index ae226f02..3276e90a 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -505,80 +505,54 @@ async def _create_matrix_file( subset_config.to_excel(writer, sheet_name=context_name, header=True, index=False) +async def _process( + context_name: str, + taxon: int, output_gene_info_filepath: Path, + como_context_dir: Path | None, + input_matrix_filepath: list[Path] | None, output_trna_config_filepath: Path | None, output_mrna_config_filepath: Path | None, - output_trna_count_matrix: list[Path] | None, - output_mrna_count_matrix: list[Path] | None, + output_trna_matrix_filepath: Path | None, + output_mrna_matrix_filepath: Path | None, cache: bool, ): - tasks = [] - for i, m in enumerate(mode): - if m == "create" and output_trna_config_filepath: - tasks.append( - asyncio.create_task( - _create_matrix_file( - context_name=context_names[i], - taxon_id=taxon_id[i], - output_gene_matrix_filepath=output_trna_count_matrix[i], - como_dirpath=input_como_dirpath[i], - output_gene_info_filepath=output_gene_info_filepath, - output_config_filepath=output_trna_config_filepath, - rna_type="total", - cache=cache, - ) - ) - ) + rna_types: list[tuple[type_rna, Path, Path]] = [] + if output_trna_config_filepath: + rna_types.append(("total", output_trna_config_filepath, 
output_trna_matrix_filepath)) + if output_mrna_config_filepath: + rna_types.append(("polya", output_mrna_config_filepath, output_mrna_matrix_filepath)) - if m == "create" and output_mrna_config_filepath: - tasks.append( - asyncio.create_task( - _create_matrix_file( - context_name=context_names[i], - taxon_id=taxon_id[i], - output_gene_matrix_filepath=output_mrna_count_matrix[i], - como_dirpath=input_como_dirpath[i], - output_gene_info_filepath=output_gene_info_filepath, - output_config_filepath=output_mrna_config_filepath, - rna_type="mrna", - cache=cache, - ) + # if provided, iterate through como-input specific directories + tasks = [] + for rna, output_config_filepath, output_matrix_filepath in rna_types: + tasks.append( + asyncio.create_task( + _create_matrix_file( + context_name=context_name, + output_config_filepath=output_config_filepath, + como_context_dir=como_context_dir, + output_counts_matrix_filepath=output_matrix_filepath, + rna=rna, ) ) + ) - if m == "provide": - tasks.append( - asyncio.create_task( - _create_gene_info_file( - counts_matrix=input_matrix_filepath[i], - output_filepath=output_gene_info_filepath, - taxon_id=taxon_id[i], - cache=cache, - ) - ) - ) await asyncio.gather(*tasks) + # create the gene info filepath based on provided data + await _create_gene_info_file( + counts_matrix_filepaths=[ + f + for f in [*input_matrix_filepath, output_trna_matrix_filepath, output_mrna_matrix_filepath] + if f is not None + ], + output_filepath=output_gene_info_filepath, + taxon=taxon, + cache=cache, + ) -def _validate_matrix_output_args( - output_count_matrices_dirpath: list, - output_trna_count_matrix_filepath: list, - output_mrna_count_matrix_filepath: list, -): - def _raise(): - raise ValueError( - "output_count_matrices_dirpath OR " - "(output_trna_count_matrix_filepath AND output_mrna_count_matrix_filepath) can be provided" - ) - # output_count_matrices_dir OR (output_trna_count_matrix AND output_mrna_count_matrix) can be provided - # Check this 
condition is satisfied - if output_count_matrices_dirpath and (output_trna_count_matrix_filepath or output_mrna_count_matrix_filepath): - _raise() - if output_trna_count_matrix_filepath and not output_mrna_count_matrix_filepath: - _raise() - if not output_trna_count_matrix_filepath and output_mrna_count_matrix_filepath: - _raise() async def rnaseq_preprocess( # noqa: C901 From 6a9601754febdedb69b28d19095af1aa14ebf7db Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:54:45 -0600 Subject: [PATCH 28/91] =?UTF-8?q?=E2=99=BB=EF=B8=8F=20refactor:=20allow=20?= =?UTF-8?q?processing=20specific=20filepaths?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main/como/rnaseq_preprocess.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 3276e90a..6d84837b 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -553,21 +553,20 @@ async def _process( ) - - -async def rnaseq_preprocess( # noqa: C901 - context_names: str | list[str], - mode: Literal["create", "provide"] | list[Literal["create", "provide"]], - taxon_id: type_taxon | list[type_taxon], - input_como_dirpath: type_path | list[type_path] | None = None, - input_matrix_filepath: type_path | list[type_path] | None = None, - output_gene_info_filepath: Path | None = None, +async def rnaseq_preprocess( + context_name: str, + taxon: int, + output_gene_info_filepath: Path, + como_context_dir: Path | None = None, + input_matrix_filepath: Path | list[Path] | None = None, + preparation_method: RNAPrepMethod | list[RNAPrepMethod] | None = None, output_trna_config_filepath: Path | None = None, - output_mrna_config_filepath: Path | None = None, - output_count_matrices_dirpath: list[Path] | None = None, - output_trna_count_matrix: list[Path] | None = None, - output_mrna_count_matrix: list[Path] | None = None, + 
output_polya_config_filepath: Path | None = None, + output_trna_count_matrix_filepath: Path | None = None, + output_polya_count_matrix_filepath: Path | None = None, cache: bool = True, + log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO", + log_location: str | TextIOWrapper = sys.stderr, ) -> None: """Preprocesses RNA-seq data for downstream analysis. From c74e83f9716347a91418bac7bc56f21cf4c84073 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:58:15 -0600 Subject: [PATCH 29/91] feat: use commitizen for conventional commits --- .pre-commit-config.yaml | 7 +- pyproject.toml | 6 + uv.lock | 1438 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 1393 insertions(+), 58 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8a1635ef..bf67014b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,6 @@ repos: - - repo: https://github.com/ljnsn/cz-conventional-gitmoji - rev: v0.6.1 + - repo: https://github.com/commitizen-tools/commitizen + rev: master hooks: - - id: conventional-gitmoji + - id: commitizen + stages: [ commit-msg ] \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 859ddcbe..85a1308f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,3 +62,9 @@ troppo = { git = "https://github.com/JoshLoecker/troppo", rev = "update_dependen cobamp = { git = "https://github.com/JoshLoecker/cobamp", rev = "update_packages" } pydantic-settings = { git = "https://github.com/pydantic/pydantic-settings" } +[tool.commitizen] +name = "cz_conventional_commits" +tag_format = "$version" +version_scheme = "semver2" +version_provider = "pep621" +update_changelog_on_bump = true diff --git a/uv.lock b/uv.lock index f090d8cc..3a417c7a 100644 --- a/uv.lock +++ b/uv.lock @@ -78,6 +78,57 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/00/2344469e2084fb287c2e0b57b72910309874c3245463acd6cf5e3db69324/appdirs-1.4.4-py2.py3-none-any.whl", hash = 
"sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128", size = 9566 }, ] +[[package]] +name = "appnope" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321 }, +] + +[[package]] +name = "argcomplete" +version = "3.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7f/03/581b1c29d88fffaa08abbced2e628c34dd92d32f1adaed7e42fc416938b0/argcomplete-3.5.2.tar.gz", hash = "sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb", size = 82341 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/37/3fa718aaadd36e073891138dc3ebd919a71bafd4881c97d8a133265af191/argcomplete-3.5.2-py3-none-any.whl", hash = "sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472", size = 43506 }, +] + +[[package]] +name = "argon2-cffi" +version = "23.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "argon2-cffi-bindings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/31/fa/57ec2c6d16ecd2ba0cf15f3c7d1c3c2e7b5fcb83555ff56d7ab10888ec8f/argon2_cffi-23.1.0.tar.gz", hash = "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08", size = 42798 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/6a/e8a041599e78b6b3752da48000b14c8d1e8a04ded09c88c714ba047f34f5/argon2_cffi-23.1.0-py3-none-any.whl", hash = "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea", size 
= 15124 }, +] + +[[package]] +name = "argon2-cffi-bindings" +version = "21.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/e9/184b8ccce6683b0aa2fbb7ba5683ea4b9c5763f1356347f1312c32e3c66e/argon2-cffi-bindings-21.2.0.tar.gz", hash = "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3", size = 1779911 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/13/838ce2620025e9666aa8f686431f67a29052241692a3dd1ae9d3692a89d3/argon2_cffi_bindings-21.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367", size = 29658 }, + { url = "https://files.pythonhosted.org/packages/b3/02/f7f7bb6b6af6031edb11037639c697b912e1dea2db94d436e681aea2f495/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9524464572e12979364b7d600abf96181d3541da11e23ddf565a32e70bd4dc0d", size = 80583 }, + { url = "https://files.pythonhosted.org/packages/ec/f7/378254e6dd7ae6f31fe40c8649eea7d4832a42243acaf0f1fff9083b2bed/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae", size = 86168 }, + { url = "https://files.pythonhosted.org/packages/74/f6/4a34a37a98311ed73bb80efe422fed95f2ac25a4cacc5ae1d7ae6a144505/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:58ed19212051f49a523abb1dbe954337dc82d947fb6e5a0da60f7c8471a8476c", size = 82709 }, + { url = "https://files.pythonhosted.org/packages/74/2b/73d767bfdaab25484f7e7901379d5f8793cccbb86c6e0cbc4c1b96f63896/argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:bd46088725ef7f58b5a1ef7ca06647ebaf0eb4baff7d1d0d177c6cc8744abd86", size = 83613 }, + { url = 
"https://files.pythonhosted.org/packages/4f/fd/37f86deef67ff57c76f137a67181949c2d408077e2e3dd70c6c42912c9bf/argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_i686.whl", hash = "sha256:8cd69c07dd875537a824deec19f978e0f2078fdda07fd5c42ac29668dda5f40f", size = 84583 }, + { url = "https://files.pythonhosted.org/packages/6f/52/5a60085a3dae8fded8327a4f564223029f5f54b0cb0455a31131b5363a01/argon2_cffi_bindings-21.2.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f1152ac548bd5b8bcecfb0b0371f082037e47128653df2e8ba6e914d384f3c3e", size = 88475 }, + { url = "https://files.pythonhosted.org/packages/8b/95/143cd64feb24a15fa4b189a3e1e7efbaeeb00f39a51e99b26fc62fbacabd/argon2_cffi_bindings-21.2.0-cp36-abi3-win32.whl", hash = "sha256:603ca0aba86b1349b147cab91ae970c63118a0f30444d4bc80355937c950c082", size = 27698 }, + { url = "https://files.pythonhosted.org/packages/37/2c/e34e47c7dee97ba6f01a6203e0383e15b60fb85d78ac9a15cd066f6fe28b/argon2_cffi_bindings-21.2.0-cp36-abi3-win_amd64.whl", hash = "sha256:b2ef1c30440dbbcba7a5dc3e319408b59676e2e039e2ae11a8775ecf482b192f", size = 30817 }, + { url = "https://files.pythonhosted.org/packages/5a/e4/bf8034d25edaa495da3c8a3405627d2e35758e44ff6eaa7948092646fdcc/argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93", size = 53104 }, +] + [[package]] name = "array-api-compat" version = "1.9" @@ -87,6 +138,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/45/78/17985eac75d04c30f8cc375e4400e20b0787dc4a1c853a8fe9fad7932f55/array_api_compat-1.9-py3-none-any.whl", hash = "sha256:76db63c2d2461ba0e86b920c8b087f0a1617eb14de3ec29fe6811eeecad9c5e8", size = 49489 }, ] +[[package]] +name = "arrow" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, + { name = "types-python-dateutil" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/2e/00/0f6e8fcdb23ea632c866620cc872729ff43ed91d284c866b515c6342b173/arrow-1.3.0.tar.gz", hash = "sha256:d4540617648cb5f895730f1ad8c82a65f2dad0166f57b75f3ca54759c4d67a85", size = 131960 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/ed/e97229a566617f2ae958a6b13e7cc0f585470eac730a73e9e82c32a3cdd2/arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80", size = 66419 }, +] + +[[package]] +name = "asttokens" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, +] + +[[package]] +name = "async-lru" +version = "2.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/80/e2/2b4651eff771f6fd900d233e175ddc5e2be502c7eb62c0c42f975c6d36cd/async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627", size = 10019 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/9f/3c3503693386c4b0f245eaf5ca6198e3b28879ca0a40bde6b0e319793453/async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224", size = 6111 }, +] + [[package]] name = "attrs" version = "24.2.0" @@ -96,6 +181,39 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/6a/21/5b6702a7f963e95456c0de2d495f67bf5fd62840ac655dc451586d23d39a/attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2", size = 63001 }, ] +[[package]] +name = "babel" +version = "2.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/74/f1bc80f23eeba13393b7222b11d95ca3af2c1e28edca18af487137eefed9/babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316", size = 9348104 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, +] + +[[package]] +name = "beautifulsoup4" +version = "4.12.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/ca/824b1195773ce6166d388573fc106ce56d4a805bd7427b624e063596ec58/beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051", size = 581181 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/fe/e8c672695b37eecc5cbf43e1d0638d88d66ba3a44c4d321c796f4e59167f/beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed", size = 147925 }, +] + +[[package]] +name = "bleach" +version = "6.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/9a/0e33f5054c54d349ea62c277191c020c2d6ef1d65ab2cb1993f91ec846d1/bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f", size = 203083 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e", size = 163406 }, +] + [[package]] name = "boolean-py" version = "4.0" @@ -114,6 +232,106 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/90/3c9ff0512038035f59d279fddeb79f5f1eccd8859f06d6163c58798b9487/certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8", size = 167321 }, ] +[[package]] +name = "cffi" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/07/f44ca684db4e4f08a3fdc6eeb9a0d15dc6883efc7b8c90357fdbf74e186c/cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14", size = 182191 }, + { url = "https://files.pythonhosted.org/packages/08/fd/cc2fedbd887223f9f5d170c96e57cbf655df9831a6546c1727ae13fa977a/cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67", size = 178592 }, + { url = "https://files.pythonhosted.org/packages/de/cc/4635c320081c78d6ffc2cab0a76025b691a91204f4aa317d568ff9280a2d/cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382", size = 426024 }, + { url = 
"https://files.pythonhosted.org/packages/b6/7b/3b2b250f3aab91abe5f8a51ada1b717935fdaec53f790ad4100fe2ec64d1/cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702", size = 448188 }, + { url = "https://files.pythonhosted.org/packages/d3/48/1b9283ebbf0ec065148d8de05d647a986c5f22586b18120020452fff8f5d/cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3", size = 455571 }, + { url = "https://files.pythonhosted.org/packages/40/87/3b8452525437b40f39ca7ff70276679772ee7e8b394934ff60e63b7b090c/cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6", size = 436687 }, + { url = "https://files.pythonhosted.org/packages/8d/fb/4da72871d177d63649ac449aec2e8a29efe0274035880c7af59101ca2232/cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17", size = 446211 }, + { url = "https://files.pythonhosted.org/packages/ab/a0/62f00bcb411332106c02b663b26f3545a9ef136f80d5df746c05878f8c4b/cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8", size = 461325 }, + { url = "https://files.pythonhosted.org/packages/36/83/76127035ed2e7e27b0787604d99da630ac3123bfb02d8e80c633f218a11d/cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e", size = 438784 }, + { url = "https://files.pythonhosted.org/packages/21/81/a6cd025db2f08ac88b901b745c163d884641909641f9b826e8cb87645942/cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be", size = 461564 }, + { url = 
"https://files.pythonhosted.org/packages/f8/fe/4d41c2f200c4a457933dbd98d3cf4e911870877bd94d9656cc0fcb390681/cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", size = 171804 }, + { url = "https://files.pythonhosted.org/packages/d1/b6/0b0f5ab93b0df4acc49cae758c81fe4e5ef26c3ae2e10cc69249dfd8b3ab/cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", size = 181299 }, + { url = "https://files.pythonhosted.org/packages/6b/f4/927e3a8899e52a27fa57a48607ff7dc91a9ebe97399b357b85a0c7892e00/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", size = 182264 }, + { url = "https://files.pythonhosted.org/packages/6c/f5/6c3a8efe5f503175aaddcbea6ad0d2c96dad6f5abb205750d1b3df44ef29/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", size = 178651 }, + { url = "https://files.pythonhosted.org/packages/94/dd/a3f0118e688d1b1a57553da23b16bdade96d2f9bcda4d32e7d2838047ff7/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", size = 445259 }, + { url = "https://files.pythonhosted.org/packages/2e/ea/70ce63780f096e16ce8588efe039d3c4f91deb1dc01e9c73a287939c79a6/cffi-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", size = 469200 }, + { url = "https://files.pythonhosted.org/packages/1c/a0/a4fa9f4f781bda074c3ddd57a572b060fa0df7655d2a4247bbe277200146/cffi-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", size = 477235 }, + { url = 
"https://files.pythonhosted.org/packages/62/12/ce8710b5b8affbcdd5c6e367217c242524ad17a02fe5beec3ee339f69f85/cffi-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", size = 459721 }, + { url = "https://files.pythonhosted.org/packages/ff/6b/d45873c5e0242196f042d555526f92aa9e0c32355a1be1ff8c27f077fd37/cffi-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", size = 467242 }, + { url = "https://files.pythonhosted.org/packages/1a/52/d9a0e523a572fbccf2955f5abe883cfa8bcc570d7faeee06336fbd50c9fc/cffi-1.17.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", size = 477999 }, + { url = "https://files.pythonhosted.org/packages/44/74/f2a2460684a1a2d00ca799ad880d54652841a780c4c97b87754f660c7603/cffi-1.17.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", size = 454242 }, + { url = "https://files.pythonhosted.org/packages/f8/4a/34599cac7dfcd888ff54e801afe06a19c17787dfd94495ab0c8d35fe99fb/cffi-1.17.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b", size = 478604 }, + { url = "https://files.pythonhosted.org/packages/34/33/e1b8a1ba29025adbdcda5fb3a36f94c03d771c1b7b12f726ff7fef2ebe36/cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", size = 171727 }, + { url = "https://files.pythonhosted.org/packages/3d/97/50228be003bb2802627d28ec0627837ac0bf35c90cf769812056f235b2d1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", size = 181400 }, + { url = 
"https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178 }, + { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840 }, + { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803 }, + { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850 }, + { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729 }, + { url = "https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256 }, + { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 
479424 }, + { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568 }, + { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736 }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448 }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976 }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/4f/e1808dc01273379acc506d18f1504eb2d299bd4131743b9fc54d7be4df1e/charset_normalizer-3.4.0.tar.gz", hash = "sha256:223217c3d4f82c3ac5e29032b3f1c2eb0fb591b72161f86d93f5719079dae93e", size = 106620 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/8b/825cc84cf13a28bfbcba7c416ec22bf85a9584971be15b21dd8300c65b7f/charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4f9fc98dad6c2eaa32fc3af1417d95b5e3d08aff968df0cd320066def971f9a6", size = 196363 }, + { url = "https://files.pythonhosted.org/packages/23/81/d7eef6a99e42c77f444fdd7bc894b0ceca6c3a95c51239e74a722039521c/charset_normalizer-3.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0de7b687289d3c1b3e8660d0741874abe7888100efe14bd0f9fd7141bcbda92b", size = 
125639 }, + { url = "https://files.pythonhosted.org/packages/21/67/b4564d81f48042f520c948abac7079356e94b30cb8ffb22e747532cf469d/charset_normalizer-3.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ed2e36c3e9b4f21dd9422f6893dec0abf2cca553af509b10cd630f878d3eb99", size = 120451 }, + { url = "https://files.pythonhosted.org/packages/c2/72/12a7f0943dd71fb5b4e7b55c41327ac0a1663046a868ee4d0d8e9c369b85/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d3ff7fc90b98c637bda91c89d51264a3dcf210cade3a2c6f838c7268d7a4ca", size = 140041 }, + { url = "https://files.pythonhosted.org/packages/67/56/fa28c2c3e31217c4c52158537a2cf5d98a6c1e89d31faf476c89391cd16b/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1110e22af8ca26b90bd6364fe4c763329b0ebf1ee213ba32b68c73de5752323d", size = 150333 }, + { url = "https://files.pythonhosted.org/packages/f9/d2/466a9be1f32d89eb1554cf84073a5ed9262047acee1ab39cbaefc19635d2/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86f4e8cca779080f66ff4f191a685ced73d2f72d50216f7112185dc02b90b9b7", size = 142921 }, + { url = "https://files.pythonhosted.org/packages/f8/01/344ec40cf5d85c1da3c1f57566c59e0c9b56bcc5566c08804a95a6cc8257/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f683ddc7eedd742e2889d2bfb96d69573fde1d92fcb811979cdb7165bb9c7d3", size = 144785 }, + { url = "https://files.pythonhosted.org/packages/73/8b/2102692cb6d7e9f03b9a33a710e0164cadfce312872e3efc7cfe22ed26b4/charset_normalizer-3.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27623ba66c183eca01bf9ff833875b459cad267aeeb044477fedac35e19ba907", size = 146631 }, + { url = 
"https://files.pythonhosted.org/packages/d8/96/cc2c1b5d994119ce9f088a9a0c3ebd489d360a2eb058e2c8049f27092847/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f606a1881d2663630ea5b8ce2efe2111740df4b687bd78b34a8131baa007f79b", size = 140867 }, + { url = "https://files.pythonhosted.org/packages/c9/27/cde291783715b8ec30a61c810d0120411844bc4c23b50189b81188b273db/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0b309d1747110feb25d7ed6b01afdec269c647d382c857ef4663bbe6ad95a912", size = 149273 }, + { url = "https://files.pythonhosted.org/packages/3a/a4/8633b0fc1a2d1834d5393dafecce4a1cc56727bfd82b4dc18fc92f0d3cc3/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:136815f06a3ae311fae551c3df1f998a1ebd01ddd424aa5603a4336997629e95", size = 152437 }, + { url = "https://files.pythonhosted.org/packages/64/ea/69af161062166b5975ccbb0961fd2384853190c70786f288684490913bf5/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:14215b71a762336254351b00ec720a8e85cada43b987da5a042e4ce3e82bd68e", size = 150087 }, + { url = "https://files.pythonhosted.org/packages/3b/fd/e60a9d9fd967f4ad5a92810138192f825d77b4fa2a557990fd575a47695b/charset_normalizer-3.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:79983512b108e4a164b9c8d34de3992f76d48cadc9554c9e60b43f308988aabe", size = 145142 }, + { url = "https://files.pythonhosted.org/packages/6d/02/8cb0988a1e49ac9ce2eed1e07b77ff118f2923e9ebd0ede41ba85f2dcb04/charset_normalizer-3.4.0-cp310-cp310-win32.whl", hash = "sha256:c94057af19bc953643a33581844649a7fdab902624d2eb739738a30e2b3e60fc", size = 94701 }, + { url = "https://files.pythonhosted.org/packages/d6/20/f1d4670a8a723c46be695dff449d86d6092916f9e99c53051954ee33a1bc/charset_normalizer-3.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:55f56e2ebd4e3bc50442fbc0888c9d8c94e4e06a933804e2af3e89e2f9c1c749", size = 102191 }, + { url = 
"https://files.pythonhosted.org/packages/9c/61/73589dcc7a719582bf56aae309b6103d2762b526bffe189d635a7fcfd998/charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0d99dd8ff461990f12d6e42c7347fd9ab2532fb70e9621ba520f9e8637161d7c", size = 193339 }, + { url = "https://files.pythonhosted.org/packages/77/d5/8c982d58144de49f59571f940e329ad6e8615e1e82ef84584c5eeb5e1d72/charset_normalizer-3.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c57516e58fd17d03ebe67e181a4e4e2ccab1168f8c2976c6a334d4f819fe5944", size = 124366 }, + { url = "https://files.pythonhosted.org/packages/bf/19/411a64f01ee971bed3231111b69eb56f9331a769072de479eae7de52296d/charset_normalizer-3.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6dba5d19c4dfab08e58d5b36304b3f92f3bd5d42c1a3fa37b5ba5cdf6dfcbcee", size = 118874 }, + { url = "https://files.pythonhosted.org/packages/4c/92/97509850f0d00e9f14a46bc751daabd0ad7765cff29cdfb66c68b6dad57f/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf4475b82be41b07cc5e5ff94810e6a01f276e37c2d55571e3fe175e467a1a1c", size = 138243 }, + { url = "https://files.pythonhosted.org/packages/e2/29/d227805bff72ed6d6cb1ce08eec707f7cfbd9868044893617eb331f16295/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce031db0408e487fd2775d745ce30a7cd2923667cf3b69d48d219f1d8f5ddeb6", size = 148676 }, + { url = "https://files.pythonhosted.org/packages/13/bc/87c2c9f2c144bedfa62f894c3007cd4530ba4b5351acb10dc786428a50f0/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ff4e7cdfdb1ab5698e675ca622e72d58a6fa2a8aa58195de0c0061288e6e3ea", size = 141289 }, + { url = "https://files.pythonhosted.org/packages/eb/5b/6f10bad0f6461fa272bfbbdf5d0023b5fb9bc6217c92bf068fa5a99820f5/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:3710a9751938947e6327ea9f3ea6332a09bf0ba0c09cae9cb1f250bd1f1549bc", size = 142585 }, + { url = "https://files.pythonhosted.org/packages/3b/a0/a68980ab8a1f45a36d9745d35049c1af57d27255eff8c907e3add84cf68f/charset_normalizer-3.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82357d85de703176b5587dbe6ade8ff67f9f69a41c0733cf2425378b49954de5", size = 144408 }, + { url = "https://files.pythonhosted.org/packages/d7/a1/493919799446464ed0299c8eef3c3fad0daf1c3cd48bff9263c731b0d9e2/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47334db71978b23ebcf3c0f9f5ee98b8d65992b65c9c4f2d34c2eaf5bcaf0594", size = 139076 }, + { url = "https://files.pythonhosted.org/packages/fb/9d/9c13753a5a6e0db4a0a6edb1cef7aee39859177b64e1a1e748a6e3ba62c2/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ce7fd6767a1cc5a92a639b391891bf1c268b03ec7e021c7d6d902285259685c", size = 146874 }, + { url = "https://files.pythonhosted.org/packages/75/d2/0ab54463d3410709c09266dfb416d032a08f97fd7d60e94b8c6ef54ae14b/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f1a2f519ae173b5b6a2c9d5fa3116ce16e48b3462c8b96dfdded11055e3d6365", size = 150871 }, + { url = "https://files.pythonhosted.org/packages/8d/c9/27e41d481557be53d51e60750b85aa40eaf52b841946b3cdeff363105737/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:63bc5c4ae26e4bc6be6469943b8253c0fd4e4186c43ad46e713ea61a0ba49129", size = 148546 }, + { url = "https://files.pythonhosted.org/packages/ee/44/4f62042ca8cdc0cabf87c0fc00ae27cd8b53ab68be3605ba6d071f742ad3/charset_normalizer-3.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bcb4f8ea87d03bc51ad04add8ceaf9b0f085ac045ab4d74e73bbc2dc033f0236", size = 143048 }, + { url = "https://files.pythonhosted.org/packages/01/f8/38842422988b795220eb8038745d27a675ce066e2ada79516c118f291f07/charset_normalizer-3.4.0-cp311-cp311-win32.whl", 
hash = "sha256:9ae4ef0b3f6b41bad6366fb0ea4fc1d7ed051528e113a60fa2a65a9abb5b1d99", size = 94389 }, + { url = "https://files.pythonhosted.org/packages/0b/6e/b13bd47fa9023b3699e94abf565b5a2f0b0be6e9ddac9812182596ee62e4/charset_normalizer-3.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cee4373f4d3ad28f1ab6290684d8e2ebdb9e7a1b74fdc39e4c211995f77bec27", size = 101752 }, + { url = "https://files.pythonhosted.org/packages/d3/0b/4b7a70987abf9b8196845806198975b6aab4ce016632f817ad758a5aa056/charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0713f3adb9d03d49d365b70b84775d0a0d18e4ab08d12bc46baa6132ba78aaf6", size = 194445 }, + { url = "https://files.pythonhosted.org/packages/50/89/354cc56cf4dd2449715bc9a0f54f3aef3dc700d2d62d1fa5bbea53b13426/charset_normalizer-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:de7376c29d95d6719048c194a9cf1a1b0393fbe8488a22008610b0361d834ecf", size = 125275 }, + { url = "https://files.pythonhosted.org/packages/fa/44/b730e2a2580110ced837ac083d8ad222343c96bb6b66e9e4e706e4d0b6df/charset_normalizer-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a51b48f42d9358460b78725283f04bddaf44a9358197b889657deba38f329db", size = 119020 }, + { url = "https://files.pythonhosted.org/packages/9d/e4/9263b8240ed9472a2ae7ddc3e516e71ef46617fe40eaa51221ccd4ad9a27/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b295729485b06c1a0683af02a9e42d2caa9db04a373dc38a6a58cdd1e8abddf1", size = 139128 }, + { url = "https://files.pythonhosted.org/packages/6b/e3/9f73e779315a54334240353eaea75854a9a690f3f580e4bd85d977cb2204/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ee803480535c44e7f5ad00788526da7d85525cfefaf8acf8ab9a310000be4b03", size = 149277 }, + { url = 
"https://files.pythonhosted.org/packages/1a/cf/f1f50c2f295312edb8a548d3fa56a5c923b146cd3f24114d5adb7e7be558/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3d59d125ffbd6d552765510e3f31ed75ebac2c7470c7274195b9161a32350284", size = 142174 }, + { url = "https://files.pythonhosted.org/packages/16/92/92a76dc2ff3a12e69ba94e7e05168d37d0345fa08c87e1fe24d0c2a42223/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cda06946eac330cbe6598f77bb54e690b4ca93f593dee1568ad22b04f347c15", size = 143838 }, + { url = "https://files.pythonhosted.org/packages/a4/01/2117ff2b1dfc61695daf2babe4a874bca328489afa85952440b59819e9d7/charset_normalizer-3.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07afec21bbbbf8a5cc3651aa96b980afe2526e7f048fdfb7f1014d84acc8b6d8", size = 146149 }, + { url = "https://files.pythonhosted.org/packages/f6/9b/93a332b8d25b347f6839ca0a61b7f0287b0930216994e8bf67a75d050255/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6b40e8d38afe634559e398cc32b1472f376a4099c75fe6299ae607e404c033b2", size = 140043 }, + { url = "https://files.pythonhosted.org/packages/ab/f6/7ac4a01adcdecbc7a7587767c776d53d369b8b971382b91211489535acf0/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b8dcd239c743aa2f9c22ce674a145e0a25cb1566c495928440a181ca1ccf6719", size = 148229 }, + { url = "https://files.pythonhosted.org/packages/9d/be/5708ad18161dee7dc6a0f7e6cf3a88ea6279c3e8484844c0590e50e803ef/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:84450ba661fb96e9fd67629b93d2941c871ca86fc38d835d19d4225ff946a631", size = 151556 }, + { url = "https://files.pythonhosted.org/packages/5a/bb/3d8bc22bacb9eb89785e83e6723f9888265f3a0de3b9ce724d66bd49884e/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:44aeb140295a2f0659e113b31cfe92c9061622cadbc9e2a2f7b8ef6b1e29ef4b", size = 149772 }, + { url = "https://files.pythonhosted.org/packages/f7/fa/d3fc622de05a86f30beea5fc4e9ac46aead4731e73fd9055496732bcc0a4/charset_normalizer-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1db4e7fefefd0f548d73e2e2e041f9df5c59e178b4c72fbac4cc6f535cfb1565", size = 144800 }, + { url = "https://files.pythonhosted.org/packages/9a/65/bdb9bc496d7d190d725e96816e20e2ae3a6fa42a5cac99c3c3d6ff884118/charset_normalizer-3.4.0-cp312-cp312-win32.whl", hash = "sha256:5726cf76c982532c1863fb64d8c6dd0e4c90b6ece9feb06c9f202417a31f7dd7", size = 94836 }, + { url = "https://files.pythonhosted.org/packages/3e/67/7b72b69d25b89c0b3cea583ee372c43aa24df15f0e0f8d3982c57804984b/charset_normalizer-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:b197e7094f232959f8f20541ead1d9862ac5ebea1d58e9849c1bf979255dfac9", size = 102187 }, + { url = "https://files.pythonhosted.org/packages/bf/9b/08c0432272d77b04803958a4598a51e2a4b51c06640af8b8f0f908c18bf2/charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", size = 49446 }, +] + [[package]] name = "cobamp" version = "0.2.2" @@ -166,6 +384,40 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "comm" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/a8/fb783cb0abe2b5fded9f55e5703015cdf1c9c85b3669087c538dd15a6a86/comm-0.2.2.tar.gz", hash = "sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e", size = 6210 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/e6/75/49e5bfe642f71f272236b5b2d2691cf915a7283cc0ceda56357b61daa538/comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3", size = 7180 }, +] + +[[package]] +name = "commitizen" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "argcomplete" }, + { name = "charset-normalizer" }, + { name = "colorama" }, + { name = "decli" }, + { name = "jinja2" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "questionary" }, + { name = "termcolor" }, + { name = "tomlkit" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/c5/66f1b977b48501a33f5fd33253aba14786483b08aba987718d272e99e732/commitizen-4.1.0.tar.gz", hash = "sha256:4f2d9400ec411aec1c738d4c63fc7fd5807cd6ddf6be970869e03e68b88ff718", size = 51252 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/f7/7f70adfbf3553ffdbe391eaacde72b21dbc1b4226ae56ca32e8ded1bf70b/commitizen-4.1.0-py3-none-any.whl", hash = "sha256:2e6c5fbd442cab4bcc5a04bc86ef2196ef84bcf611317d6c596e87f5bb4c09f5", size = 72282 }, +] + [[package]] name = "commitlint" version = "1.3.0" @@ -191,6 +443,8 @@ dependencies = [ { name = "openpyxl" }, { name = "pandas" }, { name = "plotly" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, { name = "scanpy" }, { name = "scikit-learn" }, { name = "scipy", version = "1.7.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, @@ -199,9 +453,17 @@ dependencies = [ { name = "troppo" }, ] +[package.optional-dependencies] +interactive = [ + { name = "ipython" }, + { name = "jupyterlab" }, +] + [package.dev-dependencies] dev = [ + { name = "commitizen" }, { name = "commitlint" }, + { name = "cz-conventional-gitmoji" }, { name = "hypothesis" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -217,11 +479,15 @@ 
requires-dist = [ { name = "cobra", specifier = ">=0.28.0" }, { name = "fast-bioservices", specifier = ">=0.3.9" }, { name = "gurobipy", specifier = ">=11.0" }, + { name = "ipython", marker = "extra == 'interactive'", specifier = ">=8.0.0" }, + { name = "jupyterlab", marker = "extra == 'interactive'", specifier = ">=4.3.2" }, { name = "kaleido", specifier = "==0.2.1" }, { name = "loguru", specifier = ">=0.7.2" }, { name = "openpyxl", specifier = ">=3.1.5" }, { name = "pandas", specifier = ">=1.3.5" }, { name = "plotly", specifier = ">=5.24.1" }, + { name = "pydantic", specifier = ">=2.10.3" }, + { name = "pydantic-settings", git = "https://github.com/pydantic/pydantic-settings" }, { name = "scanpy", specifier = ">=1.9.8" }, { name = "scikit-learn", specifier = ">=1.5.2" }, { name = "scipy", specifier = ">=1.7.3" }, @@ -231,7 +497,9 @@ requires-dist = [ [package.metadata.requires-dev] dev = [ + { name = "commitizen", specifier = ">=4.1.0" }, { name = "commitlint", specifier = ">=1.3.0" }, + { name = "cz-conventional-gitmoji", specifier = ">=0.6.1" }, { name = "hypothesis", specifier = ">=6.122.1" }, { name = "pytest", specifier = ">=8.3.3" }, { name = "pytest-asyncio", specifier = ">=0.24.0" }, @@ -334,6 +602,67 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321 }, ] +[[package]] +name = "cz-conventional-gitmoji" +version = "0.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "commitizen" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/de/6e/0bd29b8df2fc2a1aca050bf92429b81b9dd91672dddf9d20e94008c80d99/cz_conventional_gitmoji-0.6.1.tar.gz", hash = "sha256:3fd4b355fa9a1ffc31192112f1807ab6c33203204c00f38a54c80643a2f407b8", size = 13755 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/fc/a3/2d9b33cb8b9898b0d3cc6147242ce9c3cee1e78739d793d1b197d8b573b8/cz_conventional_gitmoji-0.6.1-py3-none-any.whl", hash = "sha256:db8f51ea2f41389e6cc9986234524df6b4e1a3846ad738f5b946a811c3948623", size = 15151 }, +] + +[[package]] +name = "debugpy" +version = "1.8.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/92/15b454c516c4c53cc8c03967e4be12b65a1ea36db3bb4513a7453f75c8d8/debugpy-1.8.9.zip", hash = "sha256:1339e14c7d980407248f09824d1b25ff5c5616651689f1e0f0e51bdead3ea13e", size = 4921695 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/2e/92fda96b1b773e454daae3e2962726dd9f7aedb1f26d7f2ca353d91a930b/debugpy-1.8.9-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:cfe1e6c6ad7178265f74981edf1154ffce97b69005212fbc90ca22ddfe3d017e", size = 2080529 }, + { url = "https://files.pythonhosted.org/packages/87/c0/d13cdbae394c7ae65ef93d7ccde2ff364445248e367bda93fc0650c08849/debugpy-1.8.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada7fb65102a4d2c9ab62e8908e9e9f12aed9d76ef44880367bc9308ebe49a0f", size = 3565151 }, + { url = "https://files.pythonhosted.org/packages/23/40/237c0a7a68cb982dcced4a0199b7c464630f75b9280d6bebde32490135d1/debugpy-1.8.9-cp310-cp310-win32.whl", hash = "sha256:c36856343cbaa448171cba62a721531e10e7ffb0abff838004701454149bc037", size = 5117068 }, + { url = "https://files.pythonhosted.org/packages/00/89/e0be9f01ee461e3369dde418492244acb1b67adaf04cb5ea98f1380ab101/debugpy-1.8.9-cp310-cp310-win_amd64.whl", hash = "sha256:17c5e0297678442511cf00a745c9709e928ea4ca263d764e90d233208889a19e", size = 5149364 }, + { url = "https://files.pythonhosted.org/packages/f7/bf/c41b688ad490d644b3bcca505a87ea58ec0442234def9a641ba62dce9c11/debugpy-1.8.9-cp311-cp311-macosx_14_0_universal2.whl", hash = "sha256:b74a49753e21e33e7cf030883a92fa607bddc4ede1aa4145172debc637780040", size = 
2179080 }, + { url = "https://files.pythonhosted.org/packages/f4/dd/e9de11423db7bde62469fbd932243c64f66d6d87924976f49ec336415522/debugpy-1.8.9-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:62d22dacdb0e296966d7d74a7141aaab4bec123fa43d1a35ddcb39bf9fd29d70", size = 3137893 }, + { url = "https://files.pythonhosted.org/packages/2c/bf/e1f2c81220591728f35585b4abd67e71e9b39b3cb983f428b23d4ca6c22e/debugpy-1.8.9-cp311-cp311-win32.whl", hash = "sha256:8138efff315cd09b8dcd14226a21afda4ca582284bf4215126d87342bba1cc66", size = 5042644 }, + { url = "https://files.pythonhosted.org/packages/96/20/a407252954fd2812771e4ea3ab523f73889fd5027e305dec5ee4f0af149a/debugpy-1.8.9-cp311-cp311-win_amd64.whl", hash = "sha256:ff54ef77ad9f5c425398efb150239f6fe8e20c53ae2f68367eba7ece1e96226d", size = 5066943 }, + { url = "https://files.pythonhosted.org/packages/da/ab/1420baf8404d2b499349a44de5037133e06d489009032ce016fedb66eea1/debugpy-1.8.9-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:957363d9a7a6612a37458d9a15e72d03a635047f946e5fceee74b50d52a9c8e2", size = 2504180 }, + { url = "https://files.pythonhosted.org/packages/58/ec/e0f88c6764314bda7887274e0b980812709b3d6363dcae124a49a9ceaa3c/debugpy-1.8.9-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e565fc54b680292b418bb809f1386f17081d1346dca9a871bf69a8ac4071afe", size = 4224563 }, + { url = "https://files.pythonhosted.org/packages/dd/49/d9ea004ee2e4531d2b528841689ee2ba01c6a4b58840efd15e57dd866a86/debugpy-1.8.9-cp312-cp312-win32.whl", hash = "sha256:3e59842d6c4569c65ceb3751075ff8d7e6a6ada209ceca6308c9bde932bcef11", size = 5163641 }, + { url = "https://files.pythonhosted.org/packages/b1/63/c8b0718024c1187a446316037680e1564bf063c6665c815f17b42c244aba/debugpy-1.8.9-cp312-cp312-win_amd64.whl", hash = "sha256:66eeae42f3137eb428ea3a86d4a55f28da9bd5a4a3d369ba95ecc3a92c1bba53", size = 5203862 }, + { url = 
"https://files.pythonhosted.org/packages/2d/23/3f5804202da11c950dc0caae4a62d0c9aadabdb2daeb5f7aa09838647b5d/debugpy-1.8.9-py2.py3-none-any.whl", hash = "sha256:cc37a6c9987ad743d9c3a14fa1b1a14b7e4e6041f9dd0c8abf8895fe7a97b899", size = 5166094 }, +] + +[[package]] +name = "decli" +version = "0.6.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3d/a0/a4658f93ecb589f479037b164dc13c68d108b50bf6594e54c820749f97ac/decli-0.6.2.tar.gz", hash = "sha256:36f71eb55fd0093895efb4f416ec32b7f6e00147dda448e3365cf73ceab42d6f", size = 7424 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bf/70/3ea48dc9e958d7d66c44c9944809181f1ca79aaef25703c023b5092d34ff/decli-0.6.2-py3-none-any.whl", hash = "sha256:2fc84106ce9a8f523ed501ca543bdb7e416c064917c12a59ebdc7f311a97b7ed", size = 7854 }, +] + +[[package]] +name = "decorator" +version = "5.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/0c/8d907af351aa16b42caae42f9d6aa37b900c67308052d10fdce809f8d952/decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", size = 35016 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073 }, +] + +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = 
"sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 }, +] + [[package]] name = "depinfo" version = "2.2.0" @@ -379,6 +708,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, ] +[[package]] +name = "executing" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/e3/7d45f492c2c4a0e8e0fad57d081a7c8a0286cdd86372b070cca1ec0caa1e/executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab", size = 977485 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/fd/afcd0496feca3276f509df3dbd5dae726fcc756f1a08d9e25abe1733f962/executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf", size = 25805 }, +] + [[package]] name = "fast-bioservices" version = "0.3.9" @@ -396,6 +734,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/48/6c/6033e67a7d734ba90ff186e5404f78b0e3b59ae46e78bec11764ae50c508/fast_bioservices-0.3.9-py3-none-any.whl", hash = "sha256:f041a30300d4de5c7d2d5e0405b8505e7a7f79248e986ecf45ddb3473d7c4d8f", size = 22687 }, ] +[[package]] +name = "fastjsonschema" +version = "2.21.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8b/50/4b769ce1ac4071a1ef6d86b1a3fb56cdc3a37615e8c5519e1af96cdac366/fastjsonschema-2.21.1.tar.gz", hash = "sha256:794d4f0a58f848961ba16af7b9c85a3e88cd360df008c59aac6fc5ae9323b5d4", size = 373939 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/2b/0817a2b257fe88725c25589d89aec060581aabf668707a8d03b2e9e0cb2a/fastjsonschema-2.21.1-py3-none-any.whl", hash = 
"sha256:c9e5b7e908310918cf494a434eeb31384dd84a98b57a30bcb1f535015b554667", size = 23924 }, +] + [[package]] name = "fonttools" version = "4.54.1" @@ -429,6 +776,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/57/5e/de2e6e51cb6894f2f2bc2641f6c845561361b622e96df3cca04df77222c9/fonttools-4.54.1-py3-none-any.whl", hash = "sha256:37cddd62d83dc4f72f7c3f3c2bcf2697e89a30efb152079896544a93907733bd", size = 1096920 }, ] +[[package]] +name = "fqdn" +version = "1.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/3e/a80a8c077fd798951169626cde3e239adeba7dab75deb3555716415bd9b0/fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f", size = 6015 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014", size = 9121 }, +] + [[package]] name = "future" version = "1.0.0" @@ -520,18 +876,17 @@ wheels = [ [[package]] name = "httpx" -version = "0.27.2" +version = "0.28.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "certifi" }, { name = "httpcore" }, { name = "idna" }, - { name = "sniffio" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/78/82/08f8c936781f67d9e6b9eeb8a0c8b4e406136ea4c3d1f89a5db71d42e0e6/httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2", size = 144189 } +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/56/95/9377bcb415797e44274b51d46e3249eba641711cf3348050f76ee7b15ffc/httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0", size = 76395 }, + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, ] [[package]] @@ -584,6 +939,87 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] +[[package]] +name = "ipykernel" +version = "6.29.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "appnope", marker = "platform_system == 'Darwin'" }, + { name = "comm" }, + { name = "debugpy" }, + { name = "ipython" }, + { name = "jupyter-client" }, + { name = "jupyter-core" }, + { name = "matplotlib-inline" }, + { name = "nest-asyncio" }, + { name = "packaging" }, + { name = "psutil" }, + { name = "pyzmq" }, + { name = "tornado" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/5c/67594cb0c7055dc50814b21731c22a601101ea3b1b50a9a1b090e11f5d0f/ipykernel-6.29.5.tar.gz", hash = "sha256:f093a22c4a40f8828f8e330a9c297cb93dcab13bd9678ded6de8e5cf81c56215", size = 163367 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/5c/368ae6c01c7628438358e6d337c19b05425727fbb221d2a3c4303c372f42/ipykernel-6.29.5-py3-none-any.whl", hash = "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5", size = 117173 }, +] + +[[package]] +name = "ipython" +version = "8.18.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = 
"decorator" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "jedi" }, + { name = "matplotlib-inline" }, + { name = "pexpect", marker = "sys_platform != 'win32'" }, + { name = "prompt-toolkit" }, + { name = "pygments" }, + { name = "stack-data" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9b/af/e1ff4d5499165e330413e0fb9811ca937c332d20900eae641cd590c0ab71/ipython-8.18.0.tar.gz", hash = "sha256:4feb61210160f75e229ce932dbf8b719bff37af123c0b985fd038b14233daa16", size = 5486388 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/57/ef12725f8af19920db1d8f2eaee44ebbaee6d9fdcf853be5db76bfdb9ce6/ipython-8.18.0-py3-none-any.whl", hash = "sha256:d538a7a98ad9b7e018926447a5f35856113a85d08fd68a165d7871ab5175f6e0", size = 808166 }, +] + +[[package]] +name = "isoduration" +version = "20.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "arrow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7c/1a/3c8edc664e06e6bd06cce40c6b22da5f1429aa4224d0c590f3be21c91ead/isoduration-20.11.0.tar.gz", hash = "sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9", size = 11649 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042", size = 11321 }, +] + +[[package]] +name = "jedi" +version = "0.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "parso" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278 }, +] + +[[package]] +name = "jinja2" +version = "3.1.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/55/39036716d19cab0747a5020fc7e907f362fbf48c984b14e62127f7e68e5d/jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369", size = 240245 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/80/3a54838c3fb461f6fec263ebf3a3a41771bd05190238de3486aae8540c36/jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d", size = 133271 }, +] + [[package]] name = "joblib" version = "1.4.2" @@ -593,6 +1029,218 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 }, ] +[[package]] +name = "json5" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/85/3d/bbe62f3d0c05a689c711cff57b2e3ac3d3e526380adb7c781989f075115c/json5-0.10.0.tar.gz", hash = "sha256:e66941c8f0a02026943c52c2eb34ebeb2a6f819a0be05920a6f5243cd30fd559", size = 48202 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/42/797895b952b682c3dafe23b1834507ee7f02f4d6299b65aaa61425763278/json5-0.10.0-py3-none-any.whl", hash = "sha256:19b23410220a7271e8377f81ba8aacba2fdd56947fbb137ee5977cbe1f5e8dfa", size = 34049 }, +] + +[[package]] +name = "jsonpointer" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/6a/0a/eebeb1fa92507ea94016a2a790b93c2ae41a7e18778f85471dc54475ed25/jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef", size = 9114 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595 }, +] + +[[package]] +name = "jsonschema" +version = "4.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/2e/03362ee4034a4c917f697890ccd4aec0800ccf9ded7f511971c75451deec/jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4", size = 325778 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/4a/4f9dbeb84e8850557c02365a0eee0649abe5eb1d84af92a25731c6c0f922/jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566", size = 88462 }, +] + +[package.optional-dependencies] +format-nongpl = [ + { name = "fqdn" }, + { name = "idna" }, + { name = "isoduration" }, + { name = "jsonpointer" }, + { name = "rfc3339-validator" }, + { name = "rfc3986-validator" }, + { name = "uri-template" }, + { name = "webcolors" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2024.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/10/db/58f950c996c793472e336ff3655b13fbcf1e3b359dcf52dcf3ed3b52c352/jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272", size = 15561 } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/d1/0f/8910b19ac0670a0f80ce1008e5e751c4a57e14d2c4c13a482aa6079fa9d6/jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf", size = 18459 }, +] + +[[package]] +name = "jupyter-client" +version = "8.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-core" }, + { name = "python-dateutil" }, + { name = "pyzmq" }, + { name = "tornado" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/22/bf9f12fdaeae18019a468b68952a60fe6dbab5d67cd2a103cac7659b41ca/jupyter_client-8.6.3.tar.gz", hash = "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419", size = 342019 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/85/b0394e0b6fcccd2c1eeefc230978a6f8cb0c5df1e4cd3e7625735a0d7d1e/jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f", size = 106105 }, +] + +[[package]] +name = "jupyter-core" +version = "5.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "platformdirs" }, + { name = "pywin32", marker = "platform_python_implementation != 'PyPy' and sys_platform == 'win32'" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/11/b56381fa6c3f4cc5d2cf54a7dbf98ad9aa0b339ef7a601d6053538b079a7/jupyter_core-5.7.2.tar.gz", hash = "sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9", size = 87629 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", size = 28965 }, +] + +[[package]] +name = "jupyter-events" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { 
name = "jsonschema", extra = ["format-nongpl"] }, + { name = "python-json-logger" }, + { name = "pyyaml" }, + { name = "referencing" }, + { name = "rfc3339-validator" }, + { name = "rfc3986-validator" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8d/53/7537a1aa558229bb0b1b178d814c9d68a9c697d3aecb808a1cb2646acf1f/jupyter_events-0.10.0.tar.gz", hash = "sha256:670b8229d3cc882ec782144ed22e0d29e1c2d639263f92ca8383e66682845e22", size = 61516 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/94/059180ea70a9a326e1815176b2370da56376da347a796f8c4f0b830208ef/jupyter_events-0.10.0-py3-none-any.whl", hash = "sha256:4b72130875e59d57716d327ea70d3ebc3af1944d3717e5a498b8a06c6c159960", size = 18777 }, +] + +[[package]] +name = "jupyter-lsp" +version = "2.2.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-server" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/b4/3200b0b09c12bc3b72d943d923323c398eff382d1dcc7c0dbc8b74630e40/jupyter-lsp-2.2.5.tar.gz", hash = "sha256:793147a05ad446f809fd53ef1cd19a9f5256fd0a2d6b7ce943a982cb4f545001", size = 48741 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/e0/7bd7cff65594fd9936e2f9385701e44574fc7d721331ff676ce440b14100/jupyter_lsp-2.2.5-py3-none-any.whl", hash = "sha256:45fbddbd505f3fbfb0b6cb2f1bc5e15e83ab7c79cd6e89416b248cb3c00c11da", size = 69146 }, +] + +[[package]] +name = "jupyter-server" +version = "2.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "argon2-cffi" }, + { name = "jinja2" }, + { name = "jupyter-client" }, + { name = "jupyter-core" }, + { name = "jupyter-events" }, + { name = "jupyter-server-terminals" }, + { name = "nbconvert" }, + { name = "nbformat" }, + { name = "overrides" }, + { name = "packaging" }, + { name = "prometheus-client" }, + { name = "pywinpty", marker = "os_name == 'nt'" }, + { name = "pyzmq" }, + { name = 
"send2trash" }, + { name = "terminado" }, + { name = "tornado" }, + { name = "traitlets" }, + { name = "websocket-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0c/34/88b47749c7fa9358e10eac356c4b97d94a91a67d5c935a73f69bc4a31118/jupyter_server-2.14.2.tar.gz", hash = "sha256:66095021aa9638ced276c248b1d81862e4c50f292d575920bbe960de1c56b12b", size = 719933 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/e1/085edea6187a127ca8ea053eb01f4e1792d778b4d192c74d32eb6730fed6/jupyter_server-2.14.2-py3-none-any.whl", hash = "sha256:47ff506127c2f7851a17bf4713434208fc490955d0e8632e95014a9a9afbeefd", size = 383556 }, +] + +[[package]] +name = "jupyter-server-terminals" +version = "0.5.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywinpty", marker = "os_name == 'nt'" }, + { name = "terminado" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/d5/562469734f476159e99a55426d697cbf8e7eb5efe89fb0e0b4f83a3d3459/jupyter_server_terminals-0.5.3.tar.gz", hash = "sha256:5ae0295167220e9ace0edcfdb212afd2b01ee8d179fe6f23c899590e9b8a5269", size = 31430 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/2d/2b32cdbe8d2a602f697a649798554e4f072115438e92249624e532e8aca6/jupyter_server_terminals-0.5.3-py3-none-any.whl", hash = "sha256:41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa", size = 13656 }, +] + +[[package]] +name = "jupyterlab" +version = "4.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-lru" }, + { name = "httpx" }, + { name = "ipykernel" }, + { name = "jinja2" }, + { name = "jupyter-core" }, + { name = "jupyter-lsp" }, + { name = "jupyter-server" }, + { name = "jupyterlab-server" }, + { name = "notebook-shim" }, + { name = "packaging" }, + { name = "setuptools" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tornado" }, + { name = "traitlets" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/6b/2b/a3b8005643a5583841e0ec3e5e187330f5d4e5f4be232b2f00a653ab2d3d/jupyterlab-4.3.2.tar.gz", hash = "sha256:3c0a6882dbddcc0a7bfdd5e2236f351b2b263e48780236e6996c2aca13ac5b22", size = 21797175 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/fc/f12dbf6e3f44d8f95645c9142e40e7e7de1e7af284b286f35acf88df5b87/jupyterlab-4.3.2-py3-none-any.whl", hash = "sha256:e87100cbab8b886ff7a4f325c856100ba6fdfe916162a85409daf0e707e19d1d", size = 11664945 }, +] + +[[package]] +name = "jupyterlab-pygments" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/90/51/9187be60d989df97f5f0aba133fa54e7300f17616e065d1ada7d7646b6d6/jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d", size = 512900 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780", size = 15884 }, +] + +[[package]] +name = "jupyterlab-server" +version = "2.27.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "babel" }, + { name = "jinja2" }, + { name = "json5" }, + { name = "jsonschema" }, + { name = "jupyter-server" }, + { name = "packaging" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0a/c9/a883ce65eb27905ce77ace410d83587c82ea64dc85a48d1f7ed52bcfa68d/jupyterlab_server-2.27.3.tar.gz", hash = "sha256:eb36caca59e74471988f0ae25c77945610b887f777255aa21f8065def9e51ed4", size = 76173 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/09/2032e7d15c544a0e3cd831c51d77a8ca57f7555b2e1b2922142eddb02a84/jupyterlab_server-2.27.3-py3-none-any.whl", hash = "sha256:e697488f66c3db49df675158a77b3b017520d772c6e1548c7d9bcc5df7944ee4", size = 59700 }, +] + 
[[package]] name = "kaleido" version = "0.2.1" @@ -725,6 +1373,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, ] +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357 }, + { url = "https://files.pythonhosted.org/packages/04/e1/6e2194baeae0bca1fae6629dc0cbbb968d4d941469cbab11a3872edff374/MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158", size = 12393 }, + { url = "https://files.pythonhosted.org/packages/1d/69/35fa85a8ece0a437493dc61ce0bb6d459dcba482c34197e3efc829aa357f/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579", size = 21732 }, + { url = "https://files.pythonhosted.org/packages/22/35/137da042dfb4720b638d2937c38a9c2df83fe32d20e8c8f3185dbfef05f7/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d", size = 20866 }, + { url = 
"https://files.pythonhosted.org/packages/29/28/6d029a903727a1b62edb51863232152fd335d602def598dade38996887f0/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb", size = 20964 }, + { url = "https://files.pythonhosted.org/packages/cc/cd/07438f95f83e8bc028279909d9c9bd39e24149b0d60053a97b2bc4f8aa51/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b", size = 21977 }, + { url = "https://files.pythonhosted.org/packages/29/01/84b57395b4cc062f9c4c55ce0df7d3108ca32397299d9df00fedd9117d3d/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c", size = 21366 }, + { url = "https://files.pythonhosted.org/packages/bd/6e/61ebf08d8940553afff20d1fb1ba7294b6f8d279df9fd0c0db911b4bbcfd/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171", size = 21091 }, + { url = "https://files.pythonhosted.org/packages/11/23/ffbf53694e8c94ebd1e7e491de185124277964344733c45481f32ede2499/MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50", size = 15065 }, + { url = "https://files.pythonhosted.org/packages/44/06/e7175d06dd6e9172d4a69a72592cb3f7a996a9c396eee29082826449bbc3/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", size = 15514 }, + { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353 }, + { url = 
"https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392 }, + { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984 }, + { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120 }, + { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032 }, + { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057 }, + { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359 }, + { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306 }, + { url = 
"https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094 }, + { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521 }, + { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274 }, + { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348 }, + { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149 }, + { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118 }, + { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993 }, + { url = 
"https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178 }, + { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319 }, + { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352 }, + { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097 }, + { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601 }, +] + [[package]] name = "matplotlib" version = "3.8.4" @@ -763,13 +1449,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7d/ca/e7bd1876a341ed8c456095962a582696cac1691cb6e55bd5ead15a755c5d/matplotlib-3.8.4-cp312-cp312-win_amd64.whl", hash = "sha256:7a6769f58ce51791b4cb8b4d7642489df347697cd3e23d88266aaaee93b41d9a", size = 7659712 }, ] +[[package]] +name = "matplotlib-inline" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = 
"sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, +] + [[package]] name = "mdurl" version = "0.1.2" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, +] + +[[package]] +name = "mistune" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/c8/f0173fe3bf85fd891aee2e7bcd8207dfe26c2c683d727c5a6cc3aec7b628/mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8", size = 90840 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/74/c95adcdf032956d9ef6c89a9b8a5152bf73915f8c633f3e3d88d06bd699c/mistune-3.0.2-py3-none-any.whl", hash = "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205", size = 47958 }, ] [[package]] @@ -809,6 +1516,71 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ef/82/7a9d0550484a62c6da82858ee9419f3dd1ccc9aa1c26a1e43da3ecd20b0d/natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c", size = 38268 }, ] +[[package]] +name = "nbclient" +version = "0.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-client" }, + { name = "jupyter-core" }, + { name = "nbformat" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/db/25929926860ba8a3f6123d2d0a235e558e0e4be7b46e9db063a7dfefa0a2/nbclient-0.10.1.tar.gz", hash = "sha256:3e93e348ab27e712acd46fccd809139e356eb9a31aab641d1a7991a6eb4e6f68", size = 62273 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/1a/ed6d1299b1a00c1af4a033fdee565f533926d819e084caf0d2832f6f87c6/nbclient-0.10.1-py3-none-any.whl", hash = "sha256:949019b9240d66897e442888cfb618f69ef23dc71c01cb5fced8499c2cfc084d", size = 25344 }, +] + +[[package]] +name = "nbconvert" +version = "7.16.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "bleach" }, + { name = "defusedxml" }, + { name = "jinja2" }, + { name = "jupyter-core" }, + { name = "jupyterlab-pygments" }, + { name = "markupsafe" }, + { name = "mistune" }, + { name = "nbclient" }, + { name = "nbformat" }, + { name = "packaging" }, + { name = "pandocfilters" }, + { name = "pygments" }, + { name = "tinycss2" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/e8/ba521a033b21132008e520c28ceb818f9f092da5f0261e94e509401b29f9/nbconvert-7.16.4.tar.gz", hash = "sha256:86ca91ba266b0a448dc96fa6c5b9d98affabde2867b363258703536807f9f7f4", size = 854422 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/bb/bb5b6a515d1584aa2fd89965b11db6632e4bdc69495a52374bcc36e56cfa/nbconvert-7.16.4-py3-none-any.whl", hash = 
"sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3", size = 257388 }, +] + +[[package]] +name = "nbformat" +version = "5.10.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastjsonschema" }, + { name = "jsonschema" }, + { name = "jupyter-core" }, + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/fd/91545e604bc3dad7dca9ed03284086039b294c6b3d75c0d2fa45f9e9caf3/nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a", size = 142749 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b", size = 78454 }, +] + +[[package]] +name = "nest-asyncio" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195 }, +] + [[package]] name = "networkx" version = "3.2.1" @@ -818,6 +1590,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/f0/8fbc882ca80cf077f1b246c0e3c3465f7f415439bdea6b899f6b19f61f70/networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2", size = 1647772 }, ] +[[package]] +name = "notebook-shim" +version = "0.2.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jupyter-server" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/54/d2/92fa3243712b9a3e8bafaf60aac366da1cada3639ca767ff4b5b3654ec28/notebook_shim-0.2.4.tar.gz", hash = "sha256:b4b2cfa1b65d98307ca24361f5b30fe785b53c3fd07b7a47e89acb5e6ac638cb", size = 13167 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/33/bd5b9137445ea4b680023eb0469b2bb969d61303dedb2aac6560ff3d14a1/notebook_shim-0.2.4-py3-none-any.whl", hash = "sha256:411a5be4e9dc882a074ccbcae671eda64cceb068767e9a3419096986560e1cef", size = 13307 }, +] + [[package]] name = "numba" version = "0.53.1" @@ -948,6 +1732,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/12/3e/9d0b72cf5a8ff660e5787a0797906e04942081f3ad4a95f860488affff2b/optlang-1.5.2-py2.py3-none-any.whl", hash = "sha256:14464cff638b58670c1a7f5896f19dd7b595a12c1d30a27c59074700833c1677", size = 147733 }, ] +[[package]] +name = "overrides" +version = "7.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/36/86/b585f53236dec60aba864e050778b25045f857e17f6e5ea0ae95fe80edd2/overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a", size = 22812 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/ab/fc8290c6a4c722e5514d80f62b2dc4c4df1a68a41d1364e625c35990fcf3/overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49", size = 17832 }, +] + [[package]] name = "packaging" version = "24.1" @@ -993,6 +1786,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 }, ] +[[package]] +name = "pandocfilters" +version = "1.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/70/6f/3dd4940bbe001c06a65f88e36bad298bc7a0de5036115639926b0c5c0458/pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e", size = 8454 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc", size = 8663 }, +] + +[[package]] +name = "parso" +version = "0.8.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/94/68e2e17afaa9169cf6412ab0f28623903be73d1b32e208d9e8e541bb086d/parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d", size = 400609 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650 }, +] + [[package]] name = "pathos" version = "0.3.3" @@ -1022,6 +1833,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/f3/1d311a09c34f14f5973bb0bb0dc3a6e007e1eda90b5492d082689936ca51/patsy-0.5.6-py2.py3-none-any.whl", hash = "sha256:19056886fd8fa71863fa32f0eb090267f21fb74be00f19f5c70b2e9d76c883c6", size = 233945 }, ] +[[package]] +name = "pexpect" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 }, +] + [[package]] name = "pillow" version = "11.0.0" @@ -1070,6 +1893,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/3d/c32a51d848401bd94cabb8767a39621496491ee7cd5199856b77da9b18ad/pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316", size = 2567508 }, ] +[[package]] +name = "platformdirs" +version = "4.3.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/fc/128cc9cb8f03208bdbf93d3aa862e16d376844a14f9a0ce5cf4507372de4/platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907", size = 21302 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439 }, +] + [[package]] name = "plotly" version = "5.24.1" @@ -1110,73 +1942,151 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/b3/45a04dabc39d93ad4836d99625e7c5350257b48e9ae2c5b701f3d5da6960/ppft-1.7.6.9-py3-none-any.whl", hash = "sha256:dab36548db5ca3055067fbe6b1a17db5fee29f3c366c579a9a27cebb52ed96f0", size = 56792 }, ] +[[package]] +name = "prometheus-client" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/62/14/7d0f567991f3a9af8d1cd4f619040c93b68f09a02b6d0b6ab1b2d1ded5fe/prometheus_client-0.21.1.tar.gz", hash = "sha256:252505a722ac04b0456be05c05f75f45d760c2911ffc45f2a06bcaed9f3ae3fb", size = 78551 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ff/c2/ab7d37426c179ceb9aeb109a85cda8948bb269b7561a0be870cc656eefe4/prometheus_client-0.21.1-py3-none-any.whl", hash = "sha256:594b45c410d6f4f8888940fe80b5cc2521b305a1fafe1c58609ef715a001f301", size = 54682 }, +] + +[[package]] +name = "prompt-toolkit" +version = "3.0.36" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fb/93/180be2342f89f16543ec4eb3f25083b5b84eba5378f68efff05409fb39a9/prompt_toolkit-3.0.36.tar.gz", hash = "sha256:3e163f254bef5a03b146397d7c1963bd3e2812f0964bb9a24e6ec761fd28db63", size = 423863 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/37/791f1a6edd13c61cac85282368aa68cb0f3f164440fdf60032f2cc6ca34e/prompt_toolkit-3.0.36-py3-none-any.whl", hash = "sha256:aa64ad242a462c5ff0363a7b9cfe696c20d55d9fc60c11fd8e632d064804d305", size = 386414 }, +] + +[[package]] +name = "psutil" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/26/10/2a30b13c61e7cf937f4adf90710776b7918ed0a9c434e2c38224732af310/psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a", size = 508565 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/9e/8be43078a171381953cfee33c07c0d628594b5dbfc5157847b85022c2c1b/psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688", size = 247762 }, + { url = "https://files.pythonhosted.org/packages/1d/cb/313e80644ea407f04f6602a9e23096540d9dc1878755f3952ea8d3d104be/psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e", size = 248777 }, + { url = 
"https://files.pythonhosted.org/packages/65/8e/bcbe2025c587b5d703369b6a75b65d41d1367553da6e3f788aff91eaf5bd/psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38", size = 284259 }, + { url = "https://files.pythonhosted.org/packages/58/4d/8245e6f76a93c98aab285a43ea71ff1b171bcd90c9d238bf81f7021fb233/psutil-6.1.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:498c6979f9c6637ebc3a73b3f87f9eb1ec24e1ce53a7c5173b8508981614a90b", size = 287255 }, + { url = "https://files.pythonhosted.org/packages/27/c2/d034856ac47e3b3cdfa9720d0e113902e615f4190d5d1bdb8df4b2015fb2/psutil-6.1.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d905186d647b16755a800e7263d43df08b790d709d575105d419f8b6ef65423a", size = 288804 }, + { url = "https://files.pythonhosted.org/packages/ea/55/5389ed243c878725feffc0d6a3bc5ef6764312b6fc7c081faaa2cfa7ef37/psutil-6.1.0-cp37-abi3-win32.whl", hash = "sha256:1ad45a1f5d0b608253b11508f80940985d1d0c8f6111b5cb637533a0e6ddc13e", size = 250386 }, + { url = "https://files.pythonhosted.org/packages/11/91/87fa6f060e649b1e1a7b19a4f5869709fbf750b7c8c262ee776ec32f3028/psutil-6.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:a8fb3752b491d246034fa4d279ff076501588ce8cbcdbb62c32fd7a377d996be", size = 254228 }, +] + +[[package]] +name = "ptyprocess" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = 
"sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993 }, +] + +[[package]] +name = "pure-eval" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 }, +] + +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552 }, +] + [[package]] name = "pydantic" -version = "2.9.2" +version = "2.10.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, { name = "pydantic-core" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a9/b7/d9e3f12af310e1120c21603644a1cd86f59060e040ec5c3a80b8f05fae30/pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f", size = 769917 } +sdist = { url = "https://files.pythonhosted.org/packages/45/0f/27908242621b14e649a84e62b133de45f84c255eecb350ab02979844a788/pydantic-2.10.3.tar.gz", hash = 
"sha256:cb5ac360ce894ceacd69c403187900a02c4b20b693a9dd1d643e1effab9eadf9", size = 786486 } wheels = [ - { url = "https://files.pythonhosted.org/packages/df/e4/ba44652d562cbf0bf320e0f3810206149c8a4e99cdbf66da82e97ab53a15/pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12", size = 434928 }, + { url = "https://files.pythonhosted.org/packages/62/51/72c18c55cf2f46ff4f91ebcc8f75aa30f7305f3d726be3f4ebffb4ae972b/pydantic-2.10.3-py3-none-any.whl", hash = "sha256:be04d85bbc7b65651c5f8e6b9976ed9c6f41782a55524cef079a34a0bb82144d", size = 456997 }, ] [[package]] name = "pydantic-core" -version = "2.23.4" +version = "2.27.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e2/aa/6b6a9b9f8537b872f552ddd46dd3da230367754b6f707b8e1e963f515ea3/pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863", size = 402156 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/8b/d3ae387f66277bd8104096d6ec0a145f4baa2966ebb2cad746c0920c9526/pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b", size = 1867835 }, - { url = "https://files.pythonhosted.org/packages/46/76/f68272e4c3a7df8777798282c5e47d508274917f29992d84e1898f8908c7/pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166", size = 1776689 }, - { url = "https://files.pythonhosted.org/packages/cc/69/5f945b4416f42ea3f3bc9d2aaec66c76084a6ff4ff27555bf9415ab43189/pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb", size = 1800748 }, - { url = 
"https://files.pythonhosted.org/packages/50/ab/891a7b0054bcc297fb02d44d05c50e68154e31788f2d9d41d0b72c89fdf7/pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916", size = 1806469 }, - { url = "https://files.pythonhosted.org/packages/31/7c/6e3fa122075d78f277a8431c4c608f061881b76c2b7faca01d317ee39b5d/pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07", size = 2002246 }, - { url = "https://files.pythonhosted.org/packages/ad/6f/22d5692b7ab63fc4acbc74de6ff61d185804a83160adba5e6cc6068e1128/pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232", size = 2659404 }, - { url = "https://files.pythonhosted.org/packages/11/ac/1e647dc1121c028b691028fa61a4e7477e6aeb5132628fde41dd34c1671f/pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2", size = 2053940 }, - { url = "https://files.pythonhosted.org/packages/91/75/984740c17f12c3ce18b5a2fcc4bdceb785cce7df1511a4ce89bca17c7e2d/pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f", size = 1921437 }, - { url = "https://files.pythonhosted.org/packages/a0/74/13c5f606b64d93f0721e7768cd3e8b2102164866c207b8cd6f90bb15d24f/pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3", size = 1966129 }, - { url = "https://files.pythonhosted.org/packages/18/03/9c4aa5919457c7b57a016c1ab513b1a926ed9b2bb7915bf8e506bf65c34b/pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071", size = 2110908 }, - { url = "https://files.pythonhosted.org/packages/92/2c/053d33f029c5dc65e5cf44ff03ceeefb7cce908f8f3cca9265e7f9b540c8/pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119", size = 1735278 }, - { url = "https://files.pythonhosted.org/packages/de/81/7dfe464eca78d76d31dd661b04b5f2036ec72ea8848dd87ab7375e185c23/pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = "sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f", size = 1917453 }, - { url = "https://files.pythonhosted.org/packages/5d/30/890a583cd3f2be27ecf32b479d5d615710bb926d92da03e3f7838ff3e58b/pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8", size = 1865160 }, - { url = "https://files.pythonhosted.org/packages/1d/9a/b634442e1253bc6889c87afe8bb59447f106ee042140bd57680b3b113ec7/pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d", size = 1776777 }, - { url = "https://files.pythonhosted.org/packages/75/9a/7816295124a6b08c24c96f9ce73085032d8bcbaf7e5a781cd41aa910c891/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e", size = 1799244 }, - { url = "https://files.pythonhosted.org/packages/a9/8f/89c1405176903e567c5f99ec53387449e62f1121894aa9fc2c4fdc51a59b/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607", size = 1805307 }, - { url = "https://files.pythonhosted.org/packages/d5/a5/1a194447d0da1ef492e3470680c66048fef56fc1f1a25cafbea4bc1d1c48/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd", size = 2000663 }, - { url = "https://files.pythonhosted.org/packages/13/a5/1df8541651de4455e7d587cf556201b4f7997191e110bca3b589218745a5/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea", size = 2655941 }, - { url = "https://files.pythonhosted.org/packages/44/31/a3899b5ce02c4316865e390107f145089876dff7e1dfc770a231d836aed8/pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e", size = 2052105 }, - { url = "https://files.pythonhosted.org/packages/1b/aa/98e190f8745d5ec831f6d5449344c48c0627ac5fed4e5340a44b74878f8e/pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b", size = 1919967 }, - { url = "https://files.pythonhosted.org/packages/ae/35/b6e00b6abb2acfee3e8f85558c02a0822e9a8b2f2d812ea8b9079b118ba0/pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0", size = 1964291 }, - { url = "https://files.pythonhosted.org/packages/13/46/7bee6d32b69191cd649bbbd2361af79c472d72cb29bb2024f0b6e350ba06/pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64", size = 2109666 }, - { url = "https://files.pythonhosted.org/packages/39/ef/7b34f1b122a81b68ed0a7d0e564da9ccdc9a2924c8d6c6b5b11fa3a56970/pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f", size = 1732940 }, - { url = "https://files.pythonhosted.org/packages/2f/76/37b7e76c645843ff46c1d73e046207311ef298d3f7b2f7d8f6ac60113071/pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = 
"sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3", size = 1916804 }, - { url = "https://files.pythonhosted.org/packages/74/7b/8e315f80666194b354966ec84b7d567da77ad927ed6323db4006cf915f3f/pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231", size = 1856459 }, - { url = "https://files.pythonhosted.org/packages/14/de/866bdce10ed808323d437612aca1ec9971b981e1c52e5e42ad9b8e17a6f6/pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee", size = 1770007 }, - { url = "https://files.pythonhosted.org/packages/dc/69/8edd5c3cd48bb833a3f7ef9b81d7666ccddd3c9a635225214e044b6e8281/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87", size = 1790245 }, - { url = "https://files.pythonhosted.org/packages/80/33/9c24334e3af796ce80d2274940aae38dd4e5676298b4398eff103a79e02d/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8", size = 1801260 }, - { url = "https://files.pythonhosted.org/packages/a5/6f/e9567fd90104b79b101ca9d120219644d3314962caa7948dd8b965e9f83e/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327", size = 1996872 }, - { url = "https://files.pythonhosted.org/packages/2d/ad/b5f0fe9e6cfee915dd144edbd10b6e9c9c9c9d7a56b69256d124b8ac682e/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2", size = 2661617 }, - { url = 
"https://files.pythonhosted.org/packages/06/c8/7d4b708f8d05a5cbfda3243aad468052c6e99de7d0937c9146c24d9f12e9/pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36", size = 2071831 }, - { url = "https://files.pythonhosted.org/packages/89/4d/3079d00c47f22c9a9a8220db088b309ad6e600a73d7a69473e3a8e5e3ea3/pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126", size = 1917453 }, - { url = "https://files.pythonhosted.org/packages/e9/88/9df5b7ce880a4703fcc2d76c8c2d8eb9f861f79d0c56f4b8f5f2607ccec8/pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e", size = 1968793 }, - { url = "https://files.pythonhosted.org/packages/e3/b9/41f7efe80f6ce2ed3ee3c2dcfe10ab7adc1172f778cc9659509a79518c43/pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24", size = 2116872 }, - { url = "https://files.pythonhosted.org/packages/63/08/b59b7a92e03dd25554b0436554bf23e7c29abae7cce4b1c459cd92746811/pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84", size = 1738535 }, - { url = "https://files.pythonhosted.org/packages/88/8d/479293e4d39ab409747926eec4329de5b7129beaedc3786eca070605d07f/pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9", size = 1917992 }, - { url = "https://files.pythonhosted.org/packages/13/a9/5d582eb3204464284611f636b55c0a7410d748ff338756323cb1ce721b96/pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5", size = 1857135 }, - { url = 
"https://files.pythonhosted.org/packages/2c/57/faf36290933fe16717f97829eabfb1868182ac495f99cf0eda9f59687c9d/pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec", size = 1740583 }, - { url = "https://files.pythonhosted.org/packages/91/7c/d99e3513dc191c4fec363aef1bf4c8af9125d8fa53af7cb97e8babef4e40/pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480", size = 1793637 }, - { url = "https://files.pythonhosted.org/packages/29/18/812222b6d18c2d13eebbb0f7cdc170a408d9ced65794fdb86147c77e1982/pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068", size = 1941963 }, - { url = "https://files.pythonhosted.org/packages/0f/36/c1f3642ac3f05e6bb4aec3ffc399fa3f84895d259cf5f0ce3054b7735c29/pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801", size = 1915332 }, - { url = "https://files.pythonhosted.org/packages/f7/ca/9c0854829311fb446020ebb540ee22509731abad886d2859c855dd29b904/pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728", size = 1957926 }, - { url = "https://files.pythonhosted.org/packages/c0/1c/7836b67c42d0cd4441fcd9fafbf6a027ad4b79b6559f80cf11f89fd83648/pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433", size = 2100342 }, - { url = "https://files.pythonhosted.org/packages/a9/f9/b6bcaf874f410564a78908739c80861a171788ef4d4f76f5009656672dfe/pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = 
"sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753", size = 1920344 }, +sdist = { url = "https://files.pythonhosted.org/packages/a6/9f/7de1f19b6aea45aeb441838782d68352e71bfa98ee6fa048d5041991b33e/pydantic_core-2.27.1.tar.gz", hash = "sha256:62a763352879b84aa31058fc931884055fd75089cccbd9d58bb6afd01141b235", size = 412785 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/ce/60fd96895c09738648c83f3f00f595c807cb6735c70d3306b548cc96dd49/pydantic_core-2.27.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:71a5e35c75c021aaf400ac048dacc855f000bdfed91614b4a726f7432f1f3d6a", size = 1897984 }, + { url = "https://files.pythonhosted.org/packages/fd/b9/84623d6b6be98cc209b06687d9bca5a7b966ffed008d15225dd0d20cce2e/pydantic_core-2.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f82d068a2d6ecfc6e054726080af69a6764a10015467d7d7b9f66d6ed5afa23b", size = 1807491 }, + { url = "https://files.pythonhosted.org/packages/01/72/59a70165eabbc93b1111d42df9ca016a4aa109409db04304829377947028/pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:121ceb0e822f79163dd4699e4c54f5ad38b157084d97b34de8b232bcaad70278", size = 1831953 }, + { url = "https://files.pythonhosted.org/packages/7c/0c/24841136476adafd26f94b45bb718a78cb0500bd7b4f8d667b67c29d7b0d/pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4603137322c18eaf2e06a4495f426aa8d8388940f3c457e7548145011bb68e05", size = 1856071 }, + { url = "https://files.pythonhosted.org/packages/53/5e/c32957a09cceb2af10d7642df45d1e3dbd8596061f700eac93b801de53c0/pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a33cd6ad9017bbeaa9ed78a2e0752c5e250eafb9534f308e7a5f7849b0b1bfb4", size = 2038439 }, + { url = 
"https://files.pythonhosted.org/packages/e4/8f/979ab3eccd118b638cd6d8f980fea8794f45018255a36044dea40fe579d4/pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15cc53a3179ba0fcefe1e3ae50beb2784dede4003ad2dfd24f81bba4b23a454f", size = 2787416 }, + { url = "https://files.pythonhosted.org/packages/02/1d/00f2e4626565b3b6d3690dab4d4fe1a26edd6a20e53749eb21ca892ef2df/pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45d9c5eb9273aa50999ad6adc6be5e0ecea7e09dbd0d31bd0c65a55a2592ca08", size = 2134548 }, + { url = "https://files.pythonhosted.org/packages/9d/46/3112621204128b90898adc2e721a3cd6cf5626504178d6f32c33b5a43b79/pydantic_core-2.27.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8bf7b66ce12a2ac52d16f776b31d16d91033150266eb796967a7e4621707e4f6", size = 1989882 }, + { url = "https://files.pythonhosted.org/packages/49/ec/557dd4ff5287ffffdf16a31d08d723de6762bb1b691879dc4423392309bc/pydantic_core-2.27.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:655d7dd86f26cb15ce8a431036f66ce0318648f8853d709b4167786ec2fa4807", size = 1995829 }, + { url = "https://files.pythonhosted.org/packages/6e/b2/610dbeb74d8d43921a7234555e4c091cb050a2bdb8cfea86d07791ce01c5/pydantic_core-2.27.1-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:5556470f1a2157031e676f776c2bc20acd34c1990ca5f7e56f1ebf938b9ab57c", size = 2091257 }, + { url = "https://files.pythonhosted.org/packages/8c/7f/4bf8e9d26a9118521c80b229291fa9558a07cdd9a968ec2d5c1026f14fbc/pydantic_core-2.27.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f69ed81ab24d5a3bd93861c8c4436f54afdf8e8cc421562b0c7504cf3be58206", size = 2143894 }, + { url = "https://files.pythonhosted.org/packages/1f/1c/875ac7139c958f4390f23656fe696d1acc8edf45fb81e4831960f12cd6e4/pydantic_core-2.27.1-cp310-none-win32.whl", hash = "sha256:f5a823165e6d04ccea61a9f0576f345f8ce40ed533013580e087bd4d7442b52c", size = 1816081 }, + { url = 
"https://files.pythonhosted.org/packages/d7/41/55a117acaeda25ceae51030b518032934f251b1dac3704a53781383e3491/pydantic_core-2.27.1-cp310-none-win_amd64.whl", hash = "sha256:57866a76e0b3823e0b56692d1a0bf722bffb324839bb5b7226a7dbd6c9a40b17", size = 1981109 }, + { url = "https://files.pythonhosted.org/packages/27/39/46fe47f2ad4746b478ba89c561cafe4428e02b3573df882334bd2964f9cb/pydantic_core-2.27.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ac3b20653bdbe160febbea8aa6c079d3df19310d50ac314911ed8cc4eb7f8cb8", size = 1895553 }, + { url = "https://files.pythonhosted.org/packages/1c/00/0804e84a78b7fdb394fff4c4f429815a10e5e0993e6ae0e0b27dd20379ee/pydantic_core-2.27.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a5a8e19d7c707c4cadb8c18f5f60c843052ae83c20fa7d44f41594c644a1d330", size = 1807220 }, + { url = "https://files.pythonhosted.org/packages/01/de/df51b3bac9820d38371f5a261020f505025df732ce566c2a2e7970b84c8c/pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f7059ca8d64fea7f238994c97d91f75965216bcbe5f695bb44f354893f11d52", size = 1829727 }, + { url = "https://files.pythonhosted.org/packages/5f/d9/c01d19da8f9e9fbdb2bf99f8358d145a312590374d0dc9dd8dbe484a9cde/pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bed0f8a0eeea9fb72937ba118f9db0cb7e90773462af7962d382445f3005e5a4", size = 1854282 }, + { url = "https://files.pythonhosted.org/packages/5f/84/7db66eb12a0dc88c006abd6f3cbbf4232d26adfd827a28638c540d8f871d/pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a3cb37038123447cf0f3ea4c74751f6a9d7afef0eb71aa07bf5f652b5e6a132c", size = 2037437 }, + { url = "https://files.pythonhosted.org/packages/34/ac/a2537958db8299fbabed81167d58cc1506049dba4163433524e06a7d9f4c/pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:84286494f6c5d05243456e04223d5a9417d7f443c3b76065e75001beb26f88de", size = 
2780899 }, + { url = "https://files.pythonhosted.org/packages/4a/c1/3e38cd777ef832c4fdce11d204592e135ddeedb6c6f525478a53d1c7d3e5/pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acc07b2cfc5b835444b44a9956846b578d27beeacd4b52e45489e93276241025", size = 2135022 }, + { url = "https://files.pythonhosted.org/packages/7a/69/b9952829f80fd555fe04340539d90e000a146f2a003d3fcd1e7077c06c71/pydantic_core-2.27.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4fefee876e07a6e9aad7a8c8c9f85b0cdbe7df52b8a9552307b09050f7512c7e", size = 1987969 }, + { url = "https://files.pythonhosted.org/packages/05/72/257b5824d7988af43460c4e22b63932ed651fe98804cc2793068de7ec554/pydantic_core-2.27.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:258c57abf1188926c774a4c94dd29237e77eda19462e5bb901d88adcab6af919", size = 1994625 }, + { url = "https://files.pythonhosted.org/packages/73/c3/78ed6b7f3278a36589bcdd01243189ade7fc9b26852844938b4d7693895b/pydantic_core-2.27.1-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:35c14ac45fcfdf7167ca76cc80b2001205a8d5d16d80524e13508371fb8cdd9c", size = 2090089 }, + { url = "https://files.pythonhosted.org/packages/8d/c8/b4139b2f78579960353c4cd987e035108c93a78371bb19ba0dc1ac3b3220/pydantic_core-2.27.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d1b26e1dff225c31897696cab7d4f0a315d4c0d9e8666dbffdb28216f3b17fdc", size = 2142496 }, + { url = "https://files.pythonhosted.org/packages/3e/f8/171a03e97eb36c0b51981efe0f78460554a1d8311773d3d30e20c005164e/pydantic_core-2.27.1-cp311-none-win32.whl", hash = "sha256:2cdf7d86886bc6982354862204ae3b2f7f96f21a3eb0ba5ca0ac42c7b38598b9", size = 1811758 }, + { url = "https://files.pythonhosted.org/packages/6a/fe/4e0e63c418c1c76e33974a05266e5633e879d4061f9533b1706a86f77d5b/pydantic_core-2.27.1-cp311-none-win_amd64.whl", hash = "sha256:3af385b0cee8df3746c3f406f38bcbfdc9041b5c2d5ce3e5fc6637256e60bbc5", size = 1980864 }, + { url = 
"https://files.pythonhosted.org/packages/50/fc/93f7238a514c155a8ec02fc7ac6376177d449848115e4519b853820436c5/pydantic_core-2.27.1-cp311-none-win_arm64.whl", hash = "sha256:81f2ec23ddc1b476ff96563f2e8d723830b06dceae348ce02914a37cb4e74b89", size = 1864327 }, + { url = "https://files.pythonhosted.org/packages/be/51/2e9b3788feb2aebff2aa9dfbf060ec739b38c05c46847601134cc1fed2ea/pydantic_core-2.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9cbd94fc661d2bab2bc702cddd2d3370bbdcc4cd0f8f57488a81bcce90c7a54f", size = 1895239 }, + { url = "https://files.pythonhosted.org/packages/7b/9e/f8063952e4a7d0127f5d1181addef9377505dcce3be224263b25c4f0bfd9/pydantic_core-2.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f8c4718cd44ec1580e180cb739713ecda2bdee1341084c1467802a417fe0f02", size = 1805070 }, + { url = "https://files.pythonhosted.org/packages/2c/9d/e1d6c4561d262b52e41b17a7ef8301e2ba80b61e32e94520271029feb5d8/pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15aae984e46de8d376df515f00450d1522077254ef6b7ce189b38ecee7c9677c", size = 1828096 }, + { url = "https://files.pythonhosted.org/packages/be/65/80ff46de4266560baa4332ae3181fffc4488ea7d37282da1a62d10ab89a4/pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ba5e3963344ff25fc8c40da90f44b0afca8cfd89d12964feb79ac1411a260ac", size = 1857708 }, + { url = "https://files.pythonhosted.org/packages/d5/ca/3370074ad758b04d9562b12ecdb088597f4d9d13893a48a583fb47682cdf/pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:992cea5f4f3b29d6b4f7f1726ed8ee46c8331c6b4eed6db5b40134c6fe1768bb", size = 2037751 }, + { url = "https://files.pythonhosted.org/packages/b1/e2/4ab72d93367194317b99d051947c071aef6e3eb95f7553eaa4208ecf9ba4/pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0325336f348dbee6550d129b1627cb8f5351a9dc91aad141ffb96d4937bd9529", size = 
2733863 }, + { url = "https://files.pythonhosted.org/packages/8a/c6/8ae0831bf77f356bb73127ce5a95fe115b10f820ea480abbd72d3cc7ccf3/pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7597c07fbd11515f654d6ece3d0e4e5093edc30a436c63142d9a4b8e22f19c35", size = 2161161 }, + { url = "https://files.pythonhosted.org/packages/f1/f4/b2fe73241da2429400fc27ddeaa43e35562f96cf5b67499b2de52b528cad/pydantic_core-2.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3bbd5d8cc692616d5ef6fbbbd50dbec142c7e6ad9beb66b78a96e9c16729b089", size = 1993294 }, + { url = "https://files.pythonhosted.org/packages/77/29/4bb008823a7f4cc05828198153f9753b3bd4c104d93b8e0b1bfe4e187540/pydantic_core-2.27.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:dc61505e73298a84a2f317255fcc72b710b72980f3a1f670447a21efc88f8381", size = 2001468 }, + { url = "https://files.pythonhosted.org/packages/f2/a9/0eaceeba41b9fad851a4107e0cf999a34ae8f0d0d1f829e2574f3d8897b0/pydantic_core-2.27.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:e1f735dc43da318cad19b4173dd1ffce1d84aafd6c9b782b3abc04a0d5a6f5bb", size = 2091413 }, + { url = "https://files.pythonhosted.org/packages/d8/36/eb8697729725bc610fd73940f0d860d791dc2ad557faaefcbb3edbd2b349/pydantic_core-2.27.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f4e5658dbffe8843a0f12366a4c2d1c316dbe09bb4dfbdc9d2d9cd6031de8aae", size = 2154735 }, + { url = "https://files.pythonhosted.org/packages/52/e5/4f0fbd5c5995cc70d3afed1b5c754055bb67908f55b5cb8000f7112749bf/pydantic_core-2.27.1-cp312-none-win32.whl", hash = "sha256:672ebbe820bb37988c4d136eca2652ee114992d5d41c7e4858cdd90ea94ffe5c", size = 1833633 }, + { url = "https://files.pythonhosted.org/packages/ee/f2/c61486eee27cae5ac781305658779b4a6b45f9cc9d02c90cb21b940e82cc/pydantic_core-2.27.1-cp312-none-win_amd64.whl", hash = "sha256:66ff044fd0bb1768688aecbe28b6190f6e799349221fb0de0e6f4048eca14c16", size = 1986973 }, + { url = 
"https://files.pythonhosted.org/packages/df/a6/e3f12ff25f250b02f7c51be89a294689d175ac76e1096c32bf278f29ca1e/pydantic_core-2.27.1-cp312-none-win_arm64.whl", hash = "sha256:9a3b0793b1bbfd4146304e23d90045f2a9b5fd5823aa682665fbdaf2a6c28f3e", size = 1883215 }, + { url = "https://files.pythonhosted.org/packages/7c/60/e5eb2d462595ba1f622edbe7b1d19531e510c05c405f0b87c80c1e89d5b1/pydantic_core-2.27.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3fa80ac2bd5856580e242dbc202db873c60a01b20309c8319b5c5986fbe53ce6", size = 1894016 }, + { url = "https://files.pythonhosted.org/packages/61/20/da7059855225038c1c4326a840908cc7ca72c7198cb6addb8b92ec81c1d6/pydantic_core-2.27.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d950caa237bb1954f1b8c9227b5065ba6875ac9771bb8ec790d956a699b78676", size = 1771648 }, + { url = "https://files.pythonhosted.org/packages/8f/fc/5485cf0b0bb38da31d1d292160a4d123b5977841ddc1122c671a30b76cfd/pydantic_core-2.27.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e4216e64d203e39c62df627aa882f02a2438d18a5f21d7f721621f7a5d3611d", size = 1826929 }, + { url = "https://files.pythonhosted.org/packages/a1/ff/fb1284a210e13a5f34c639efc54d51da136074ffbe25ec0c279cf9fbb1c4/pydantic_core-2.27.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02a3d637bd387c41d46b002f0e49c52642281edacd2740e5a42f7017feea3f2c", size = 1980591 }, + { url = "https://files.pythonhosted.org/packages/f1/14/77c1887a182d05af74f6aeac7b740da3a74155d3093ccc7ee10b900cc6b5/pydantic_core-2.27.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:161c27ccce13b6b0c8689418da3885d3220ed2eae2ea5e9b2f7f3d48f1d52c27", size = 1981326 }, + { url = "https://files.pythonhosted.org/packages/06/aa/6f1b2747f811a9c66b5ef39d7f02fbb200479784c75e98290d70004b1253/pydantic_core-2.27.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = 
"sha256:19910754e4cc9c63bc1c7f6d73aa1cfee82f42007e407c0f413695c2f7ed777f", size = 1989205 }, + { url = "https://files.pythonhosted.org/packages/7a/d2/8ce2b074d6835f3c88d85f6d8a399790043e9fdb3d0e43455e72d19df8cc/pydantic_core-2.27.1-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:e173486019cc283dc9778315fa29a363579372fe67045e971e89b6365cc035ed", size = 2079616 }, + { url = "https://files.pythonhosted.org/packages/65/71/af01033d4e58484c3db1e5d13e751ba5e3d6b87cc3368533df4c50932c8b/pydantic_core-2.27.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:af52d26579b308921b73b956153066481f064875140ccd1dfd4e77db89dbb12f", size = 2133265 }, + { url = "https://files.pythonhosted.org/packages/33/72/f881b5e18fbb67cf2fb4ab253660de3c6899dbb2dba409d0b757e3559e3d/pydantic_core-2.27.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:981fb88516bd1ae8b0cbbd2034678a39dedc98752f264ac9bc5839d3923fa04c", size = 2001864 }, +] + +[[package]] +name = "pydantic-settings" +version = "2.6.1" +source = { git = "https://github.com/pydantic/pydantic-settings#7bcb6edba2f683c83419283f977a7d16ec4c3b64" } +dependencies = [ + { name = "pydantic" }, + { name = "python-dotenv" }, ] [[package]] @@ -1270,6 +2180,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 }, ] +[[package]] +name = "python-dotenv" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bc/57/e84d88dfe0aec03b7a2d4327012c1627ab5f03652216c63d49846d7a6c58/python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", size = 39115 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/6a/3e/b68c118422ec867fa7ab88444e1274aa40681c606d59ac27de5a5588f082/python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a", size = 19863 }, +] + +[[package]] +name = "python-json-logger" +version = "2.0.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4f/da/95963cebfc578dabd323d7263958dfb68898617912bb09327dd30e9c8d13/python-json-logger-2.0.7.tar.gz", hash = "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c", size = 10508 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/a6/145655273568ee78a581e734cf35beb9e33a370b29c5d3c8fee3744de29f/python_json_logger-2.0.7-py3-none-any.whl", hash = "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd", size = 8067 }, +] + [[package]] name = "python-libsbml" version = "5.20.4" @@ -1315,6 +2243,181 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725", size = 508002 }, ] +[[package]] +name = "pywin32" +version = "308" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/a6/3e9f2c474895c1bb61b11fa9640be00067b5c5b363c501ee9c3fa53aec01/pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e", size = 5927028 }, + { url = "https://files.pythonhosted.org/packages/d9/b4/84e2463422f869b4b718f79eb7530a4c1693e96b8a4e5e968de38be4d2ba/pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e", size = 6558484 }, + { url = 
"https://files.pythonhosted.org/packages/9f/8f/fb84ab789713f7c6feacaa08dad3ec8105b88ade8d1c4f0f0dfcaaa017d6/pywin32-308-cp310-cp310-win_arm64.whl", hash = "sha256:a5ab5381813b40f264fa3495b98af850098f814a25a63589a8e9eb12560f450c", size = 7971454 }, + { url = "https://files.pythonhosted.org/packages/eb/e2/02652007469263fe1466e98439831d65d4ca80ea1a2df29abecedf7e47b7/pywin32-308-cp311-cp311-win32.whl", hash = "sha256:5d8c8015b24a7d6855b1550d8e660d8daa09983c80e5daf89a273e5c6fb5095a", size = 5928156 }, + { url = "https://files.pythonhosted.org/packages/48/ef/f4fb45e2196bc7ffe09cad0542d9aff66b0e33f6c0954b43e49c33cad7bd/pywin32-308-cp311-cp311-win_amd64.whl", hash = "sha256:575621b90f0dc2695fec346b2d6302faebd4f0f45c05ea29404cefe35d89442b", size = 6559559 }, + { url = "https://files.pythonhosted.org/packages/79/ef/68bb6aa865c5c9b11a35771329e95917b5559845bd75b65549407f9fc6b4/pywin32-308-cp311-cp311-win_arm64.whl", hash = "sha256:100a5442b7332070983c4cd03f2e906a5648a5104b8a7f50175f7906efd16bb6", size = 7972495 }, + { url = "https://files.pythonhosted.org/packages/00/7c/d00d6bdd96de4344e06c4afbf218bc86b54436a94c01c71a8701f613aa56/pywin32-308-cp312-cp312-win32.whl", hash = "sha256:587f3e19696f4bf96fde9d8a57cec74a57021ad5f204c9e627e15c33ff568897", size = 5939729 }, + { url = "https://files.pythonhosted.org/packages/21/27/0c8811fbc3ca188f93b5354e7c286eb91f80a53afa4e11007ef661afa746/pywin32-308-cp312-cp312-win_amd64.whl", hash = "sha256:00b3e11ef09ede56c6a43c71f2d31857cf7c54b0ab6e78ac659497abd2834f47", size = 6543015 }, + { url = "https://files.pythonhosted.org/packages/9d/0f/d40f8373608caed2255781a3ad9a51d03a594a1248cd632d6a298daca693/pywin32-308-cp312-cp312-win_arm64.whl", hash = "sha256:9b4de86c8d909aed15b7011182c8cab38c8850de36e6afb1f0db22b8959e3091", size = 7976033 }, +] + +[[package]] +name = "pywinpty" +version = "2.0.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f1/82/90f8750423cba4b9b6c842df227609fb60704482d7abf6dd47e2babc055a/pywinpty-2.0.14.tar.gz", hash = "sha256:18bd9529e4a5daf2d9719aa17788ba6013e594ae94c5a0c27e83df3278b0660e", size = 27769 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/09/56376af256eab8cc5f8982a3b138d387136eca27fa1a8a68660e8ed59e4b/pywinpty-2.0.14-cp310-none-win_amd64.whl", hash = "sha256:0b149c2918c7974f575ba79f5a4aad58bd859a52fa9eb1296cc22aa412aa411f", size = 1397115 }, + { url = "https://files.pythonhosted.org/packages/be/e2/af1a99c0432e4e58c9ac8e334ee191790ec9793d33559189b9d2069bdc1d/pywinpty-2.0.14-cp311-none-win_amd64.whl", hash = "sha256:cf2a43ac7065b3e0dc8510f8c1f13a75fb8fde805efa3b8cff7599a1ef497bc7", size = 1397223 }, + { url = "https://files.pythonhosted.org/packages/ad/79/759ae767a3b78d340446efd54dd1fe4f7dafa4fc7be96ed757e44bcdba54/pywinpty-2.0.14-cp312-none-win_amd64.whl", hash = "sha256:55dad362ef3e9408ade68fd173e4f9032b3ce08f68cfe7eacb2c263ea1179737", size = 1397207 }, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/95/a3fac87cb7158e231b5a6012e438c647e1a87f09f8e0d123acec8ab8bf71/PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", size = 184199 }, + { url = "https://files.pythonhosted.org/packages/c7/7a/68bd47624dab8fd4afbfd3c48e3b79efe09098ae941de5b58abcbadff5cb/PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", size = 171758 }, + { url = 
"https://files.pythonhosted.org/packages/49/ee/14c54df452143b9ee9f0f29074d7ca5516a36edb0b4cc40c3f280131656f/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", size = 718463 }, + { url = "https://files.pythonhosted.org/packages/4d/61/de363a97476e766574650d742205be468921a7b532aa2499fcd886b62530/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", size = 719280 }, + { url = "https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", size = 751239 }, + { url = "https://files.pythonhosted.org/packages/b7/33/5504b3a9a4464893c32f118a9cc045190a91637b119a9c881da1cf6b7a72/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", size = 695802 }, + { url = "https://files.pythonhosted.org/packages/5c/20/8347dcabd41ef3a3cdc4f7b7a2aff3d06598c8779faa189cdbf878b626a4/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", size = 720527 }, + { url = "https://files.pythonhosted.org/packages/be/aa/5afe99233fb360d0ff37377145a949ae258aaab831bde4792b32650a4378/PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", size = 144052 }, + { url = "https://files.pythonhosted.org/packages/b5/84/0fa4b06f6d6c958d207620fc60005e241ecedceee58931bb20138e1e5776/PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", size = 161774 }, + { url = 
"https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612 }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040 }, + { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829 }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167 }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952 }, + { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301 }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638 }, + { url = 
"https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850 }, + { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980 }, + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873 }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302 }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154 }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223 }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542 }, + { url = 
"https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164 }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611 }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591 }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338 }, +] + +[[package]] +name = "pyzmq" +version = "26.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "implementation_name == 'pypy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/05/bed626b9f7bb2322cdbbf7b4bd8f54b1b617b0d2ab2d3547d6e39428a48e/pyzmq-26.2.0.tar.gz", hash = "sha256:070672c258581c8e4f640b5159297580a9974b026043bd4ab0470be9ed324f1f", size = 271975 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/a8/9837c39aba390eb7d01924ace49d761c8dbe7bc2d6082346d00c8332e431/pyzmq-26.2.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:ddf33d97d2f52d89f6e6e7ae66ee35a4d9ca6f36eda89c24591b0c40205a3629", size = 1340058 }, + { url = "https://files.pythonhosted.org/packages/a2/1f/a006f2e8e4f7d41d464272012695da17fb95f33b54342612a6890da96ff6/pyzmq-26.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:dacd995031a01d16eec825bf30802fceb2c3791ef24bcce48fa98ce40918c27b", size = 1008818 }, + { url = "https://files.pythonhosted.org/packages/b6/09/b51b6683fde5ca04593a57bbe81788b6b43114d8f8ee4e80afc991e14760/pyzmq-26.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89289a5ee32ef6c439086184529ae060c741334b8970a6855ec0b6ad3ff28764", size = 673199 }, + { url = "https://files.pythonhosted.org/packages/c9/78/486f3e2e824f3a645238332bf5a4c4b4477c3063033a27c1e4052358dee2/pyzmq-26.2.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5506f06d7dc6ecf1efacb4a013b1f05071bb24b76350832c96449f4a2d95091c", size = 911762 }, + { url = "https://files.pythonhosted.org/packages/5e/3b/2eb1667c9b866f53e76ee8b0c301b0469745a23bd5a87b7ee3d5dd9eb6e5/pyzmq-26.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ea039387c10202ce304af74def5021e9adc6297067f3441d348d2b633e8166a", size = 868773 }, + { url = "https://files.pythonhosted.org/packages/16/29/ca99b4598a9dc7e468b5417eda91f372b595be1e3eec9b7cbe8e5d3584e8/pyzmq-26.2.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a2224fa4a4c2ee872886ed00a571f5e967c85e078e8e8c2530a2fb01b3309b88", size = 868834 }, + { url = "https://files.pythonhosted.org/packages/ad/e5/9efaeb1d2f4f8c50da04144f639b042bc52869d3a206d6bf672ab3522163/pyzmq-26.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:28ad5233e9c3b52d76196c696e362508959741e1a005fb8fa03b51aea156088f", size = 1202861 }, + { url = "https://files.pythonhosted.org/packages/c3/62/c721b5608a8ac0a69bb83cbb7d07a56f3ff00b3991a138e44198a16f94c7/pyzmq-26.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:1c17211bc037c7d88e85ed8b7d8f7e52db6dc8eca5590d162717c654550f7282", size = 1515304 }, + { url = "https://files.pythonhosted.org/packages/87/84/e8bd321aa99b72f48d4606fc5a0a920154125bd0a4608c67eab742dab087/pyzmq-26.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:b8f86dd868d41bea9a5f873ee13bf5551c94cf6bc51baebc6f85075971fe6eea", size = 1414712 }, + { url = "https://files.pythonhosted.org/packages/cd/cd/420e3fd1ac6977b008b72e7ad2dae6350cc84d4c5027fc390b024e61738f/pyzmq-26.2.0-cp310-cp310-win32.whl", hash = "sha256:46a446c212e58456b23af260f3d9fb785054f3e3653dbf7279d8f2b5546b21c2", size = 578113 }, + { url = "https://files.pythonhosted.org/packages/5c/57/73930d56ed45ae0cb4946f383f985c855c9b3d4063f26416998f07523c0e/pyzmq-26.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:49d34ab71db5a9c292a7644ce74190b1dd5a3475612eefb1f8be1d6961441971", size = 641631 }, + { url = "https://files.pythonhosted.org/packages/61/d2/ae6ac5c397f1ccad59031c64beaafce7a0d6182e0452cc48f1c9c87d2dd0/pyzmq-26.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:bfa832bfa540e5b5c27dcf5de5d82ebc431b82c453a43d141afb1e5d2de025fa", size = 543528 }, + { url = "https://files.pythonhosted.org/packages/12/20/de7442172f77f7c96299a0ac70e7d4fb78cd51eca67aa2cf552b66c14196/pyzmq-26.2.0-cp311-cp311-macosx_10_15_universal2.whl", hash = "sha256:8f7e66c7113c684c2b3f1c83cdd3376103ee0ce4c49ff80a648643e57fb22218", size = 1340639 }, + { url = "https://files.pythonhosted.org/packages/98/4d/5000468bd64c7910190ed0a6c76a1ca59a68189ec1f007c451dc181a22f4/pyzmq-26.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3a495b30fc91db2db25120df5847d9833af237546fd59170701acd816ccc01c4", size = 1008710 }, + { url = "https://files.pythonhosted.org/packages/e1/bf/c67fd638c2f9fbbab8090a3ee779370b97c82b84cc12d0c498b285d7b2c0/pyzmq-26.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77eb0968da535cba0470a5165468b2cac7772cfb569977cff92e240f57e31bef", size = 673129 }, + { url = "https://files.pythonhosted.org/packages/86/94/99085a3f492aa538161cbf27246e8886ff850e113e0c294a5b8245f13b52/pyzmq-26.2.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ace4f71f1900a548f48407fc9be59c6ba9d9aaf658c2eea6cf2779e72f9f317", size = 910107 }, + { url = 
"https://files.pythonhosted.org/packages/31/1d/346809e8a9b999646d03f21096428453465b1bca5cd5c64ecd048d9ecb01/pyzmq-26.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:92a78853d7280bffb93df0a4a6a2498cba10ee793cc8076ef797ef2f74d107cf", size = 867960 }, + { url = "https://files.pythonhosted.org/packages/ab/68/6fb6ae5551846ad5beca295b7bca32bf0a7ce19f135cb30e55fa2314e6b6/pyzmq-26.2.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:689c5d781014956a4a6de61d74ba97b23547e431e9e7d64f27d4922ba96e9d6e", size = 869204 }, + { url = "https://files.pythonhosted.org/packages/0f/f9/18417771dee223ccf0f48e29adf8b4e25ba6d0e8285e33bcbce078070bc3/pyzmq-26.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0aca98bc423eb7d153214b2df397c6421ba6373d3397b26c057af3c904452e37", size = 1203351 }, + { url = "https://files.pythonhosted.org/packages/e0/46/f13e67fe0d4f8a2315782cbad50493de6203ea0d744610faf4d5f5b16e90/pyzmq-26.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1f3496d76b89d9429a656293744ceca4d2ac2a10ae59b84c1da9b5165f429ad3", size = 1514204 }, + { url = "https://files.pythonhosted.org/packages/50/11/ddcf7343b7b7a226e0fc7b68cbf5a5bb56291fac07f5c3023bb4c319ebb4/pyzmq-26.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5c2b3bfd4b9689919db068ac6c9911f3fcb231c39f7dd30e3138be94896d18e6", size = 1414339 }, + { url = "https://files.pythonhosted.org/packages/01/14/1c18d7d5b7be2708f513f37c61bfadfa62161c10624f8733f1c8451b3509/pyzmq-26.2.0-cp311-cp311-win32.whl", hash = "sha256:eac5174677da084abf378739dbf4ad245661635f1600edd1221f150b165343f4", size = 576928 }, + { url = "https://files.pythonhosted.org/packages/3b/1b/0a540edd75a41df14ec416a9a500b9fec66e554aac920d4c58fbd5756776/pyzmq-26.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:5a509df7d0a83a4b178d0f937ef14286659225ef4e8812e05580776c70e155d5", size = 642317 }, + { url = 
"https://files.pythonhosted.org/packages/98/77/1cbfec0358078a4c5add529d8a70892db1be900980cdb5dd0898b3d6ab9d/pyzmq-26.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:c0e6091b157d48cbe37bd67233318dbb53e1e6327d6fc3bb284afd585d141003", size = 543834 }, + { url = "https://files.pythonhosted.org/packages/28/2f/78a766c8913ad62b28581777ac4ede50c6d9f249d39c2963e279524a1bbe/pyzmq-26.2.0-cp312-cp312-macosx_10_15_universal2.whl", hash = "sha256:ded0fc7d90fe93ae0b18059930086c51e640cdd3baebdc783a695c77f123dcd9", size = 1343105 }, + { url = "https://files.pythonhosted.org/packages/b7/9c/4b1e2d3d4065be715e007fe063ec7885978fad285f87eae1436e6c3201f4/pyzmq-26.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:17bf5a931c7f6618023cdacc7081f3f266aecb68ca692adac015c383a134ca52", size = 1008365 }, + { url = "https://files.pythonhosted.org/packages/4f/ef/5a23ec689ff36d7625b38d121ef15abfc3631a9aecb417baf7a4245e4124/pyzmq-26.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55cf66647e49d4621a7e20c8d13511ef1fe1efbbccf670811864452487007e08", size = 665923 }, + { url = "https://files.pythonhosted.org/packages/ae/61/d436461a47437d63c6302c90724cf0981883ec57ceb6073873f32172d676/pyzmq-26.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4661c88db4a9e0f958c8abc2b97472e23061f0bc737f6f6179d7a27024e1faa5", size = 903400 }, + { url = "https://files.pythonhosted.org/packages/47/42/fc6d35ecefe1739a819afaf6f8e686f7f02a4dd241c78972d316f403474c/pyzmq-26.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea7f69de383cb47522c9c208aec6dd17697db7875a4674c4af3f8cfdac0bdeae", size = 860034 }, + { url = "https://files.pythonhosted.org/packages/07/3b/44ea6266a6761e9eefaa37d98fabefa112328808ac41aa87b4bbb668af30/pyzmq-26.2.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7f98f6dfa8b8ccaf39163ce872bddacca38f6a67289116c8937a02e30bbe9711", size = 860579 }, + { url = 
"https://files.pythonhosted.org/packages/38/6f/4df2014ab553a6052b0e551b37da55166991510f9e1002c89cab7ce3b3f2/pyzmq-26.2.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e3e0210287329272539eea617830a6a28161fbbd8a3271bf4150ae3e58c5d0e6", size = 1196246 }, + { url = "https://files.pythonhosted.org/packages/38/9d/ee240fc0c9fe9817f0c9127a43238a3e28048795483c403cc10720ddef22/pyzmq-26.2.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6b274e0762c33c7471f1a7471d1a2085b1a35eba5cdc48d2ae319f28b6fc4de3", size = 1507441 }, + { url = "https://files.pythonhosted.org/packages/85/4f/01711edaa58d535eac4a26c294c617c9a01f09857c0ce191fd574d06f359/pyzmq-26.2.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:29c6a4635eef69d68a00321e12a7d2559fe2dfccfa8efae3ffb8e91cd0b36a8b", size = 1406498 }, + { url = "https://files.pythonhosted.org/packages/07/18/907134c85c7152f679ed744e73e645b365f3ad571f38bdb62e36f347699a/pyzmq-26.2.0-cp312-cp312-win32.whl", hash = "sha256:989d842dc06dc59feea09e58c74ca3e1678c812a4a8a2a419046d711031f69c7", size = 575533 }, + { url = "https://files.pythonhosted.org/packages/ce/2c/a6f4a20202a4d3c582ad93f95ee78d79bbdc26803495aec2912b17dbbb6c/pyzmq-26.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:2a50625acdc7801bc6f74698c5c583a491c61d73c6b7ea4dee3901bb99adb27a", size = 637768 }, + { url = "https://files.pythonhosted.org/packages/5f/0e/eb16ff731632d30554bf5af4dbba3ffcd04518219d82028aea4ae1b02ca5/pyzmq-26.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:4d29ab8592b6ad12ebbf92ac2ed2bedcfd1cec192d8e559e2e099f648570e19b", size = 540675 }, + { url = "https://files.pythonhosted.org/packages/53/fb/36b2b2548286e9444e52fcd198760af99fd89102b5be50f0660fcfe902df/pyzmq-26.2.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:706e794564bec25819d21a41c31d4df2d48e1cc4b061e8d345d7fb4dd3e94072", size = 906955 }, + { url = 
"https://files.pythonhosted.org/packages/77/8f/6ce54f8979a01656e894946db6299e2273fcee21c8e5fa57c6295ef11f57/pyzmq-26.2.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b435f2753621cd36e7c1762156815e21c985c72b19135dac43a7f4f31d28dd1", size = 565701 }, + { url = "https://files.pythonhosted.org/packages/ee/1c/bf8cd66730a866b16db8483286078892b7f6536f8c389fb46e4beba0a970/pyzmq-26.2.0-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:160c7e0a5eb178011e72892f99f918c04a131f36056d10d9c1afb223fc952c2d", size = 794312 }, + { url = "https://files.pythonhosted.org/packages/71/43/91fa4ff25bbfdc914ab6bafa0f03241d69370ef31a761d16bb859f346582/pyzmq-26.2.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c4a71d5d6e7b28a47a394c0471b7e77a0661e2d651e7ae91e0cab0a587859ca", size = 752775 }, + { url = "https://files.pythonhosted.org/packages/ec/d2/3b2ab40f455a256cb6672186bea95cd97b459ce4594050132d71e76f0d6f/pyzmq-26.2.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:90412f2db8c02a3864cbfc67db0e3dcdbda336acf1c469526d3e869394fe001c", size = 550762 }, +] + +[[package]] +name = "questionary" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "prompt-toolkit" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/84/d0/d73525aeba800df7030ac187d09c59dc40df1c878b4fab8669bdc805535d/questionary-2.0.1.tar.gz", hash = "sha256:bcce898bf3dbb446ff62830c86c5c6fb9a22a54146f0f5597d3da43b10d8fc8b", size = 24726 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/e7/2dd8f59d1d328773505f78b85405ddb1cfe74126425d076ce72e65540b8b/questionary-2.0.1-py3-none-any.whl", hash = "sha256:8ab9a01d0b91b68444dff7f6652c1e754105533f083cbe27597c8110ecc230a2", size = 34248 }, +] + +[[package]] +name = "referencing" +version = "0.35.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, 
+] +sdist = { url = "https://files.pythonhosted.org/packages/99/5b/73ca1f8e72fff6fa52119dbd185f73a907b1989428917b24cff660129b6d/referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c", size = 62991 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/59/2056f61236782a2c86b33906c025d4f4a0b17be0161b63b70fd9e8775d36/referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de", size = 26684 }, +] + +[[package]] +name = "requests" +version = "2.32.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, +] + +[[package]] +name = "rfc3339-validator" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/ea/a9387748e2d111c3c2b275ba970b735e04e15cdb1eb30693b6b5708c4dbd/rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b", size = 5513 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/44/4e421b96b67b2daff264473f7465db72fbdf36a07e05494f50300cc7b0c6/rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa", size = 3490 }, +] + +[[package]] +name = "rfc3986-validator" 
+version = "0.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/da/88/f270de456dd7d11dcc808abfa291ecdd3f45ff44e3b549ffa01b126464d0/rfc3986_validator-0.1.1.tar.gz", hash = "sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055", size = 6760 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/51/17023c0f8f1869d8806b979a2bffa3f861f26a3f1a66b094288323fba52f/rfc3986_validator-0.1.1-py2.py3-none-any.whl", hash = "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9", size = 4242 }, +] + [[package]] name = "rich" version = "13.9.3" @@ -1329,6 +2432,65 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/e2/10e9819cf4a20bd8ea2f5dabafc2e6bf4a78d6a0965daeb60a4b34d1c11f/rich-13.9.3-py3-none-any.whl", hash = "sha256:9836f5096eb2172c9e77df411c1b009bace4193d6a481d534fea75ebba758283", size = 242157 }, ] +[[package]] +name = "rpds-py" +version = "0.22.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/80/cce854d0921ff2f0a9fa831ba3ad3c65cee3a46711addf39a2af52df2cfd/rpds_py-0.22.3.tar.gz", hash = "sha256:e32fee8ab45d3c2db6da19a5323bc3362237c8b653c70194414b892fd06a080d", size = 26771 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/2a/ead1d09e57449b99dcc190d8d2323e3a167421d8f8fdf0f217c6f6befe47/rpds_py-0.22.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:6c7b99ca52c2c1752b544e310101b98a659b720b21db00e65edca34483259967", size = 359514 }, + { url = "https://files.pythonhosted.org/packages/8f/7e/1254f406b7793b586c68e217a6a24ec79040f85e030fff7e9049069284f4/rpds_py-0.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be2eb3f2495ba669d2a985f9b426c1797b7d48d6963899276d22f23e33d47e37", size = 349031 }, + { url = 
"https://files.pythonhosted.org/packages/aa/da/17c6a2c73730d426df53675ff9cc6653ac7a60b6438d03c18e1c822a576a/rpds_py-0.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70eb60b3ae9245ddea20f8a4190bd79c705a22f8028aaf8bbdebe4716c3fab24", size = 381485 }, + { url = "https://files.pythonhosted.org/packages/aa/13/2dbacd820466aa2a3c4b747afb18d71209523d353cf865bf8f4796c969ea/rpds_py-0.22.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4041711832360a9b75cfb11b25a6a97c8fb49c07b8bd43d0d02b45d0b499a4ff", size = 386794 }, + { url = "https://files.pythonhosted.org/packages/6d/62/96905d0a35ad4e4bc3c098b2f34b2e7266e211d08635baa690643d2227be/rpds_py-0.22.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64607d4cbf1b7e3c3c8a14948b99345eda0e161b852e122c6bb71aab6d1d798c", size = 423523 }, + { url = "https://files.pythonhosted.org/packages/eb/1b/d12770f2b6a9fc2c3ec0d810d7d440f6d465ccd8b7f16ae5385952c28b89/rpds_py-0.22.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e69b0a0e2537f26d73b4e43ad7bc8c8efb39621639b4434b76a3de50c6966e", size = 446695 }, + { url = "https://files.pythonhosted.org/packages/4d/cf/96f1fd75512a017f8e07408b6d5dbeb492d9ed46bfe0555544294f3681b3/rpds_py-0.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc27863442d388870c1809a87507727b799c8460573cfbb6dc0eeaef5a11b5ec", size = 381959 }, + { url = "https://files.pythonhosted.org/packages/ab/f0/d1c5b501c8aea85aeb938b555bfdf7612110a2f8cdc21ae0482c93dd0c24/rpds_py-0.22.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e79dd39f1e8c3504be0607e5fc6e86bb60fe3584bec8b782578c3b0fde8d932c", size = 410420 }, + { url = "https://files.pythonhosted.org/packages/33/3b/45b6c58fb6aad5a569ae40fb890fc494c6b02203505a5008ee6dc68e65f7/rpds_py-0.22.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e0fa2d4ec53dc51cf7d3bb22e0aa0143966119f42a0c3e4998293a3dd2856b09", 
size = 557620 }, + { url = "https://files.pythonhosted.org/packages/83/62/3fdd2d3d47bf0bb9b931c4c73036b4ab3ec77b25e016ae26fab0f02be2af/rpds_py-0.22.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fda7cb070f442bf80b642cd56483b5548e43d366fe3f39b98e67cce780cded00", size = 584202 }, + { url = "https://files.pythonhosted.org/packages/04/f2/5dced98b64874b84ca824292f9cee2e3f30f3bcf231d15a903126684f74d/rpds_py-0.22.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cff63a0272fcd259dcc3be1657b07c929c466b067ceb1c20060e8d10af56f5bf", size = 552787 }, + { url = "https://files.pythonhosted.org/packages/67/13/2273dea1204eda0aea0ef55145da96a9aa28b3f88bb5c70e994f69eda7c3/rpds_py-0.22.3-cp310-cp310-win32.whl", hash = "sha256:9bd7228827ec7bb817089e2eb301d907c0d9827a9e558f22f762bb690b131652", size = 220088 }, + { url = "https://files.pythonhosted.org/packages/4e/80/8c8176b67ad7f4a894967a7a4014ba039626d96f1d4874d53e409b58d69f/rpds_py-0.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:9beeb01d8c190d7581a4d59522cd3d4b6887040dcfc744af99aa59fef3e041a8", size = 231737 }, + { url = "https://files.pythonhosted.org/packages/15/ad/8d1ddf78f2805a71253fcd388017e7b4a0615c22c762b6d35301fef20106/rpds_py-0.22.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d20cfb4e099748ea39e6f7b16c91ab057989712d31761d3300d43134e26e165f", size = 359773 }, + { url = "https://files.pythonhosted.org/packages/c8/75/68c15732293a8485d79fe4ebe9045525502a067865fa4278f178851b2d87/rpds_py-0.22.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:68049202f67380ff9aa52f12e92b1c30115f32e6895cd7198fa2a7961621fc5a", size = 349214 }, + { url = "https://files.pythonhosted.org/packages/3c/4c/7ce50f3070083c2e1b2bbd0fb7046f3da55f510d19e283222f8f33d7d5f4/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb4f868f712b2dd4bcc538b0a0c1f63a2b1d584c925e69a224d759e7070a12d5", size = 380477 }, + { url = 
"https://files.pythonhosted.org/packages/9a/e9/835196a69cb229d5c31c13b8ae603bd2da9a6695f35fe4270d398e1db44c/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc51abd01f08117283c5ebf64844a35144a0843ff7b2983e0648e4d3d9f10dbb", size = 386171 }, + { url = "https://files.pythonhosted.org/packages/f9/8e/33fc4eba6683db71e91e6d594a2cf3a8fbceb5316629f0477f7ece5e3f75/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f3cec041684de9a4684b1572fe28c7267410e02450f4561700ca5a3bc6695a2", size = 422676 }, + { url = "https://files.pythonhosted.org/packages/37/47/2e82d58f8046a98bb9497a8319604c92b827b94d558df30877c4b3c6ccb3/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7ef9d9da710be50ff6809fed8f1963fecdfecc8b86656cadfca3bc24289414b0", size = 446152 }, + { url = "https://files.pythonhosted.org/packages/e1/78/79c128c3e71abbc8e9739ac27af11dc0f91840a86fce67ff83c65d1ba195/rpds_py-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59f4a79c19232a5774aee369a0c296712ad0e77f24e62cad53160312b1c1eaa1", size = 381300 }, + { url = "https://files.pythonhosted.org/packages/c9/5b/2e193be0e8b228c1207f31fa3ea79de64dadb4f6a4833111af8145a6bc33/rpds_py-0.22.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a60bce91f81ddaac922a40bbb571a12c1070cb20ebd6d49c48e0b101d87300d", size = 409636 }, + { url = "https://files.pythonhosted.org/packages/c2/3f/687c7100b762d62186a1c1100ffdf99825f6fa5ea94556844bbbd2d0f3a9/rpds_py-0.22.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e89391e6d60251560f0a8f4bd32137b077a80d9b7dbe6d5cab1cd80d2746f648", size = 556708 }, + { url = "https://files.pythonhosted.org/packages/8c/a2/c00cbc4b857e8b3d5e7f7fc4c81e23afd8c138b930f4f3ccf9a41a23e9e4/rpds_py-0.22.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e3fb866d9932a3d7d0c82da76d816996d1667c44891bd861a0f97ba27e84fc74", size = 583554 }, + { url = 
"https://files.pythonhosted.org/packages/d0/08/696c9872cf56effdad9ed617ac072f6774a898d46b8b8964eab39ec562d2/rpds_py-0.22.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1352ae4f7c717ae8cba93421a63373e582d19d55d2ee2cbb184344c82d2ae55a", size = 552105 }, + { url = "https://files.pythonhosted.org/packages/18/1f/4df560be1e994f5adf56cabd6c117e02de7c88ee238bb4ce03ed50da9d56/rpds_py-0.22.3-cp311-cp311-win32.whl", hash = "sha256:b0b4136a252cadfa1adb705bb81524eee47d9f6aab4f2ee4fa1e9d3cd4581f64", size = 220199 }, + { url = "https://files.pythonhosted.org/packages/b8/1b/c29b570bc5db8237553002788dc734d6bd71443a2ceac2a58202ec06ef12/rpds_py-0.22.3-cp311-cp311-win_amd64.whl", hash = "sha256:8bd7c8cfc0b8247c8799080fbff54e0b9619e17cdfeb0478ba7295d43f635d7c", size = 231775 }, + { url = "https://files.pythonhosted.org/packages/75/47/3383ee3bd787a2a5e65a9b9edc37ccf8505c0a00170e3a5e6ea5fbcd97f7/rpds_py-0.22.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:27e98004595899949bd7a7b34e91fa7c44d7a97c40fcaf1d874168bb652ec67e", size = 352334 }, + { url = "https://files.pythonhosted.org/packages/40/14/aa6400fa8158b90a5a250a77f2077c0d0cd8a76fce31d9f2b289f04c6dec/rpds_py-0.22.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1978d0021e943aae58b9b0b196fb4895a25cc53d3956b8e35e0b7682eefb6d56", size = 342111 }, + { url = "https://files.pythonhosted.org/packages/7d/06/395a13bfaa8a28b302fb433fb285a67ce0ea2004959a027aea8f9c52bad4/rpds_py-0.22.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:655ca44a831ecb238d124e0402d98f6212ac527a0ba6c55ca26f616604e60a45", size = 384286 }, + { url = "https://files.pythonhosted.org/packages/43/52/d8eeaffab047e6b7b7ef7f00d5ead074a07973968ffa2d5820fa131d7852/rpds_py-0.22.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:feea821ee2a9273771bae61194004ee2fc33f8ec7db08117ef9147d4bbcbca8e", size = 391739 }, + { url = 
"https://files.pythonhosted.org/packages/83/31/52dc4bde85c60b63719610ed6f6d61877effdb5113a72007679b786377b8/rpds_py-0.22.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22bebe05a9ffc70ebfa127efbc429bc26ec9e9b4ee4d15a740033efda515cf3d", size = 427306 }, + { url = "https://files.pythonhosted.org/packages/70/d5/1bab8e389c2261dba1764e9e793ed6830a63f830fdbec581a242c7c46bda/rpds_py-0.22.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3af6e48651c4e0d2d166dc1b033b7042ea3f871504b6805ba5f4fe31581d8d38", size = 442717 }, + { url = "https://files.pythonhosted.org/packages/82/a1/a45f3e30835b553379b3a56ea6c4eb622cf11e72008229af840e4596a8ea/rpds_py-0.22.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e67ba3c290821343c192f7eae1d8fd5999ca2dc99994114643e2f2d3e6138b15", size = 385721 }, + { url = "https://files.pythonhosted.org/packages/a6/27/780c942de3120bdd4d0e69583f9c96e179dfff082f6ecbb46b8d6488841f/rpds_py-0.22.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:02fbb9c288ae08bcb34fb41d516d5eeb0455ac35b5512d03181d755d80810059", size = 415824 }, + { url = "https://files.pythonhosted.org/packages/94/0b/aa0542ca88ad20ea719b06520f925bae348ea5c1fdf201b7e7202d20871d/rpds_py-0.22.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f56a6b404f74ab372da986d240e2e002769a7d7102cc73eb238a4f72eec5284e", size = 561227 }, + { url = "https://files.pythonhosted.org/packages/0d/92/3ed77d215f82c8f844d7f98929d56cc321bb0bcfaf8f166559b8ec56e5f1/rpds_py-0.22.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0a0461200769ab3b9ab7e513f6013b7a97fdeee41c29b9db343f3c5a8e2b9e61", size = 587424 }, + { url = "https://files.pythonhosted.org/packages/09/42/cacaeb047a22cab6241f107644f230e2935d4efecf6488859a7dd82fc47d/rpds_py-0.22.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8633e471c6207a039eff6aa116e35f69f3156b3989ea3e2d755f7bc41754a4a7", size = 555953 }, + { url = 
"https://files.pythonhosted.org/packages/e6/52/c921dc6d5f5d45b212a456c1f5b17df1a471127e8037eb0972379e39dff4/rpds_py-0.22.3-cp312-cp312-win32.whl", hash = "sha256:593eba61ba0c3baae5bc9be2f5232430453fb4432048de28399ca7376de9c627", size = 221339 }, + { url = "https://files.pythonhosted.org/packages/f2/c7/f82b5be1e8456600395366f86104d1bd8d0faed3802ad511ef6d60c30d98/rpds_py-0.22.3-cp312-cp312-win_amd64.whl", hash = "sha256:d115bffdd417c6d806ea9069237a4ae02f513b778e3789a359bc5856e0404cc4", size = 235786 }, + { url = "https://files.pythonhosted.org/packages/8b/63/e29f8ee14fcf383574f73b6bbdcbec0fbc2e5fc36b4de44d1ac389b1de62/rpds_py-0.22.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:d48424e39c2611ee1b84ad0f44fb3b2b53d473e65de061e3f460fc0be5f1939d", size = 360786 }, + { url = "https://files.pythonhosted.org/packages/d3/e0/771ee28b02a24e81c8c0e645796a371350a2bb6672753144f36ae2d2afc9/rpds_py-0.22.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:24e8abb5878e250f2eb0d7859a8e561846f98910326d06c0d51381fed59357bd", size = 350589 }, + { url = "https://files.pythonhosted.org/packages/cf/49/abad4c4a1e6f3adf04785a99c247bfabe55ed868133e2d1881200aa5d381/rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b232061ca880db21fa14defe219840ad9b74b6158adb52ddf0e87bead9e8493", size = 381848 }, + { url = "https://files.pythonhosted.org/packages/3a/7d/f4bc6d6fbe6af7a0d2b5f2ee77079efef7c8528712745659ec0026888998/rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac0a03221cdb5058ce0167ecc92a8c89e8d0decdc9e99a2ec23380793c4dcb96", size = 387879 }, + { url = "https://files.pythonhosted.org/packages/13/b0/575c797377fdcd26cedbb00a3324232e4cb2c5d121f6e4b0dbf8468b12ef/rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb0c341fa71df5a4595f9501df4ac5abfb5a09580081dffbd1ddd4654e6e9123", size = 423916 }, + { url = 
"https://files.pythonhosted.org/packages/54/78/87157fa39d58f32a68d3326f8a81ad8fb99f49fe2aa7ad9a1b7d544f9478/rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf9db5488121b596dbfc6718c76092fda77b703c1f7533a226a5a9f65248f8ad", size = 448410 }, + { url = "https://files.pythonhosted.org/packages/59/69/860f89996065a88be1b6ff2d60e96a02b920a262d8aadab99e7903986597/rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b8db6b5b2d4491ad5b6bdc2bc7c017eec108acbf4e6785f42a9eb0ba234f4c9", size = 382841 }, + { url = "https://files.pythonhosted.org/packages/bd/d7/bc144e10d27e3cb350f98df2492a319edd3caaf52ddfe1293f37a9afbfd7/rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b3d504047aba448d70cf6fa22e06cb09f7cbd761939fdd47604f5e007675c24e", size = 409662 }, + { url = "https://files.pythonhosted.org/packages/14/2a/6bed0b05233c291a94c7e89bc76ffa1c619d4e1979fbfe5d96024020c1fb/rpds_py-0.22.3-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:e61b02c3f7a1e0b75e20c3978f7135fd13cb6cf551bf4a6d29b999a88830a338", size = 558221 }, + { url = "https://files.pythonhosted.org/packages/11/23/cd8f566de444a137bc1ee5795e47069a947e60810ba4152886fe5308e1b7/rpds_py-0.22.3-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:e35ba67d65d49080e8e5a1dd40101fccdd9798adb9b050ff670b7d74fa41c566", size = 583780 }, + { url = "https://files.pythonhosted.org/packages/8d/63/79c3602afd14d501f751e615a74a59040328da5ef29ed5754ae80d236b84/rpds_py-0.22.3-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:26fd7cac7dd51011a245f29a2cc6489c4608b5a8ce8d75661bb4a1066c52dfbe", size = 553619 }, + { url = "https://files.pythonhosted.org/packages/9f/2e/c5c1689e80298d4e94c75b70faada4c25445739d91b94c211244a3ed7ed1/rpds_py-0.22.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:177c7c0fce2855833819c98e43c262007f42ce86651ffbb84f37883308cb0e7d", size = 233338 }, +] + [[package]] 
name = "ruamel-yaml" version = "0.18.6" @@ -1535,6 +2697,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914 }, ] +[[package]] +name = "send2trash" +version = "1.8.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/3a/aec9b02217bb79b87bbc1a21bc6abc51e3d5dcf65c30487ac96c0908c722/Send2Trash-1.8.3.tar.gz", hash = "sha256:b18e7a3966d99871aefeb00cfbcfdced55ce4871194810fc71f4aa484b953abf", size = 17394 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/b0/4562db6223154aa4e22f939003cb92514c79f3d4dccca3444253fd17f902/Send2Trash-1.8.3-py3-none-any.whl", hash = "sha256:0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9", size = 18072 }, +] + [[package]] name = "session-info" version = "1.0.0" @@ -1580,6 +2751,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575 }, ] +[[package]] +name = "soupsieve" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/ce/fbaeed4f9fb8b2daa961f90591662df6a86c1abf25c548329a86920aedfb/soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb", size = 101569 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 }, +] + +[[package]] +name = "stack-data" +version = "0.6.3" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "pure-eval" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 }, +] + [[package]] name = "statsmodels" version = "0.13.2" @@ -1664,6 +2858,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b6/cb/b86984bed139586d01532a587464b5805f12e397594f19f931c4c2fbfa61/tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539", size = 28169 }, ] +[[package]] +name = "termcolor" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/37/72/88311445fd44c455c7d553e61f95412cf89054308a1aa2434ab835075fc5/termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f", size = 13057 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/be/df630c387a0a054815d60be6a97eb4e8f17385d5d6fe660e1c02750062b4/termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8", size = 7755 }, +] + +[[package]] +name = "terminado" +version = "0.18.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess", marker = "os_name != 'nt'" }, + { name = "pywinpty", marker = "os_name == 'nt'" }, + { name = "tornado" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/8a/11/965c6fd8e5cc254f1fe142d547387da17a8ebfd75a3455f637c663fb38a0/terminado-0.18.1.tar.gz", hash = "sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e", size = 32701 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", size = 14154 }, +] + [[package]] name = "threadpoolctl" version = "3.5.0" @@ -1673,6 +2890,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 }, ] +[[package]] +name = "tinycss2" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/fd/7a5ee21fd08ff70d3d33a5781c255cbe779659bd03278feb98b19ee550f4/tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7", size = 87085 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610 }, +] + [[package]] name = "tomli" version = "2.0.2" @@ -1682,6 +2911,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cf/db/ce8eda256fa131af12e0a76d481711abe4681b6923c27efb9a255c9e4594/tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38", size = 13237 }, ] +[[package]] +name = "tomlkit" +version = "0.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/b1/09/a439bec5888f00a54b8b9f05fa94d7f901d6735ef4e55dcec9bc37b5d8fa/tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79", size = 192885 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/b6/a447b5e4ec71e13871be01ba81f5dfc9d0af7e473da256ff46bc0e24026f/tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde", size = 37955 }, +] + +[[package]] +name = "tornado" +version = "6.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/59/45/a0daf161f7d6f36c3ea5fc0c2de619746cc3dd4c76402e9db545bd920f63/tornado-6.4.2.tar.gz", hash = "sha256:92bad5b4746e9879fd7bf1eb21dce4e3fc5128d71601f80005afa39237ad620b", size = 501135 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/7e/71f604d8cea1b58f82ba3590290b66da1e72d840aeb37e0d5f7291bd30db/tornado-6.4.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e828cce1123e9e44ae2a50a9de3055497ab1d0aeb440c5ac23064d9e44880da1", size = 436299 }, + { url = "https://files.pythonhosted.org/packages/96/44/87543a3b99016d0bf54fdaab30d24bf0af2e848f1d13d34a3a5380aabe16/tornado-6.4.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:072ce12ada169c5b00b7d92a99ba089447ccc993ea2143c9ede887e0937aa803", size = 434253 }, + { url = "https://files.pythonhosted.org/packages/cb/fb/fdf679b4ce51bcb7210801ef4f11fdac96e9885daa402861751353beea6e/tornado-6.4.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a017d239bd1bb0919f72af256a970624241f070496635784d9bf0db640d3fec", size = 437602 }, + { url = "https://files.pythonhosted.org/packages/4f/3b/e31aeffffc22b475a64dbeb273026a21b5b566f74dee48742817626c47dc/tornado-6.4.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c36e62ce8f63409301537222faffcef7dfc5284f27eec227389f2ad11b09d946", size = 
436972 }, + { url = "https://files.pythonhosted.org/packages/22/55/b78a464de78051a30599ceb6983b01d8f732e6f69bf37b4ed07f642ac0fc/tornado-6.4.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca9eb02196e789c9cb5c3c7c0f04fb447dc2adffd95265b2c7223a8a615ccbf", size = 437173 }, + { url = "https://files.pythonhosted.org/packages/79/5e/be4fb0d1684eb822c9a62fb18a3e44a06188f78aa466b2ad991d2ee31104/tornado-6.4.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:304463bd0772442ff4d0f5149c6f1c2135a1fae045adf070821c6cdc76980634", size = 437892 }, + { url = "https://files.pythonhosted.org/packages/f5/33/4f91fdd94ea36e1d796147003b490fe60a0215ac5737b6f9c65e160d4fe0/tornado-6.4.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:c82c46813ba483a385ab2a99caeaedf92585a1f90defb5693351fa7e4ea0bf73", size = 437334 }, + { url = "https://files.pythonhosted.org/packages/2b/ae/c1b22d4524b0e10da2f29a176fb2890386f7bd1f63aacf186444873a88a0/tornado-6.4.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:932d195ca9015956fa502c6b56af9eb06106140d844a335590c1ec7f5277d10c", size = 437261 }, + { url = "https://files.pythonhosted.org/packages/b5/25/36dbd49ab6d179bcfc4c6c093a51795a4f3bed380543a8242ac3517a1751/tornado-6.4.2-cp38-abi3-win32.whl", hash = "sha256:2876cef82e6c5978fde1e0d5b1f919d756968d5b4282418f3146b79b58556482", size = 438463 }, + { url = "https://files.pythonhosted.org/packages/61/cc/58b1adeb1bb46228442081e746fcdbc4540905c87e8add7c277540934edb/tornado-6.4.2-cp38-abi3-win_amd64.whl", hash = "sha256:908b71bf3ff37d81073356a5fadcc660eb10c1476ee6e2725588626ce7e5ca38", size = 438907 }, +] + [[package]] name = "tqdm" version = "4.66.6" @@ -1694,6 +2950,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/73/02342de9c2d20922115f787e101527b831c0cffd2105c946c4a4826bcfd4/tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63", size = 78326 }, ] +[[package]] 
+name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, +] + [[package]] name = "troppo" version = "0.1.0" @@ -1704,6 +2969,15 @@ dependencies = [ { name = "xlrd" }, ] +[[package]] +name = "types-python-dateutil" +version = "2.9.0.20241206" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a9/60/47d92293d9bc521cd2301e423a358abfac0ad409b3a1606d8fbae1321961/types_python_dateutil-2.9.0.20241206.tar.gz", hash = "sha256:18f493414c26ffba692a72369fea7a154c502646301ebfe3d56a04b3767284cb", size = 13802 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/b3/ca41df24db5eb99b00d97f89d7674a90cb6b3134c52fb8121b6d8d30f15c/types_python_dateutil-2.9.0.20241206-py3-none-any.whl", hash = "sha256:e248a4bc70a486d3e3ec84d0dc30eec3a5f979d6e7ee4123ae043eedbb987f53", size = 14384 }, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -1742,6 +3016,60 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3c/8f/671c0e1f2572ba625cbcc1faeba9435e00330c3d6962858711445cf1e817/umap_learn-0.5.7-py3-none-any.whl", hash = "sha256:6a7e0be2facfa365a5ed6588447102bdbef32a0ef449535c25c97ea7e680073c", size = 88815 }, ] +[[package]] +name = "uri-template" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/31/c7/0336f2bd0bcbada6ccef7aaa25e443c118a704f828a0620c6fa0207c1b64/uri-template-1.3.0.tar.gz", 
hash = "sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7", size = 21678 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/00/3fca040d7cf8a32776d3d81a00c8ee7457e00f80c649f1e4a863c8321ae9/uri_template-1.3.0-py3-none-any.whl", hash = "sha256:a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363", size = 11140 }, +] + +[[package]] +name = "urllib3" +version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 }, +] + +[[package]] +name = "wcwidth" +version = "0.2.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, +] + +[[package]] +name = "webcolors" +version = "24.11.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/29/061ec845fb58521848f3739e466efd8250b4b7b98c1b6c5bf4d40b419b7e/webcolors-24.11.1.tar.gz", hash = "sha256:ecb3d768f32202af770477b8b65f318fa4f566c22948673a977b00d589dd80f6", size = 45064 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/60/e8/c0e05e4684d13459f93d312077a9a2efbe04d59c393bc2b8802248c908d4/webcolors-24.11.1-py3-none-any.whl", hash = "sha256:515291393b4cdf0eb19c155749a096f779f7d909f7cceea072791cb9095b92e9", size = 14934 }, +] + +[[package]] +name = "webencodings" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/02/ae6ceac1baeda530866a85075641cec12989bd8d31af6d5ab4a3e8c92f47/webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", size = 9721 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774 }, +] + +[[package]] +name = "websocket-client" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, +] + [[package]] name = "win32-setctime" version = "1.1.0" From b30a54f5729c0e14c80d6eb1937fa3e2acacb2b4 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 10:58:48 -0600 Subject: [PATCH 30/91] feat: update unit tests for rnaseq_preprocess --- tests/fixtures/collect_files.py | 93 ++++++++++++++++++++++++++++ tests/unit/test_rnaseq_preprocess.py | 91 +++++++++++++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 tests/fixtures/collect_files.py create 
mode 100644 tests/unit/test_rnaseq_preprocess.py diff --git a/tests/fixtures/collect_files.py b/tests/fixtures/collect_files.py new file mode 100644 index 00000000..6dcefb7e --- /dev/null +++ b/tests/fixtures/collect_files.py @@ -0,0 +1,93 @@ +from pathlib import Path +from typing import NamedTuple + +import pytest +from _pytest.fixtures import SubRequest + +_fragment_size_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*fragment_size*.txt")) +_gene_count_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*.tab")) +_insert_size_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*_insert_size.txt")) +_layout_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*_layout.txt")) +_preparation_method_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*_prep_method.txt")) +_strandedness_filepaths = list(Path("main/data/COMO_input").absolute().rglob("*_strandedness.txt")) + + +class PackedFilepaths(NamedTuple): + sample_name: str + fragment_size: Path + gene_count: Path + insert_size: Path + layout: Path + preparation_method: Path + strandedness: Path + + +@pytest.fixture(params=_fragment_size_filepaths) +def fragment_size_filepath(request: SubRequest) -> Path: + return request.param + + +@pytest.fixture(params=_gene_count_filepaths) +def gene_count_filepath(request: SubRequest) -> Path: + return request.param + + +@pytest.fixture +def all_gene_count_filepaths() -> list[Path]: + return _gene_count_filepaths + + +@pytest.fixture(params=_insert_size_filepaths) +def insert_size_filepath(request: SubRequest) -> Path: + return request.param + + +@pytest.fixture(params=_layout_filepaths) +def layout_filepath(request: SubRequest) -> Path: + return request.param + + +@pytest.fixture(params=_preparation_method_filepaths) +def prep_method_filepath(request: SubRequest) -> Path: + return request.param + + +@pytest.fixture(params=_strandedness_filepaths) +def strand_filepath(request: SubRequest) -> Path: + return 
request.param + + +@pytest.fixture( + params=[ + file + for filepaths in [ + _fragment_size_filepaths, + _gene_count_filepaths, + _insert_size_filepaths, + _layout_filepaths, + _preparation_method_filepaths, + _strandedness_filepaths, + ] + for file in filepaths + ] +) +def any_como_input_filepath(request: SubRequest) -> Path: + return request.param + + +@pytest.fixture(params=[Path("main/data/COMO_input/naiveB").absolute(), Path("main/data/COMO_input/smB").absolute()]) +def como_input_data_directory(request: SubRequest) -> Path: + return request.param + + +@pytest.fixture(params=["naiveB", "smB"]) +def packed_filepaths(sample_name: str) -> PackedFilepaths: + return PackedFilepaths( + sample_name=sample_name, + fragment_size=Path(f"main/data/COMO_input/{sample_name}/fragmentSizes/{sample_name}_fragment_size.txt"), + gene_count=Path(f"main/data/COMO_input/{sample_name}/geneCounts/{sample_name}.tab"), + insert_size=Path(f"main/data/COMO_input/{sample_name}/insertSizes/{sample_name}_insert_size.txt"), + layout=Path(f"main/data/COMO_input/{sample_name}/layouts/{sample_name}_layout.txt"), + preparation_method=Path(f"main/data/COMO_input/{sample_name}/prepMethods/{sample_name}_prep_method.txt"), + strandedness=Path(f"main/data/COMO_input/{sample_name}/strandedness/{sample_name}_strandedness.txt"), + ) diff --git a/tests/unit/test_rnaseq_preprocess.py b/tests/unit/test_rnaseq_preprocess.py new file mode 100644 index 00000000..9483689a --- /dev/null +++ b/tests/unit/test_rnaseq_preprocess.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from pathlib import Path + +import pandas as pd +import pytest +from como.rnaseq_preprocess import ( + _organize_gene_counts_files, + _process_first_multirun_sample, + _process_standard_replicate, + _sample_name_from_filepath, + _STARinformation, + _StudyMetrics, +) + +from tests.fixtures.collect_files import ( + PackedFilepaths, + all_gene_count_filepaths, + any_como_input_filepath, + como_input_data_directory, + 
packed_filepaths, + strand_filepath, +) + + +class TestSTARInformation: + valid_data = Path("main/data/COMO_input/naiveB/geneCounts/S1/naiveB_S1R1.tab").absolute() + invalid_data = Path("main/data/COMO_input/naiveB/fragmentSizes/S1/naiveB_S1R1_fragment_size.txt").absolute() + + @pytest.mark.asyncio + async def test_build_from_tab_valid_file(self): + """Validate building STAR information object.""" + star: _STARinformation = await _STARinformation.build_from_tab(TestSTARInformation.valid_data) + assert len(star.gene_names) == len(star.count_matrix) == 61541 + assert len(star.num_unmapped) == 3 + assert len(star.num_multimapping) == 3 + assert len(star.num_no_feature) == 3 + assert len(star.num_ambiguous) == 3 + + @pytest.mark.asyncio + async def test_build_from_tab_invalid_file(self): + """Validate error on invalid file.""" + with pytest.raises(ValueError, match="Invalid file format"): + await _STARinformation.build_from_tab(TestSTARInformation.invalid_data) + + +def test_sample_name_from_filepath(any_como_input_filepath: Path): # noqa: F811 + expected = "_".join(any_como_input_filepath.stem.split("_")[:2]) + assert _sample_name_from_filepath(any_como_input_filepath) == expected + + +def test_organize_gene_counts_files(como_input_data_directory: Path): # noqa: F811 + metrics: list[_StudyMetrics] = _organize_gene_counts_files(como_input_data_directory) + for metric in metrics: + assert len(metric.sample_names) == metric.num_samples == len(metric.count_files) == len(metric.strand_files) + + for file in metric.count_files: + assert f"/{metric.study_name}/" in file.as_posix() + assert "geneCounts" in file.as_posix() + assert file.suffix == ".tab" + + for file in metric.strand_files: + assert f"/{metric.study_name}/" in file.as_posix() + assert "strandedness" in file.as_posix() + assert file.suffix == ".txt" + + +@pytest.mark.asyncio +async def test_process_first_multirun_sample(strand_filepath: Path, all_gene_count_filepaths: list[Path]): # noqa: F811 + result: 
pd.DataFrame = await _process_first_multirun_sample(strand_filepath, all_gene_count_filepaths) + assert result.columns[0] == "ensembl_gene_id" + assert len(result.columns) == 2 + assert result.columns[1] in strand_filepath.as_posix() + + +def test_pack_filepaths(packed_filepaths: PackedFilepaths): + assert packed_filepaths.sample_name in packed_filepaths.fragment_size.as_posix() + assert packed_filepaths.sample_name in packed_filepaths.gene_count.as_posix() + assert packed_filepaths.sample_name in packed_filepaths.insert_size.as_posix() + assert packed_filepaths.sample_name in packed_filepaths.layout.as_posix() + assert packed_filepaths.sample_name in packed_filepaths.preparation_method.as_posix() + assert packed_filepaths.sample_name in packed_filepaths.strandedness.as_posix() + + +@pytest.mark.asyncio +async def test_process_standard_replicate(packed_filepaths: PackedFilepaths): + await _process_standard_replicate( + counts_file=packed_filepaths.gene_count, + strand_file=packed_filepaths.strandedness, + sample_name=packed_filepaths.sample_name, + ) From 2d78d0bb74eddc922825ba9561b3dfc6757e837b Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 11:29:42 -0600 Subject: [PATCH 31/91] feat: allow long lines in jupyter notebook --- ruff.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ruff.toml b/ruff.toml index 455091e3..21802c50 100644 --- a/ruff.toml +++ b/ruff.toml @@ -48,3 +48,6 @@ ignore = [ "D103", # allow undocumented public method definitions "S101", # allow use of `assert` in test files ] +"main/COMO.ipynb" = [ + "E501", # allow long lines +] From 7768e277bceec615ccfcb31f7453fa73befdfdcc Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 11:29:51 -0600 Subject: [PATCH 32/91] style: ruff formatting --- main/COMO.ipynb | 321 +++++++++++++++++++----------------------------- 1 file changed, 124 insertions(+), 197 deletions(-) diff --git a/main/COMO.ipynb b/main/COMO.ipynb index 8178cd67..d361f9b2 100644 --- 
a/main/COMO.ipynb +++ b/main/COMO.ipynb @@ -235,27 +235,66 @@ ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], - "execution_count": null, + "execution_count": 1, + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-07T03:30:27.253112Z", + "start_time": "2024-12-07T03:30:27.236557Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m2024-12-06 23:12:10\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m629\u001B[0m - \u001B[1mTEST\u001B[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001B[32m2024-12-06 23:12:11\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'total' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001B[0m\n", + "\u001B[32m2024-12-06 23:12:11\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'polya' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/polya-rna/polyarna_naiveB.csv'\u001B[0m\n", + "\u001B[32m2024-12-06 23:12:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m451\u001B[0m - \u001B[1mFetching gene info (this may take 1-5 minutes)\u001B[0m\n", + "\u001B[32m2024-12-06 23:13:04\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m488\u001B[0m - \u001B[32m\u001B[1mGene Info file written at '/Users/joshl/Projects/COMO/main/data/results/naiveB/gene_info.csv'\u001B[0m\n" + ] + } + ], "source": [ - "context_names = \"naiveB\"\n", - "taxon_id = \"human\" # accepts integer (bioDBnet taxon id) or \"human\" or \"mouse\"\n", - "preprocess_mode = \"create\" # 
\"create\" or \"provide\"\n", - "\n", - "# fmt: off\n", - "cmd = \" \".join(\n", - " [\n", - " \"python3\", \"como/rnaseq_preprocess.py\",\n", - " \"--context-names\", context_names,\n", - " \"--taxon-id\", taxon_id,\n", - " \"--mode\", preprocess_mode,\n", - " ]\n", - ")\n", - "# fmt: on\n", + "from pathlib import Path\n", "\n", - "!{cmd}" + "from como.rnaseq_preprocess import rnaseq_preprocess\n", + "from como.types import RNAPrepMethod\n", + "\n", + "context_names = [\"naiveB\"]\n", + "output_gene_info_filepaths = [Path(f\"data/results/{context}/gene_info.csv\") for context in context_names]\n", + "como_context_dirs = [Path(f\"data/COMO_input/{context}\") for context in context_names]\n", + "output_trna_filepaths = [Path(f\"data/results/{context}/total-rna/totalrna_{context}.csv\") for context in context_names]\n", + "output_polya_filepaths = [Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names]\n", + "\n", + "\n", + "for i in range(len(context_names)):\n", + " await rnaseq_preprocess(\n", + " context_name=context_names[i],\n", + " taxon=9606,\n", + " output_gene_info_filepath=output_gene_info_filepaths[i],\n", + " como_context_dir=como_context_dirs[i],\n", + " output_trna_config_filepath=Path(\"./data/config_sheets/trna_config.xlsx\"),\n", + " output_trna_count_matrix_filepath=output_trna_filepaths[i],\n", + " output_polya_config_filepath=Path(\"./data/config_sheets/mrna_config.xlsx\"),\n", + " output_polya_count_matrix_filepath=output_polya_filepaths[i],\n", + " cache=True,\n", + " log_level=\"INFO\",\n", + " )" ] }, { @@ -329,10 +368,10 @@ ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# step 2.2 RNA-seq Analysis for Total RNA-seq library preparation\n", "\n", @@ -385,10 +424,10 @@ ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ 
"mrnaseq_config_file = \"mrnaseq_data_inputs_auto.xlsx\"\n", "rep_ratio = 0.75\n", @@ -439,10 +478,10 @@ ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "scrnaseq_config_file = \"scrnaseq_data_inputs_auto.xlsx\"\n", "rep_ratio = 0.75\n", @@ -627,10 +666,10 @@ ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "expression_requirement = 3\n", "requirement_adjust = \"regressive\"\n", @@ -730,125 +769,16 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/joshl/Projects/COMO/.venv/lib/python3.10/site-packages/cobamp/wrappers/external_wrappers.py:9: UserWarning: \n", - "The wrappers.external_wrappers module will be deprecated in a future release in favour of the wrappers module. \n", - " Available ModelObjectReader classes can still be loaded using cobamp.wrappers.. 
An appropriate model \n", - " reader can also be created using the get_model_reader function on cobamp.wrappers\n", - " warnings.warn(\n", - "\u001B[32m2024-12-04 10:27:00.656\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36mcreate_context_specific_model\u001B[0m:\u001B[36m654\u001B[0m - \u001B[1mCreating 'naiveB' model using 'IMAT' reconstruction and 'GUROBI' solver\u001B[0m\n", - "\u001B[32m2024-12-04 10:27:00.656\u001B[0m | \u001B[33m\u001B[1mWARNING \u001B[0m | \u001B[36mcomo.project\u001B[0m:\u001B[36m__init__\u001B[0m:\u001B[36m30\u001B[0m - \u001B[33m\u001B[1m'data_dir' not provided to Config, using /Users/joshl/Projects/COMO/main/data\u001B[0m\n", - "\u001B[32m2024-12-04 10:27:00.656\u001B[0m | \u001B[33m\u001B[1mWARNING \u001B[0m | \u001B[36mcomo.project\u001B[0m:\u001B[36m__init__\u001B[0m:\u001B[36m36\u001B[0m - \u001B[33m\u001B[1m'config_dir' not provided to Config, using /Users/joshl/Projects/COMO/main/data/config_sheets\u001B[0m\n", - "\u001B[32m2024-12-04 10:27:00.656\u001B[0m | \u001B[33m\u001B[1mWARNING \u001B[0m | \u001B[36mcomo.project\u001B[0m:\u001B[36m__init__\u001B[0m:\u001B[36m42\u001B[0m - \u001B[33m\u001B[1m'results_dir' not provided to Config, using /Users/joshl/Projects/COMO/main/data/results\u001B[0m\n", - "Set parameter WLSAccessID\n", - "Set parameter WLSSecret\n", - "Set parameter LicenseID to value 898845\n", - "Academic license 898845 - for non-commercial use only - registered to jl___@huskers.unl.edu\n", - "No defined compartments in model GeneralModelUpdatedV2. Compartments will be deduced heuristically using regular expressions.\n", - "Using regular expression found the following compartments:c, e, g, i, l, m, n, r, x\n", - "/Users/joshl/Projects/COMO/main/como/utils.py:154: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '['1' '10' '100' ... 
'9992' '9993' '9997']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.\n", - " expression_data.loc[:, \"entrez_gene_id\"] = expression_data[\"entrez_gene_id\"].astype(str)\n", - "\u001B[32m2024-12-04 10:27:04.957\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36m_map_expression_to_reaction\u001B[0m:\u001B[36m391\u001B[0m - \u001B[1mMapped gene expression to reactions, found 2806 error(s).\u001B[0m\n", - "\u001B[32m2024-12-04 10:27:04.959\u001B[0m | \u001B[33m\u001B[1mWARNING \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36m_build_model\u001B[0m:\u001B[36m468\u001B[0m - \u001B[33m\u001B[1mThe force reaction 'ICDH_m' was not found in the general reference_model. Check BiGG, or the relevant database for your general reference_model, for synonyms.\u001B[0m\n", - "\u001B[32m2024-12-04 10:27:04.959\u001B[0m | \u001B[33m\u001B[1mWARNING \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36m_build_model\u001B[0m:\u001B[36m468\u001B[0m - \u001B[33m\u001B[1mThe force reaction 'CATC140_c' was not found in the general reference_model. Check BiGG, or the relevant database for your general reference_model, for synonyms.\u001B[0m\n", - "Read LP format model from file /var/folders/6s/9l9z74v90tn8lprrp7fchb9w0000gn/T/tmprdfbwfb2.lp\n", - "Reading time = 0.02 seconds\n", - ": 5837 rows, 21220 columns, 81018 nonzeros\n", - "/Users/joshl/Projects/COMO/.venv/lib/python3.10/site-packages/cobra/core/group.py:147: UserWarning: need to pass in a list\n", - " warn(\"need to pass in a list\")\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. 
This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. 
This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. 
This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. 
This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. 
This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. 
This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "Error encountered trying to >.\n", - "LibSBML error code -3: The requested action could not be performed. This can occur in a variety of contexts, such as passing a null object as a parameter in a situation where it does not make sense to permit a null object.\n", - "\u001B[32m2024-12-04 10:27:45.905\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36mcreate_context_specific_model\u001B[0m:\u001B[36m690\u001B[0m - \u001B[32m\u001B[1mSaved output file to /Users/joshl/Projects/COMO/main/data/results/naiveB\u001B[0m\n", - "\u001B[32m2024-12-04 10:27:45.905\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36mcreate_context_specific_model\u001B[0m:\u001B[36m691\u001B[0m - \u001B[1mNumber of Genes: 1,477\u001B[0m\n", - "\u001B[32m2024-12-04 10:27:45.905\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36mcreate_context_specific_model\u001B[0m:\u001B[36m692\u001B[0m - \u001B[1mNumber of Metabolites: 3,204\u001B[0m\n", - "\u001B[32m2024-12-04 10:27:45.905\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36m__main__\u001B[0m:\u001B[36mcreate_context_specific_model\u001B[0m:\u001B[36m693\u001B[0m - \u001B[1mNumber of Reactions: 5,263\u001B[0m\n", - "Warning: environment still referenced so free is deferred (Continue to use WLS)\n" - ] - } - ], + "outputs": [], "source": [ + "import json\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "from como.project import Config\n", + "\n", "# Set your objectives before running!\n", "objective_dict = {\"naiveB\": 
\"biomass_maintenance\", \"smB\": \"biomass_maintenance\"}\n", "# -----------------\n", @@ -860,34 +790,28 @@ "recon_algorithms = [\"IMAT\"]\n", "solver = \"GUROBI\"\n", "\n", - "import json\n", - "import os\n", - "from pathlib import Path\n", - "\n", - "from como.project import Config\n", - "\n", "config = Config()\n", "\n", "# Load the output of step 1, which is a dictionary that specifies the merged list of active Gene IDs for each tissue\n", - "step1_results_file = os.path.join(config.data_dir, \"results\", \"step1_results_files.json\")\n", - "with open(step1_results_file) as json_file:\n", + "step1_results_file = config.data_dir / \"results\" \"step1_results_files.json\"\n", + "with step1_results_file.open(\"r\") as json_file:\n", " context_gene_exp = json.load(json_file)\n", "\n", "for recon_algorithm in recon_algorithms:\n", - " for context in context_gene_exp.keys():\n", + " for context in context_gene_exp:\n", " objective = objective_dict[context]\n", "\n", " if recon_algorithm.upper() in [\"IMAT\", \"TINIT\"]:\n", - " active_genes_filepath = os.path.join(config.data_dir, \"results\", context, f\"model_scores_{context}.csv\")\n", + " active_genes_filepath = config.data_dir / \"results\" / context / f\"model_scores_{context}.csv\"\n", " else:\n", " gene_expression_file = context_gene_exp[context]\n", " active_genes_filename = Path(gene_expression_file).name\n", - " active_genes_filepath = os.path.join(config.data_dir, \"results\", context, active_genes_filename)\n", + " active_genes_filepath = config.data_dir / \"results\" / context / active_genes_filename\n", "\n", - " general_model_filepath = os.path.join(config.data_dir, \"GeneralModelUpdatedV2.mat\")\n", - " boundary_reactions_filepath = os.path.join(config.data_dir, \"boundary_rxns\", f\"{context}_boundary_rxns.csv\")\n", - " force_reactions_filepath = os.path.join(config.data_dir, \"force_rxns\", f\"{context}_force_rxns.csv\")\n", - " exclude_reactions_filepath = os.path.join(config.data_dir, 
\"exclude_rxns\", f\"{context}_exclude_rxns.csv\")\n", + " general_model_filepath = config.data_dir / \"GeneralModelUpdatedV2.mat\"\n", + " boundary_reactions_filepath = config.data_dir / \"boundary_rxns\" / f\"{context}_boundary_rxns.csv\"\n", + " force_reactions_filepath = config.data_dir / \"force_rxns\" / f\"{context}_force_rxns.csv\"\n", + " exclude_reactions_filepath = config.data_dir / \"exclude_rxns\" / f\"{context}_exclude_rxns.csv\"\n", "\n", " # fmt: off\n", " cmd = \" \".join(\n", @@ -941,16 +865,14 @@ "metadata": {}, "outputs": [], "source": [ - "import os\n", - "from pathlib import Path\n", - "\n", "import cobra\n", "from como.project import Config\n", "from escher import Builder\n", + "from loguru import logger\n", "\n", "config = Config()\n", "\n", - "user_map_dir = Path(f\"{config.data_dir}/local_files/maps/\")\n", + "user_map_dir = config.data_dir / \"local_files\" / \"maps\"\n", "map_dict = {\n", " \"trypto\": f\"{config.data_dir}/maps/RECON1/RECON1.tryptophan_metabolism.json\",\n", " # \"lipid\": f\"{config.data_dir}/maps/RECON1/RECON1.\", # Not present in COMO by default yet\n", @@ -968,20 +890,18 @@ " index += 1\n", "\n", "# Collect any additional maps under the `{config.data_dir}/maps/` directory\n", - "for file in Path(f\"{config.data_dir}/maps\").glob(\"**/*.json\"):\n", + "for file in (config.data_dir / \"maps\").glob(\"**/*.json\"):\n", " if file not in map_dict.values():\n", " map_dict[file.stem] = file\n", "\n", "for recon_algorithm in recon_algorithms:\n", - " for context in context_gene_exp.keys():\n", - " # for context in [\"naiveB\", \"smB\"]:\n", - " print(f\"Starting {context}\")\n", - " model_json = os.path.join(config.data_dir, \"results\", context, f\"{context}_SpecificModel_{recon_algorithm}.json\")\n", + " for context in context_gene_exp:\n", + " model_json = config.data_dir / \"results\" / context / f\"{context}_SpecificModel_{recon_algorithm}.json\"\n", "\n", - " print(f\"Loading '{context}', this may take some 
time...\")\n", + " logger.info(f\"Loading '{context}', this may take some time...\")\n", " model = cobra.io.load_json_model(model_json)\n", - " for key in map_dict.keys():\n", - " print(f\"Running with: {key}\")\n", + " for key in map_dict:\n", + " logger.info(f\"Running with: {key}\")\n", " builder = Builder(map_json=str(map_dict[key]))\n", " builder.model = model\n", " solution = cobra.flux_analysis.pfba(model)\n", @@ -995,19 +915,22 @@ " ]\n", " builder.reaction_no_data_color = \"#8e8e8e\"\n", "\n", - " builder.save_html(os.path.join(config.data_dir, \"results\", context, \"figures\", f\"{key}_map_{context}_{recon_algorithm}.html\"))\n", + " builder.save_html(\n", + " config.data_dir / \"results\" / context / \"figures\" / f\"{key}_map_{context}_{recon_algorithm}.html\"\n", + " )\n", "\n", - " out_dir = os.path.join(config.data_dir, \"results\", context)\n", - " # for algorithm in [\"GIMME\", \"IMAT\", \"FASTCORE\", \"tINIT\"]:\n", - " report_file = os.path.join(out_dir, f\"memote_report_{context}_{recon_algorithm}.html\")\n", - " model_file = os.path.join(out_dir, f\"{context}_SpecificModel_{recon_algorithm}.xml\")\n", - " log_dir = os.path.join(out_dir, \"memote\")\n", - " log_file = os.path.join(log_dir, f\"{context}_{recon_algorithm}_memote.log\")\n", + " out_dir = config.data_dir / \"results\" / context\n", + " report_file = out_dir / f\"memote_report_{context}_{recon_algorithm}.html\"\n", + " model_file = out_dir / f\"{context}_SpecificModel_{recon_algorithm}.xml\"\n", + " log_dir = out_dir / \"memote\"\n", + " log_file = log_dir / f\"{context}_{recon_algorithm}_memote.log\"\n", "\n", - " if not os.path.exists(log_dir):\n", - " os.mkdir(log_dir)\n", + " if not log_dir.exists():\n", + " log_dir.mkdir(parents=True, exist_ok=True)\n", "\n", - " cmd = \" \".join([\"memote\", \"report\", \"snapshot\", \"--filename\", f\"{report_file}\", f\"{model_file}\", \">\", f\"{log_file}\"])\n", + " cmd = \" \".join(\n", + " [\"memote\", \"report\", \"snapshot\", 
\"--filename\", f\"{report_file}\", f\"{model_file}\", \">\", f\"{log_file}\"]\n", + " )\n", "\n", " !{cmd}" ] @@ -1036,11 +959,12 @@ "metadata": {}, "outputs": [], "source": [ + "from como.utils import stringlist_to_list\n", + "\n", "disease_names = [\"arthritis\", \"lupus_a\", \"lupus_b\"]\n", "data_source = \"rnaseq\"\n", "taxon_id = \"human\"\n", "\n", - "from como.utils import stringlist_to_list\n", "\n", "for context_name in stringlist_to_list(context_names):\n", " disease_config_file = f\"disease_data_inputs_{context_name}.xlsx\"\n", @@ -1100,6 +1024,12 @@ "metadata": {}, "outputs": [], "source": [ + "import json\n", + "\n", + "from como.project import Config\n", + "from como.utils import stringlist_to_list\n", + "from loguru import logger\n", + "\n", "# Knock out simulation for the analyzed tissues and diseases\n", "model_files = {\n", " # \"context_name\": \"/path/to/model.mat\"\n", @@ -1107,11 +1037,6 @@ "}\n", "sovler = \"gurobi\"\n", "\n", - "import json\n", - "import os\n", - "\n", - "from como.utils import stringlist_to_list\n", - "from como.project import Config\n", "\n", "config = Config()\n", "\n", @@ -1119,27 +1044,29 @@ "for context in stringlist_to_list(context_names):\n", " for recon_algorithm in recon_algorithms:\n", " for disease in disease_names:\n", - " disease_path = os.path.join(config.data_dir, \"results\", context, disease)\n", - " out_dir = os.path.join(config.data_dir, \"results\", context, disease)\n", - " tissue_gene_folder = os.path.join(config.data_dir, context)\n", - " os.makedirs(tissue_gene_folder, exist_ok=True)\n", + " disease_path = config.data_dir / \"results\" / context / disease\n", + " out_dir = config.data_dir / \"results\" / context / disease\n", + " tissue_gene_folder = config.data_dir / context\n", + " tissue_gene_folder.mkdir(parents=True, exist_ok=True)\n", "\n", - " if not os.path.exists(disease_path):\n", - " print(f\"Disease path doesn't exist! 
Looking for {disease_path}\")\n", + " if not disease_path.exists():\n", + " logger.warning(f\"Disease path doesn't exist! Looking for {disease_path}\")\n", " continue\n", "\n", " # load the results of step 3 to dictionary \"disease_files\"\n", - " step3_results_file = os.path.join(config.data_dir, \"results\", context, disease, \"step2_results_files.json\")\n", + " step3_results_file = config.data_dir / \"results\" / context / disease / \"step2_results_files.json\"\n", "\n", - " with open(step3_results_file) as json_file:\n", + " with step3_results_file.open(\"r\") as json_file:\n", " disease_files = json.load(json_file)\n", " down_regulated_disease_genes = disease_files[\"down_regulated\"]\n", " up_regulated_disease_genes = disease_files[\"up_regulated\"]\n", "\n", - " if context in model_files.keys():\n", + " if context in model_files:\n", " tissue_specific_model_filepath = model_files[context]\n", " else:\n", - " tissue_specific_model_filepath = os.path.join(config.data_dir, \"results\", context, f\"{context}_SpecificModel_{recon_algorithm}.mat\")\n", + " tissue_specific_model_filepath = (\n", + " config.data_dir / \"results\" / context / f\"{context}_SpecificModel_{recon_algorithm}.mat\"\n", + " )\n", "\n", " # fmt: off\n", " cmd = [\n", @@ -1156,7 +1083,7 @@ " # fmt: on\n", "\n", " if recon_algorithm == \"IMAT\":\n", - " cmd.extend([\"--reference-flux-file\", os.path.join(config.data_dir, \"results\", context, \"IMAT_flux.csv\")])\n", + " cmd.extend([\"--reference-flux-file\", config.data_dir / \"results\" / context / \"IMAT_flux.csv\"])\n", "\n", " cmd = \" \".join(cmd)\n", " !{cmd}" From eeef8918b30d6abbbed232d85bed3ef47290bc80 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 11:32:12 -0600 Subject: [PATCH 33/91] fix: ignore F811 --- ruff.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/ruff.toml b/ruff.toml index 21802c50..3691008f 100644 --- a/ruff.toml +++ b/ruff.toml @@ -46,6 +46,7 @@ ignore = [ "D101", # allow undocumented public 
class "D102", # allow undocumented class method "D103", # allow undocumented public method definitions + "F811", # allow redefinition of variables, required for pytest fixtures "S101", # allow use of `assert` in test files ] "main/COMO.ipynb" = [ From 9b5325ef7ab86238f7a9e99ea2140a2d28b45861 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 11:33:55 -0600 Subject: [PATCH 34/91] fix: duplicate noqa directive --- tests/test_proteomics.py | 2 +- tests/unit/test_rnaseq_preprocess.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_proteomics.py b/tests/test_proteomics.py index d3c6ad66..0f38be27 100644 --- a/tests/test_proteomics.py +++ b/tests/test_proteomics.py @@ -77,7 +77,7 @@ async def test_ftp_client(self): assert await client.quit() is None @pytest.mark.skip(reason="pyftpdlib is broken, no way to test this") - def test_reader(self, ftpserver, fixture_ftp_server, ftp_file_names): # noqa: F811 + def test_reader(self, ftpserver, fixture_ftp_server, ftp_file_names): # Use pytest_localftpserver and fixtures.fixture_ftp_server.fix # Now we can get login information for our local FTP server file_extensions: list[str] = ["raw"] diff --git a/tests/unit/test_rnaseq_preprocess.py b/tests/unit/test_rnaseq_preprocess.py index 9483689a..8347b790 100644 --- a/tests/unit/test_rnaseq_preprocess.py +++ b/tests/unit/test_rnaseq_preprocess.py @@ -44,12 +44,12 @@ async def test_build_from_tab_invalid_file(self): await _STARinformation.build_from_tab(TestSTARInformation.invalid_data) -def test_sample_name_from_filepath(any_como_input_filepath: Path): # noqa: F811 +def test_sample_name_from_filepath(any_como_input_filepath: Path): expected = "_".join(any_como_input_filepath.stem.split("_")[:2]) assert _sample_name_from_filepath(any_como_input_filepath) == expected -def test_organize_gene_counts_files(como_input_data_directory: Path): # noqa: F811 +def test_organize_gene_counts_files(como_input_data_directory: Path): metrics: 
list[_StudyMetrics] = _organize_gene_counts_files(como_input_data_directory) for metric in metrics: assert len(metric.sample_names) == metric.num_samples == len(metric.count_files) == len(metric.strand_files) @@ -66,7 +66,7 @@ def test_organize_gene_counts_files(como_input_data_directory: Path): # noqa: F @pytest.mark.asyncio -async def test_process_first_multirun_sample(strand_filepath: Path, all_gene_count_filepaths: list[Path]): # noqa: F811 +async def test_process_first_multirun_sample(strand_filepath: Path, all_gene_count_filepaths: list[Path]): result: pd.DataFrame = await _process_first_multirun_sample(strand_filepath, all_gene_count_filepaths) assert result.columns[0] == "ensembl_gene_id" assert len(result.columns) == 2 From 3279563c79cf66002df6a07d3275691afc8e32e1 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 11:34:59 -0600 Subject: [PATCH 35/91] refactor: remove unused testing file --- tests/test_rnaseq_preprocess.py | 47 --------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 tests/test_rnaseq_preprocess.py diff --git a/tests/test_rnaseq_preprocess.py b/tests/test_rnaseq_preprocess.py deleted file mode 100644 index 752ec333..00000000 --- a/tests/test_rnaseq_preprocess.py +++ /dev/null @@ -1,47 +0,0 @@ -from __future__ import annotations - -import sys - -import pytest -from como import rnaseq_preprocess - - -# Define a list of arguments to test -@pytest.mark.skip("Not testing args because the input 'argv' has been removed") -@pytest.mark.parametrize( - "args", - [ - # Test using data in COMO_input data - ["--context-names", "naiveB immNK", "--gene-format", "Ensembl", "--taxon-id", "9606", "--create-matrix"], - [ - "--context-names", - "dimNK brightNK", - "--gene-format", - "SYMBOL", - "--taxon-id", - "human", - "--provide-matrix", - "--matrix", - "COMO_input/counts_matrix.tsv", - ], - ], -) -def test_arg_input(args: list[str]): - """Asserts that the arguments passed into the function are correct.""" - 
context_names = args[1] - gene_format = args[3] - taxon_id = args[5] - matrix_mode = args[6] - - sys.argv = args - parsed = rnaseq_preprocess._parse_args() - - assert [context_name in parsed.context_names for context_name in context_names.split()] - assert parsed.gene_format == gene_format - assert parsed.taxon_id == taxon_id - - if matrix_mode == "--create-matrix": - assert parsed.make_matrix is True - elif matrix_mode == "--provide-matrix": - assert parsed.make_matrix is False - assert parsed.provided_matrix_fname == args[8] From f262624e7f153631c5f0208938352e447266aaff Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 11:55:30 -0600 Subject: [PATCH 36/91] test: update tests based on changes --- tests/fixtures/collect_files.py | 16 ++++++++-------- tests/unit/test_rnaseq_preprocess.py | 15 +++------------ 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/tests/fixtures/collect_files.py b/tests/fixtures/collect_files.py index 6dcefb7e..93d59e70 100644 --- a/tests/fixtures/collect_files.py +++ b/tests/fixtures/collect_files.py @@ -81,13 +81,13 @@ def como_input_data_directory(request: SubRequest) -> Path: @pytest.fixture(params=["naiveB", "smB"]) -def packed_filepaths(sample_name: str) -> PackedFilepaths: +def packed_filepaths(request: SubRequest) -> PackedFilepaths: return PackedFilepaths( - sample_name=sample_name, - fragment_size=Path(f"main/data/COMO_input/{sample_name}/fragmentSizes/{sample_name}_fragment_size.txt"), - gene_count=Path(f"main/data/COMO_input/{sample_name}/geneCounts/{sample_name}.tab"), - insert_size=Path(f"main/data/COMO_input/{sample_name}/insertSizes/{sample_name}_insert_size.txt"), - layout=Path(f"main/data/COMO_input/{sample_name}/layouts/{sample_name}_layout.txt"), - preparation_method=Path(f"main/data/COMO_input/{sample_name}/prepMethods/{sample_name}_prep_method.txt"), - strandedness=Path(f"main/data/COMO_input/{sample_name}/strandedness/{sample_name}_strandedness.txt"), + sample_name=request.param, + 
fragment_size=Path(f"main/data/COMO_input/{request.param}/fragmentSizes/{request.param}_fragment_size.txt"), + gene_count=Path(f"main/data/COMO_input/{request.param}/geneCounts/{request.param}.tab"), + insert_size=Path(f"main/data/COMO_input/{request.param}/insertSizes/{request.param}_insert_size.txt"), + layout=Path(f"main/data/COMO_input/{request.param}/layouts/{request.param}_layout.txt"), + preparation_method=Path(f"main/data/COMO_input/{request.param}/prepMethods/{request.param}_prep_method.txt"), + strandedness=Path(f"main/data/COMO_input/{request.param}/strandedness/{request.param}_strandedness.txt"), ) diff --git a/tests/unit/test_rnaseq_preprocess.py b/tests/unit/test_rnaseq_preprocess.py index 8347b790..c0d49c2e 100644 --- a/tests/unit/test_rnaseq_preprocess.py +++ b/tests/unit/test_rnaseq_preprocess.py @@ -24,8 +24,8 @@ class TestSTARInformation: - valid_data = Path("main/data/COMO_input/naiveB/geneCounts/S1/naiveB_S1R1.tab").absolute() - invalid_data = Path("main/data/COMO_input/naiveB/fragmentSizes/S1/naiveB_S1R1_fragment_size.txt").absolute() + valid_data = Path("main/data/COMO_input/naiveB/geneCounts/S1/naiveB_S1R1.tab").resolve() + invalid_data = Path("main/data/COMO_input/naiveB/fragmentSizes/S1/naiveB_S1R1_fragment_size.txt").resolve() @pytest.mark.asyncio async def test_build_from_tab_valid_file(self): @@ -40,7 +40,7 @@ async def test_build_from_tab_valid_file(self): @pytest.mark.asyncio async def test_build_from_tab_invalid_file(self): """Validate error on invalid file.""" - with pytest.raises(ValueError, match="Invalid file format"): + with pytest.raises(ValueError, match="Building STAR information requires a '.tab' file"): await _STARinformation.build_from_tab(TestSTARInformation.invalid_data) @@ -80,12 +80,3 @@ def test_pack_filepaths(packed_filepaths: PackedFilepaths): assert packed_filepaths.sample_name in packed_filepaths.layout.as_posix() assert packed_filepaths.sample_name in packed_filepaths.preparation_method.as_posix() assert 
packed_filepaths.sample_name in packed_filepaths.strandedness.as_posix() - - -@pytest.mark.asyncio -async def test_process_standard_replicate(packed_filepaths: PackedFilepaths): - await _process_standard_replicate( - counts_file=packed_filepaths.gene_count, - strand_file=packed_filepaths.strandedness, - sample_name=packed_filepaths.sample_name, - ) From 02e0a3168fa5db7b7d6e57975d0b57fc7249e28d Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 11:56:17 -0600 Subject: [PATCH 37/91] fix: do not use asyncio.gather Using asyncio.gather seemed to cause a race condition where the proper number of reads was not always stored in the correct variable --- main/como/rnaseq_preprocess.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 6d84837b..ae8bcdea 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -41,9 +41,11 @@ async def build_from_tab(cls, filepath: Path) -> _STARinformation: raise ValueError(f"Building STAR information requires a '.tab' file; received: '{filepath}'") async with aiofiles.open(filepath) as i_stream: - unmapped, multimapping, no_feature, ambiguous = await asyncio.gather( - *[i_stream.readline(), i_stream.readline(), i_stream.readline(), i_stream.readline()] - ) + unmapped = await i_stream.readline() + multimapping = await i_stream.readline() + no_feature = await i_stream.readline() + ambiguous = await i_stream.readline() + num_unmapped = [int(i) for i in unmapped.rstrip("\n").split("\t")[1:]] num_multimapping = [int(i) for i in multimapping.rstrip("\n").split("\t")[1:]] num_no_feature = [int(i) for i in no_feature.rstrip("\n").split("\t")[1:]] From 04b47f32fcd5db9f35deaa2114704ec82e8f41a4 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 12:07:46 -0600 Subject: [PATCH 38/91] fix: use proper type --- main/como/rnaseq.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/main/como/rnaseq.py b/main/como/rnaseq.py index 1c504d97..59ba8a70 100644 --- a/main/como/rnaseq.py +++ b/main/como/rnaseq.py @@ -27,9 +27,9 @@ from scipy.signal import find_peaks from sklearn.neighbors import KernelDensity -from como.custom_types import RNASeqPreparationMethod from como.migrations import gene_info_migrations from como.project import Config +from como.types import RNAPrepMethod from como.utils import convert_gene_data @@ -684,7 +684,7 @@ async def save_rnaseq_tests( config_filepath: Path, gene_info_filepath: Path, output_filepath: Path, - prep: RNASeqPreparationMethod, + prep: RNAPrepMethod, taxon_id: Taxon, replicate_ratio: float, batch_ratio: float, @@ -702,7 +702,7 @@ async def save_rnaseq_tests( high_batch_ratio=high_batch_ratio, ) - if prep == RNASeqPreparationMethod.SCRNA: + if prep == RNAPrepMethod.SCRNA: technique = FilteringTechnique.umi logger.warning( "Single cell filtration does not normalize and assumes " From ad0e0074c9554887c48ac396937143b50e1f38de Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 12:08:40 -0600 Subject: [PATCH 39/91] refactor: remove dependency on Config --- main/como/rnaseq.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/main/como/rnaseq.py b/main/como/rnaseq.py index 59ba8a70..633aad6c 100644 --- a/main/como/rnaseq.py +++ b/main/como/rnaseq.py @@ -28,7 +28,6 @@ from sklearn.neighbors import KernelDensity from como.migrations import gene_info_migrations -from como.project import Config from como.types import RNAPrepMethod from como.utils import convert_gene_data @@ -528,10 +527,9 @@ def cpm_filter( context_name: str, metrics: NamedMetrics, filtering_options: _FilteringOptions, - prep: RNASeqPreparationMethod, + output_csv_filepath: Path, ) -> NamedMetrics: """Apply Counts Per Million (CPM) filtering to the count matrix for a given sample.""" - config = Config() n_exp = filtering_options.replicate_ratio n_top = filtering_options.high_replicate_ratio cut_off = 
filtering_options.cut_off @@ -548,12 +546,11 @@ def cpm_filter( # thus, (0 / 1) * 1_000_000 = 0 library_size[library_size == 0] = 1 - output_filepath = config.result_dir / context_name / prep.value / f"CPM_Matrix_{prep.value}_{sample}.csv" - output_filepath.parent.mkdir(parents=True, exist_ok=True) + output_csv_filepath.parent.mkdir(parents=True, exist_ok=True) counts_per_million: pd.DataFrame = (counts / library_size) * 1_000_000 counts_per_million.insert(0, "entrez_gene_ids", pd.Series(entrez_ids)) - logger.debug(f"Writing CPM matrix to {output_filepath}") - counts_per_million.to_csv(output_filepath, index=False) + logger.debug(f"Writing CPM matrix to {output_csv_filepath}") + counts_per_million.to_csv(output_csv_filepath, index=False) # TODO: Counts per million is adding ~61,500 columns (equal to the number of genes) for some reason. # Most likely due to multiplying by 1_000_000, not exactly sure why From c2dce72ce23883981e6a1349c38ca10d5ff1fde5 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 12:08:53 -0600 Subject: [PATCH 40/91] style: ruff formatting --- main/como/rnaseq.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/main/como/rnaseq.py b/main/como/rnaseq.py index 633aad6c..82f09c5b 100644 --- a/main/como/rnaseq.py +++ b/main/como/rnaseq.py @@ -524,7 +524,6 @@ def calculate_z_score(metrics: NamedMetrics) -> NamedMetrics: def cpm_filter( *, - context_name: str, metrics: NamedMetrics, filtering_options: _FilteringOptions, output_csv_filepath: Path, @@ -534,9 +533,8 @@ def cpm_filter( n_top = filtering_options.high_replicate_ratio cut_off = filtering_options.cut_off - sample: str metric: _StudyMetrics - for sample, metric in metrics.items(): + for metric in metrics.values(): counts: pd.DataFrame = metric.count_matrix entrez_ids: list[str] = metric.entrez_gene_ids library_size: pd.DataFrame = counts.sum(axis=1) From 78fcf668efc0fa922305dd4995c1dab564d6565c Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 
12:09:09 -0600 Subject: [PATCH 41/91] refactor: require output filepath for CPM --- main/como/rnaseq.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/main/como/rnaseq.py b/main/como/rnaseq.py index 82f09c5b..c8ab4e4a 100644 --- a/main/como/rnaseq.py +++ b/main/como/rnaseq.py @@ -651,24 +651,31 @@ def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, def filter_counts( *, - context_name: str, metrics: NamedMetrics, technique: FilteringTechnique, filtering_options: _FilteringOptions, - prep: RNASeqPreparationMethod, + cpm_output_filepath: Path | None = None, ) -> NamedMetrics: """Filter the count matrix based on the specified technique.""" match technique: case FilteringTechnique.cpm: + if cpm_output_filepath is None: + raise ValueError("CPM output filepath must be provided") return cpm_filter( - context_name=context_name, metrics=metrics, filtering_options=filtering_options, prep=prep + metrics=metrics, + filtering_options=filtering_options, + output_csv_filepath=cpm_output_filepath, ) + case FilteringTechnique.tpm: return tpm_quantile_filter(metrics=metrics, filtering_options=filtering_options) + case FilteringTechnique.zfpkm: return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=True) + case FilteringTechnique.umi: return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=False) + case _: raise ValueError(f"Technique must be one of {FilteringTechnique}") @@ -716,11 +723,9 @@ async def save_rnaseq_tests( entrez_gene_ids = read_counts_results.entrez_gene_ids metrics = filter_counts( - context_name=context_name, metrics=metrics, technique=technique, filtering_options=filtering_options, - prep=prep, ) expressed_genes: list[str] = [] From f06311f5d30a2335a882a6674d387b54f844ea26 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 12:11:03 -0600 Subject: [PATCH 42/91] style: log message for calcualted genes --- 
main/como/rnaseq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/main/como/rnaseq.py b/main/como/rnaseq.py index c8ab4e4a..cf45f39c 100644 --- a/main/como/rnaseq.py +++ b/main/como/rnaseq.py @@ -758,6 +758,7 @@ async def save_rnaseq_tests( boolean_matrix.to_csv(output_filepath, index=False) logger.info( - f"{context_name} - Found {expressed_count} expressed and {high_confidence_count} confidently expressed genes" + f"{context_name} - Found {expressed_count} expressed genes, " + f"{high_confidence_count} of which are confidently expressed" ) logger.success(f"Wrote boolean matrix to {output_filepath}") From 355b9df052fc0c68884687c063ccf0db27b9ad59 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:09:03 -0600 Subject: [PATCH 43/91] refactor: remove rnaseq.py This moves all components of rnaseq.py into rnaseq_gen.py --- main/como/rnaseq.py | 764 -------------------------------------------- 1 file changed, 764 deletions(-) delete mode 100644 main/como/rnaseq.py diff --git a/main/como/rnaseq.py b/main/como/rnaseq.py deleted file mode 100644 index cf45f39c..00000000 --- a/main/como/rnaseq.py +++ /dev/null @@ -1,764 +0,0 @@ -from __future__ import annotations - -import gc -import math -import multiprocessing -import time -from collections import namedtuple -from dataclasses import dataclass, field -from enum import Enum -from functools import partial -from multiprocessing.pool import Pool -from pathlib import Path -from typing import Callable, NamedTuple - -import numpy as np -import numpy.typing as npt -import pandas as pd -import plotly.graph_objs as go -import scanpy as sc -import sklearn -import sklearn.neighbors -from fast_bioservices import Taxon -from fast_bioservices.pipeline import ensembl_to_gene_id_and_symbol -from loguru import logger -from pandas import DataFrame -from plotly.subplots import make_subplots -from scipy.signal import find_peaks -from sklearn.neighbors import KernelDensity - -from como.migrations import 
gene_info_migrations -from como.types import RNAPrepMethod -from como.utils import convert_gene_data - - -class _FilteringOptions(NamedTuple): - replicate_ratio: float - batch_ratio: float - cut_off: float - high_replicate_ratio: float - high_batch_ratio: float - - -class FilteringTechnique(Enum): - """RNA sequencing filtering capabilities.""" - - cpm = "cpm" - zfpkm = "zfpkm" - tpm = "quantile" - umi = "umi" - - @staticmethod - def from_string(value: str) -> FilteringTechnique: - """Create a filtering technique object from a string.""" - match value.lower(): - case "cpm": - return FilteringTechnique.cpm - case "zfpkm": - return FilteringTechnique.zfpkm - case "quantile": - return FilteringTechnique.tpm - case "umi": - return FilteringTechnique.umi - case _: - possible_values = [t.value for t in FilteringTechnique] - raise ValueError(f"Filtering technique must be one of {possible_values}; got: {value}") - - -class LayoutMethod(Enum): - """RNA sequencing layout method.""" - - paired_end = "paired-end" - single_end = "single-end" - - -@dataclass -class _StudyMetrics: - study: str - num_samples: int - count_matrix: pd.DataFrame - fragment_lengths: npt.NDArray[np.float32] - sample_names: list[str] - layout: list[LayoutMethod] - entrez_gene_ids: list[str] - gene_sizes: npt.NDArray[np.float32] - __normalization_matrix: pd.DataFrame = field(default_factory=pd.DataFrame) - __z_score_matrix: pd.DataFrame = field(default_factory=pd.DataFrame) - __high_confidence_entrez_gene_ids: list[str] = field(default=list) - - def __post_init__(self): - for layout in self.layout: - if layout not in LayoutMethod: - raise ValueError(f"Layout must be 'paired-end' or 'single-end'; got: {layout}") - - @property - def normalization_matrix(self) -> pd.DataFrame: - return self.__normalization_matrix - - @normalization_matrix.setter - def normalization_matrix(self, value: pd.DataFrame) -> None: - self.__normalization_matrix = value - - @property - def z_score_matrix(self) -> pd.DataFrame: - 
return self.__z_score_matrix - - @z_score_matrix.setter - def z_score_matrix(self, value: pd.DataFrame) -> None: - self.__z_score_matrix = value - - @property - def high_confidence_entrez_gene_ids(self) -> list[str]: - return self.__high_confidence_entrez_gene_ids - - @high_confidence_entrez_gene_ids.setter - def high_confidence_entrez_gene_ids(self, values: list[str]) -> None: - self.__high_confidence_entrez_gene_ids = values - - -Density = namedtuple("Density", ["x", "y"]) - - -class _ZFPKMResult(NamedTuple): - zfpkm: pd.Series - density: Density - mu: float - std_dev: float - max_fpkm: float - - -class _ReadMatrixResults(NamedTuple): - metrics: dict[str, _StudyMetrics] - entrez_gene_ids: list[str] - - -NamedMetrics = dict[str, _StudyMetrics] - - -def k_over_a(k: int, a: float) -> Callable[[npt.NDArray], bool]: - """Return a function that filters rows of an array based on the sum of elements being greater than or equal to A at least k times. - - This code is based on the `kOverA` function found in R's `genefilter` package: https://www.rdocumentation.org/packages/genefilter/versions/1.54.2/topics/kOverA - - :param k: The minimum number of times the sum of elements must be greater than or equal to A. - :param a: The value to compare the sum of elements to. - :return: A function that accepts a NumPy array to perform the actual filtering - """ # noqa: E501 - - def filter_func(row: npt.NDArray) -> bool: - return np.sum(row >= a) >= k - - return filter_func - - -def genefilter(data: pd.DataFrame | npt.NDArray, filter_func: Callable[[npt.NDArray], bool]) -> npt.NDArray: - """Apply a filter function to the rows of the data and return the filtered array. - - This code is based on the `genefilter` function found in R's `genefilter` package: https://www.rdocumentation.org/packages/genefilter/versions/1.54.2/topics/genefilter - - :param data: The data to filter - :param filter_func: THe function to filter the data by - :return: A NumPy array of the filtered data. 
- """ - match type(data): - case pd.DataFrame: - return data.apply(filter_func, axis=1).values - case npt.NDArray: - return np.apply_along_axis(filter_func, axis=1, arr=data) - case _: - raise ValueError("Unsupported data type. Must be a Pandas DataFrame or a NumPy array.") - - -async def _read_counts_matrix( - *, - context_name: str, - counts_matrix_filepath: Path, - config_filepath: Path, - gene_info_filepath: Path, - taxon_id: Taxon, -) -> _ReadMatrixResults: - """Read the counts matrix and returns the results. - - :param context_name: The context name being processed. Usually a cell type, but can be any string - :param counts_matrix_filepath: The file path to the gene count matrix - :param config_filepath: The file path to the Excel configuration file - :param gene_info_filepath: The file path to gene information generated by `rnaseq_preprocess.py` - :param taxon_id: The NCBI Taxon ID - :return: A dataclass `ReadMatrixResults` - """ - logger.trace(f"Reading config_filepath at '{config_filepath}'") - config_df: pd.DataFrame = pd.read_excel(config_filepath, sheet_name=context_name, header=0) - gene_info: pd.DataFrame = pd.read_csv(gene_info_filepath) - gene_info = gene_info[gene_info["ensembl_gene_id"] != "-"].reset_index(drop=True) - gene_info = gene_info_migrations(gene_info) - - match counts_matrix_filepath.suffix: - case ".csv": - logger.debug(f"Reading CSV file at '{counts_matrix_filepath}'") - counts_matrix: pd.DataFrame = pd.read_csv(counts_matrix_filepath, header=0) - if "ensembl_gene_id" not in counts_matrix.columns: - raise ValueError( - f"Counts matrix must contain a column named 'ensembl_gene_id'. " - f"Ensure the file '{counts_matrix_filepath}' contains this column." 
- ) - conversion = await ensembl_to_gene_id_and_symbol( - ids=counts_matrix["ensembl_gene_id"].tolist(), taxon=taxon_id - ) - counts_matrix = counts_matrix.merge(conversion, on="ensembl_gene_id", how="left") - - case ".h5ad": - logger.debug(f"Reading h5ad file at '{counts_matrix_filepath}'") - adata: sc.AnnData = sc.read_h5ad(counts_matrix_filepath) - counts_matrix: pd.DataFrame = adata.to_df().T # Make sample names the columns and gene data the index - - # Coherce the incoming gene data (i.e., Gene Symbols) into Entrez and Ensembl Gene IDs - conversion = await convert_gene_data(counts_matrix.index.tolist(), taxon_id) - counts_matrix.index.name = conversion.index.name - counts_matrix = counts_matrix.merge(conversion, left_index=True, right_index=True) - counts_matrix = counts_matrix[counts_matrix["entrez_gene_id"] != "-"] - counts_matrix.reset_index(inplace=True) - - # explicit garbage collection because this function runs for a little while - del adata - del conversion - gc.collect() - - case _: - raise ValueError( - f"Unknown file extension '{counts_matrix_filepath.suffix}'. Valid options are '.csv' or '.h5ad'." - ) - - if counts_matrix.empty: - raise ValueError( - f"Counts matrix is empty. Ensure the file contains data. 
" - f"Attempted to process file '{counts_matrix_filepath}'" - ) - - # Only include Entrez and Ensembl Gene IDs that are present in `gene_info` - counts_matrix["entrez_gene_id"] = counts_matrix["entrez_gene_id"].str.split("//") - counts_matrix = counts_matrix.explode("entrez_gene_id") - counts_matrix = counts_matrix.replace(to_replace="-", value=pd.NA).dropna() - counts_matrix["entrez_gene_id"] = counts_matrix["entrez_gene_id"].astype(int) - - gene_info = gene_info.replace(to_replace="-", value=pd.NA).dropna() - gene_info["entrez_gene_id"] = gene_info["entrez_gene_id"].astype(int) - - counts_matrix = counts_matrix.merge( - gene_info[["entrez_gene_id", "ensembl_gene_id"]], - on=["entrez_gene_id", "ensembl_gene_id"], - how="inner", - ) - gene_info = gene_info.merge( - counts_matrix[["entrez_gene_id", "ensembl_gene_id"]], - on=["entrez_gene_id", "ensembl_gene_id"], - how="inner", - ) - - entrez_gene_ids: list[str] = gene_info["entrez_gene_id"].tolist() - metrics: NamedMetrics = {} - for study in config_df["study"].unique().tolist(): - study_sample_names = config_df[config_df["study"] == study]["sample_name"].tolist() - layouts = config_df[config_df["study"] == study]["layout"].tolist() - metrics[study] = _StudyMetrics( - count_matrix=counts_matrix[counts_matrix.columns.intersection(study_sample_names)], - fragment_lengths=config_df[config_df["study"] == study]["fragment_length"].values, - sample_names=study_sample_names, - layout=[LayoutMethod(layout) for layout in layouts], - num_samples=len(study_sample_names), - entrez_gene_ids=entrez_gene_ids, - gene_sizes=np.array(gene_info["size"].values).astype(np.float32), - study=study, - ) - metrics[study].fragment_lengths[np.isnan(metrics[study].fragment_lengths)] = 0 - metrics[study].count_matrix.index = pd.Index(entrez_gene_ids, name="entrez_gene_id") - - return _ReadMatrixResults(metrics=metrics, entrez_gene_ids=gene_info["entrez_gene_id"].tolist()) - - -def calculate_tpm(metrics: NamedMetrics) -> NamedMetrics: - 
"""Calculate the Transcripts Per Million (TPM) for each sample in the metrics dictionary.""" - for sample in metrics: - count_matrix = metrics[sample].count_matrix - - gene_sizes = metrics[sample].gene_sizes - - tpm_matrix = pd.DataFrame(data=None, index=count_matrix.index, columns=count_matrix.columns) - for i in range(len(count_matrix.columns)): - values: pd.Series = count_matrix.iloc[:, i] + 1 # Add 1 to prevent division by 0 - rate = np.log(values.tolist()) - np.log(gene_sizes) - denominator = np.log(np.sum(np.exp(rate))) - tpm_value = np.exp(rate - denominator + np.log(1e6)) - tpm_matrix.iloc[:, i] = tpm_value - metrics[sample].normalization_matrix = tpm_matrix - - return metrics - - -def calculate_fpkm(metrics: NamedMetrics) -> NamedMetrics: - """Calculate the Fragments Per Kilobase of transcript per Million mapped reads (FPKM) for each sample in the metrics dictionary.""" # noqa: E501 - matrix_values = [] - for study in metrics: - for sample in range(metrics[study].num_samples): - layout = metrics[study].layout[sample] - count_matrix: npt.NDArray = metrics[study].count_matrix.iloc[:, sample].values - gene_size = metrics[study].gene_sizes - - count_matrix = count_matrix.astype(np.float32) - gene_size = gene_size.astype(np.float32) - - match layout: - case LayoutMethod.paired_end: # FPKM - mean_fragment_lengths = metrics[study].fragment_lengths[sample] - # Ensure non-negative value - effective_length = [max(0, size - (mean_fragment_lengths + 1)) for size in gene_size] - n = count_matrix.sum() - fpkm = ((count_matrix + 1) * 1e9) / (np.array(effective_length) * n) - matrix_values.append(fpkm) - case LayoutMethod.single_end: # RPKM - # Add a pseudocount before log to ensure log(0) does not happen - rate = np.log(count_matrix + 1) - np.log(gene_size) - exp_rate = np.exp(rate - np.log(np.sum(count_matrix)) + np.log(1e9)) - matrix_values.append(exp_rate) - case _: - raise ValueError("Invalid normalization method specified") - - fpkm_matrix = 
pd.DataFrame(matrix_values).T # Transpose is needed because values were appended as rows - fpkm_matrix = fpkm_matrix[~pd.isna(fpkm_matrix)] - metrics[study].normalization_matrix = fpkm_matrix - - metrics[study].normalization_matrix.columns = metrics[study].count_matrix.columns - - return metrics - - -def _zfpkm_calculation(col: pd.Series, kernel: KernelDensity, peak_parameters: tuple[float, float]) -> _ZFPKMResult: - """Log2 Transformations. - - Stabilize the variance in the data to make the distribution more symmetric; this is helpful for Gaussian fitting - - Kernel Density Estimation (kde) - - Non-parametric method to estimate the probability density function (PDF) of a random variable - - Estimates the distribution of log2-transformed FPKM values - - Bandwidth parameter controls the smoothness of the density estimate - - KDE Explanation - - A way to smooth a histogram to get a better idea of the underlying distribution of the data - - Given a set of data points, we want to understand how they are distributed. 
- Histograms can be useful, but are sensitive to bin size and number - - The KDE places a "kernel" - a small symmetric function (i.e., Gaussian curve) - at each data point - - The "kernel" acts as a weight, giving more weight to points closer to the center of the kernel, - and less weight to points farther away - - Kernel functions are summed along each point on the x-axis - - A smooth curve is created that represents the estimated density of the data - - Peak Finding - - Identifies that are above a certain height and separated by a minimum distance - - Represent potential local maxima in the distribution - - Peak Selection - - The peak with the highest x-value (from log2-FPKM) is chosen as the mean (mu) - of the "inactive" gene distribution - - The peak representing unexpressed or inactive genes should be at a lower FPKM - value compared to the peak representing expressed genes - - Standard Deviation Estimation - - The mean of log2-FPKM values are greater than the calculated mu - - Standard deviation is estimated based on the assumption that the right tail of the distribution - This represents expressed genes) can be approximated by a half-normal distribution - - zFPKM Transformation - - Centers disbribution around 0 and scales it by the standard deviation. 
- This makes it easier to compare gene expression across different samples - - Represents the number of standard deviations away from the mean of the "inactive" gene distribution - - Higher zFPKM values indicate higher expression levels relative to the "inactive" peak - - A zFPKM value of 0 represents the mean of the "inactive" distribution - - Research shows that a zFPKM value of -3 or greater can be used as - a threshold for calling a gene as "expressed" - : https://doi.org/10.1186/1471-2164-14-778 - """ - col_log2: npt.NDArray = np.log2(col + 1) - col_log2 = np.nan_to_num(col_log2, nan=0) - refit: KernelDensity = kernel.fit(col_log2.reshape(-1, 1)) # type: ignore - - # kde: KernelDensity = KernelDensity(kernel="gaussian", bandwidth=bandwidth).fit(col_log2.reshape(-1, 1)) - x_range = np.linspace(col_log2.min(), col_log2.max(), 1000) - density = np.exp(refit.score_samples(x_range.reshape(-1, 1))) - peaks, _ = find_peaks(density, height=peak_parameters[0], distance=peak_parameters[1]) - peak_positions = x_range[peaks] - - mu = 0 - max_fpkm = 0 - stddev = 1 - - if len(peaks) != 0: - mu = peak_positions.max() - max_fpkm = density[peaks[np.argmax(peak_positions)]] - u = col_log2[col_log2 > mu].mean() - stddev = (u - mu) * np.sqrt(np.pi / 2) - zfpkm = pd.Series((col_log2 - mu) / stddev, dtype=np.float32, name=col.name) - - return _ZFPKMResult(zfpkm=zfpkm, density=Density(x_range, density), mu=mu, std_dev=stddev, max_fpkm=max_fpkm) - - -def zfpkm_transform( - fpkm_df: pd.DataFrame, - bandwidth: int = 0.5, - peak_parameters: tuple[float, float] = (0.02, 1.0), - update_every_percent: float = 0.1, -) -> tuple[dict[str, _ZFPKMResult], DataFrame]: - """Perform zFPKM calculation/transformation.""" - if update_every_percent > 1: - logger.warning( - f"update_every_percent should be a decimal value between 0 and 1; got: {update_every_percent} - " - f"will convert to percentage" - ) - update_every_percent /= 100 - - total = len(fpkm_df.columns) - update_per_step: int = 
int(np.ceil(total * update_every_percent)) - cores = multiprocessing.cpu_count() - 2 - logger.debug(f"Processing {total:,} samples through zFPKM transform using {cores} cores") - logger.debug( - f"Will update every {update_per_step:,} steps as this is approximately " - f"{update_every_percent:.1%} of {total:,}" - ) - - with Pool(processes=cores) as pool: - kernel = KernelDensity(kernel="gaussian", bandwidth=bandwidth) - chunksize = int(math.ceil(len(fpkm_df.columns) / (4 * cores))) - partial_func = partial(_zfpkm_calculation, kernel=kernel, peak_parameters=peak_parameters) - chunk_time = time.time() - start_time = time.time() - - log_padding = len(str(f"{total:,}")) - zfpkm_df = pd.DataFrame(data=0, index=fpkm_df.index, columns=fpkm_df.columns) - results: dict[str, _ZFPKMResult] = {} - result: _ZFPKMResult - for i, result in enumerate( - pool.imap( - partial_func, - (fpkm_df[col] for col in fpkm_df.columns), - chunksize=chunksize, - ) - ): - key = str(result.zfpkm.name) - results[key] = result - zfpkm_df[key] = result.zfpkm - - # show updates every X% and at the end, but skip on first iteration - if i != 0 and (i % update_per_step == 0 or i == total): - current_time = time.time() - chunk = current_time - chunk_time - total_time = current_time - start_time - formatted = f"{i:,}" - logger.debug( - f"Processed {formatted:>{log_padding}} of {total:,} - " - f"chunk took {chunk:.1f} seconds - " - f"running for {total_time:.1f} seconds" - ) - chunk_time = current_time - return results, zfpkm_df - - -def zfpkm_plot(results, *, plot_xfloor: int = -4, subplot_titles: bool = True): - """Plot the log2(FPKM) density and fitted Gaussian for each sample. - - :param results: A dictionary of intermediate results from zfpkm_transform. - :param: subplot_titles: Whether to display facet titles (sample names). - :param plot_xfloor: Lower limit for the x-axis. - :param subplot_titles: Whether to display facet titles (sample names). 
- """ - mega_df = pd.DataFrame(columns=["sample_name", "log2fpkm", "fpkm_density", "fitted_density_scaled"]) - for name, result in results.items(): - stddev = result.std_dev - x = np.array(result.density.x) - y = np.array(result.density.y) - - fitted = np.exp(-0.5 * ((x - result.mu) / stddev) ** 2) / (stddev * np.sqrt(2 * np.pi)) - max_fpkm = y.max() - max_fitted = fitted.max() - scale_fitted = fitted * (max_fpkm / max_fitted) - - df = pd.DataFrame( - { - "sample_name": [name] * len(x), - "log2fpkm": x, - "fpkm_density": y, - "fitted_density_scaled": scale_fitted, - } - ) - mega_df = pd.concat([mega_df, df], ignore_index=True) - - mega_df = mega_df.melt(id_vars=["log2fpkm", "sample_name"], var_name="source", value_name="density") - subplot_titles = list(results.keys()) if subplot_titles else None - fig = make_subplots( - rows=len(results), - cols=1, - subplot_titles=subplot_titles, - vertical_spacing=min(0.05, (1 / (len(results) - 1))), - ) - - for i, (name, group) in enumerate(mega_df.groupby("sample_name"), start=1): - fig.add_trace( - trace=go.Scatter(x=group["log2fpkm"], y=group["density"], mode="lines", name=name, legendgroup=name), - row=i, - col=1, - ) - fig.update_xaxes(title_text="log2(FPKM)", range=[plot_xfloor, max(group["log2fpkm"].tolist())], row=i, col=1) - fig.update_yaxes(title_text="density [scaled]", row=i, col=1) - fig.update_layout(legend_tracegroupgap=0) - - fig.update_layout(height=600 * len(results), width=1000, title_text="zFPKM Plots", showlegend=True) - fig.write_image("zfpkm_plot.png") - - -def calculate_z_score(metrics: NamedMetrics) -> NamedMetrics: - """Calculate the z-score for each sample in the metrics dictionary.""" - for sample in metrics: - log_matrix = np.log(metrics[sample].normalization_matrix) - z_matrix = pd.DataFrame( - data=sklearn.preprocessing.scale(log_matrix, axis=1), columns=metrics[sample].sample_names - ) - metrics[sample].z_score_matrix = z_matrix - return metrics - - -def cpm_filter( - *, - metrics: NamedMetrics, 
- filtering_options: _FilteringOptions, - output_csv_filepath: Path, -) -> NamedMetrics: - """Apply Counts Per Million (CPM) filtering to the count matrix for a given sample.""" - n_exp = filtering_options.replicate_ratio - n_top = filtering_options.high_replicate_ratio - cut_off = filtering_options.cut_off - - metric: _StudyMetrics - for metric in metrics.values(): - counts: pd.DataFrame = metric.count_matrix - entrez_ids: list[str] = metric.entrez_gene_ids - library_size: pd.DataFrame = counts.sum(axis=1) - - # For library_sizes equal to 0, add 1 to prevent divide by 0 - # This will not impact the final counts per million calculation because the original counts are still 0 - # thus, (0 / 1) * 1_000_000 = 0 - library_size[library_size == 0] = 1 - - output_csv_filepath.parent.mkdir(parents=True, exist_ok=True) - counts_per_million: pd.DataFrame = (counts / library_size) * 1_000_000 - counts_per_million.insert(0, "entrez_gene_ids", pd.Series(entrez_ids)) - logger.debug(f"Writing CPM matrix to {output_csv_filepath}") - counts_per_million.to_csv(output_csv_filepath, index=False) - - # TODO: Counts per million is adding ~61,500 columns (equal to the number of genes) for some reason. 
- # Most likely due to multiplying by 1_000_000, not exactly sure why - - min_samples = round(n_exp * len(counts.columns)) # noqa: F841 - top_samples = round(n_top * len(counts.columns)) # noqa: F841 - test_bools = pd.DataFrame({"entrez_gene_ids": entrez_ids}) - for i in range(len(counts_per_million.columns)): - cutoff = ( - 10e6 / (np.median(np.sum(counts[:, i]))) - if cut_off == "default" - else 1e6 * cut_off / np.median(np.sum(counts[:, i])) - ) - test_bools = test_bools.merge(counts_per_million[counts_per_million.iloc[:, i] > cutoff]) - - return metrics - - -def tpm_quantile_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions) -> NamedMetrics: - """Apply quantile-based filtering to the TPM matrix for a given sample.""" - # TODO: Write the TPM matrix to disk - - n_exp = filtering_options.replicate_ratio - n_top = filtering_options.high_replicate_ratio - cut_off = filtering_options.cut_off - metrics = calculate_tpm(metrics) - - sample: str - metric: _StudyMetrics - for sample, metric in metrics.items(): - entrez_ids = metric.entrez_gene_ids - gene_size = metric.gene_sizes - tpm_matrix: pd.DataFrame = metric.normalization_matrix - - min_samples = round(n_exp * len(tpm_matrix.columns)) - top_samples = round(n_top * len(tpm_matrix.columns)) - - tpm_quantile = tpm_matrix[tpm_matrix > 0] - quantile_cutoff = np.quantile( - a=tpm_quantile.values, q=1 - (cut_off / 100), axis=0 - ) # Compute quantile across columns - boolean_expression = pd.DataFrame( - data=tpm_matrix > quantile_cutoff, index=tpm_matrix.index, columns=tpm_matrix.columns - ).astype(int) - - min_func = k_over_a(min_samples, 0.9) - top_func = k_over_a(top_samples, 0.9) - - min_genes: npt.NDArray[bool] = genefilter(boolean_expression, min_func) - top_genes: npt.NDArray[bool] = genefilter(boolean_expression, top_func) - - # Only keep `entrez_gene_ids` that pass `min_genes` - metric.entrez_gene_ids = [gene for gene, keep in zip(entrez_ids, min_genes) if keep] - metric.gene_sizes = [gene for 
gene, keep in zip(gene_size, min_genes) if keep] - metric.count_matrix = metric.count_matrix.iloc[min_genes, :] - metric.normalization_matrix = metrics[sample].normalization_matrix.iloc[min_genes, :] - - keep_top_genes = [gene for gene, keep in zip(entrez_ids, top_genes) if keep] - metric.high_confidence_entrez_gene_ids = [gene for gene, keep in zip(entrez_ids, keep_top_genes) if keep] - - metrics = calculate_z_score(metrics) - - return metrics - - -def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, calcualte_fpkm: bool) -> NamedMetrics: - """Apply zFPKM filtering to the FPKM matrix for a given sample.""" - min_sample_expression = filtering_options.replicate_ratio - high_confidence_sample_expression = filtering_options.high_replicate_ratio - cut_off = filtering_options.cut_off - - if calcualte_fpkm: - metrics = calculate_fpkm(metrics) - - metric: _StudyMetrics - for metric in metrics.values(): - # if fpkm was not calculated, the normalization matrix will be empty; collect the count matrix instead - matrix = metric.count_matrix if metric.normalization_matrix.empty else metric.normalization_matrix - matrix = matrix[matrix.sum(axis=1) > 0] - - minimums = matrix == 0 - results, zfpkm_df = zfpkm_transform(matrix) - zfpkm_df[minimums] = -4 - zfpkm_plot(results) - - # determine which genes are expressed - min_samples = round(min_sample_expression * len(zfpkm_df.columns)) - min_func = k_over_a(min_samples, cut_off) - min_genes: npt.NDArray[bool] = genefilter(zfpkm_df, min_func) - metric.entrez_gene_ids = [gene for gene, keep in zip(metric.entrez_gene_ids, min_genes) if keep] - - # determine which genes are confidently expressed - top_samples = round(high_confidence_sample_expression * len(zfpkm_df.columns)) - top_func = k_over_a(top_samples, cut_off) - top_genes: npt.NDArray[bool] = genefilter(zfpkm_df, top_func) - metric.high_confidence_entrez_gene_ids = [gene for gene, keep in zip(metric.entrez_gene_ids, top_genes) if keep] - - return 
metrics - - -def filter_counts( - *, - metrics: NamedMetrics, - technique: FilteringTechnique, - filtering_options: _FilteringOptions, - cpm_output_filepath: Path | None = None, -) -> NamedMetrics: - """Filter the count matrix based on the specified technique.""" - match technique: - case FilteringTechnique.cpm: - if cpm_output_filepath is None: - raise ValueError("CPM output filepath must be provided") - return cpm_filter( - metrics=metrics, - filtering_options=filtering_options, - output_csv_filepath=cpm_output_filepath, - ) - - case FilteringTechnique.tpm: - return tpm_quantile_filter(metrics=metrics, filtering_options=filtering_options) - - case FilteringTechnique.zfpkm: - return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=True) - - case FilteringTechnique.umi: - return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=False) - - case _: - raise ValueError(f"Technique must be one of {FilteringTechnique}") - - -async def save_rnaseq_tests( - context_name: str, - counts_matrix_filepath: Path, - config_filepath: Path, - gene_info_filepath: Path, - output_filepath: Path, - prep: RNAPrepMethod, - taxon_id: Taxon, - replicate_ratio: float, - batch_ratio: float, - high_replicate_ratio: float, - high_batch_ratio: float, - technique: FilteringTechnique, - cut_off: int | float, -): - """Save the results of the RNA-Seq tests to a CSV file.""" - filtering_options = _FilteringOptions( - replicate_ratio=replicate_ratio, - batch_ratio=batch_ratio, - cut_off=cut_off, - high_replicate_ratio=high_replicate_ratio, - high_batch_ratio=high_batch_ratio, - ) - - if prep == RNAPrepMethod.SCRNA: - technique = FilteringTechnique.umi - logger.warning( - "Single cell filtration does not normalize and assumes " - "gene counts are counted with Unique Molecular Identifiers (UMIs). " - "Setting filtering technique to UMI now." 
- ) - - read_counts_results: _ReadMatrixResults = await _read_counts_matrix( - context_name=context_name, - counts_matrix_filepath=counts_matrix_filepath, - config_filepath=config_filepath, - gene_info_filepath=gene_info_filepath, - taxon_id=taxon_id, - ) - metrics = read_counts_results.metrics - entrez_gene_ids = read_counts_results.entrez_gene_ids - - metrics = filter_counts( - metrics=metrics, - technique=technique, - filtering_options=filtering_options, - ) - - expressed_genes: list[str] = [] - top_genes: list[str] = [] - for metric in metrics.values(): - expressed_genes.extend(metric.entrez_gene_ids) - top_genes.extend(metric.high_confidence_entrez_gene_ids) - - expression_frequency = pd.Series(expressed_genes).value_counts() - expression_df = pd.DataFrame( - {"entrez_gene_id": expression_frequency.index, "frequency": expression_frequency.values} - ) - expression_df["prop"] = expression_df["frequency"] / len(metrics) - expression_df = expression_df[expression_df["prop"] >= filtering_options.batch_ratio] - - top_frequency = pd.Series(top_genes).value_counts() - top_df = pd.DataFrame({"entrez_gene_id": top_frequency.index, "frequency": top_frequency.values}) - top_df["prop"] = top_df["frequency"] / len(metrics) - top_df = top_df[top_df["prop"] >= filtering_options.high_batch_ratio] - - boolean_matrix = pd.DataFrame(data={"entrez_gene_id": entrez_gene_ids, "expressed": 0, "high": 0}) - for gene in entrez_gene_ids: - if gene in expression_df["entrez_gene_id"]: - boolean_matrix.loc[gene, "expressed"] = 1 - if gene in top_df["entrez_gene_id"]: - boolean_matrix.loc[gene, "high"] = 1 - - expressed_count = len(boolean_matrix[boolean_matrix["expressed"] == 1]) - high_confidence_count = len(boolean_matrix[boolean_matrix["high"] == 1]) - - boolean_matrix.to_csv(output_filepath, index=False) - logger.info( - f"{context_name} - Found {expressed_count} expressed genes, " - f"{high_confidence_count} of which are confidently expressed" - ) - logger.success(f"Wrote boolean 
matrix to {output_filepath}") From a43b20dd4b4771b941bfc1c24c087ed1860cc94e Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:09:23 -0600 Subject: [PATCH 44/91] refactor: import items from rnaseq.py --- main/como/rnaseq_gen.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 47aeb8ee..134ada3a 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -1,16 +1,33 @@ from __future__ import annotations -import argparse -import asyncio -from dataclasses import dataclass +import math +import multiprocessing +import time +from collections import namedtuple +from dataclasses import dataclass, field +from enum import Enum +from functools import partial +from multiprocessing.pool import Pool +from pathlib import Path +from typing import Callable, NamedTuple +import numpy as np +import numpy.typing as npt import pandas as pd -from fast_bioservices import Taxon +import plotly.graph_objs as go +import scanpy as sc +import sklearn +import sklearn.neighbors +from fast_bioservices.pipeline import ensembl_to_gene_id_and_symbol from loguru import logger +from pandas import DataFrame +from plotly.subplots import make_subplots +from scipy.signal import find_peaks +from sklearn.neighbors import KernelDensity -from como import Config -from como.custom_types import RNAPrepMethod -from como.rnaseq import FilteringTechnique, save_rnaseq_tests +from como.migrations import gene_info_migrations +from como.project import Config +from como.types import RNAPrepMethod @dataclass From 5675a357a49b99589bd415c5b25a3e951cf55f5f Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:10:02 -0600 Subject: [PATCH 45/91] refactor: remove command line usage --- main/como/rnaseq_gen.py | 134 +--------------------------------------- 1 file changed, 3 insertions(+), 131 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 
134ada3a..db3803a1 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -30,17 +30,13 @@ from como.types import RNAPrepMethod -@dataclass -class _Arguments: - config_file: str +class _FilteringOptions(NamedTuple): replicate_ratio: float batch_ratio: float + cut_off: float high_replicate_ratio: float high_batch_ratio: float - filtering_technique: FilteringTechnique - minimum_cutoff: int | str - library_prep: RNAPrepMethod - taxon: Taxon + def __post_init__(self): self.library_prep = RNAPrepMethod.from_string(str(self.library_prep)) @@ -200,128 +196,4 @@ async def rnaseq_gen( ) -def _parse_args() -> _Arguments: - parser = argparse.ArgumentParser( - prog="rnaseq_gen.py", - description="Generate a list of active and high-confidence genes from a counts matrix using a user defined " - "at normalization-technique at /work/data/results//rnaseq_.csv: " - "https://github.com/HelikarLab/FastqToGeneCounts", - epilog="For additional help, please post questions/issues in the MADRID GitHub repo at " - "https://github.com/HelikarLab/MADRID or email babessell@gmail.com", - ) - parser.add_argument( - "-c", - "--config-file", - type=str, - required=True, - dest="config_file", - help="Name of config .xlsx file in the /work/data/config_files/. 
Can be generated using " - "rnaseq_preprocess.py or manually created and imported into the Juypterlab", - ) - parser.add_argument( - "-r", - "--replicate-ratio", - type=float, - required=False, - default=0.5, - dest="replicate_ratio", - help="Ratio of replicates required for a gene to be active within that study/batch group " - "Example: 0.7 means that for a gene to be active, at least 70% of replicates in a group " - "must pass the cutoff after normalization", - ) - parser.add_argument( - "-g", - "--batch-ratio", - type=float, - required=False, - default=0.5, - dest="batch_ratio", - help="Ratio of groups (studies or batches) required for a gene to be active " - "Example: 0.7 means that for a gene to be active, at least 70% of groups in a study must " - "have passed the replicate ratio test", - ) - parser.add_argument( - "-rh", - "--high-replicate-ratio", - type=float, - required=False, - default=1.0, - dest="high_replicate_ratio", - help="Ratio of replicates required for a gene to be considered high-confidence. " - "High-confidence genes ignore consensus with other data-sources, such as proteomics. " - "Example: 0.9 means that for a gene to be high-confidence, " - "at least 90% of replicates in a group must pass the cutoff after normalization", - ) - parser.add_argument( - "-gh", - "--high-batch-ratio", - type=float, - required=False, - default=1.0, - dest="high_batch_ratio", - help="Ratio of studies/batches required for a gene to be considered high-confidence within that group. " - "High-confidence genes ignore consensus with other data-sources, like proteomics. " - "Example: 0.9 means that for a gene to be high-confidence, " - "at least 90% of groups in a study must have passed the replicate ratio test", - ) - parser.add_argument( - "--taxon", - "--taxon-id", - type=str, - required=True, - dest="taxon", - help="The NCBI Taxonomy ID that is being proessed. 
'9606' for humans, '10090' for mice.", - ) - parser.add_argument( - "-t", - "--filt-technique", - type=str, - required=False, - default="quantile", - dest="filtering_technique", - help="Technique to normalize and filter counts with. " - "Either 'zfpkm', 'quantile', or 'cpm'. More info about each method is discussed in pipeline.ipynb.", - ) - parser.add_argument( - "--minimum-cutoff", - type=int, - required=False, - default=None, - dest="minimum_cutoff", - help="The minimum cutoff used for the filtration technique. " - "If the filtering technique is zFPKM, the default is -3. " - "If the filtering technique is quantile-tpm, the default is 25. " - "If the filtering technique is flat-cpm, the default is determined dynamically. " - "If the filtering technique is quantile, the default is 25.", - ) - parser.add_argument( - "-p", - "--library-prep", - required=True, - choices=["total", "mrna", "scrna"], - dest="library_prep", - help="Library preparation method. " - "Will separate samples into groups to only compare similarly prepared libraries. 
" - "For example, mRNA, total-rna, scRNA, etc", - ) - args = parser.parse_args() - args.filtering_technique = args.filtering_technique.lower() - args.taxon = Taxon.from_int(int(args.taxon)) if str(args.taxon).isdigit() else Taxon.from_string(str(args.taxon)) # type: ignore - return _Arguments(**vars(args)) - - -if __name__ == "__main__": - args = _parse_args() - asyncio.run( - rnaseq_gen( - config_filename=args.config_file, - replicate_ratio=args.replicate_ratio, - batch_ratio=args.batch_ratio, - high_replicate_ratio=args.high_replicate_ratio, - high_batch_ratio=args.high_batch_ratio, - technique=args.filtering_technique, - cut_off=args.minimum_cutoff, - prep=args.library_prep, - taxon_id=args.taxon, - ) ) From b5a199c73bd4f3513960d9c15aecfde8b4d62c2f Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:10:23 -0600 Subject: [PATCH 46/91] refactor: add classes for rna processing --- main/como/rnaseq_gen.py | 120 ++++++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 34 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index db3803a1..6966980a 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -38,45 +38,97 @@ class _FilteringOptions(NamedTuple): high_batch_ratio: float +class FilteringTechnique(Enum): + """RNA sequencing filtering capabilities.""" + + cpm = "cpm" + zfpkm = "zfpkm" + tpm = "quantile" + umi = "umi" + + @staticmethod + def from_string(value: str) -> FilteringTechnique: + """Create a filtering technique object from a string.""" + match value.lower(): + case "cpm": + return FilteringTechnique.cpm + case "zfpkm": + return FilteringTechnique.zfpkm + case "quantile": + return FilteringTechnique.tpm + case "umi": + return FilteringTechnique.umi + case _: + possible_values = [t.value for t in FilteringTechnique] + raise ValueError(f"Got a filtering technique of '{value}'; should be one of: {possible_values}") + + +class LayoutMethod(Enum): + """RNA sequencing layout method.""" + 
+ paired_end = "paired-end" + single_end = "single-end" + + +@dataclass +class _StudyMetrics: + study: str + num_samples: int + count_matrix: pd.DataFrame + fragment_lengths: npt.NDArray[np.float32] + sample_names: list[str] + layout: list[LayoutMethod] + entrez_gene_ids: list[str] + gene_sizes: npt.NDArray[np.float32] + __normalization_matrix: pd.DataFrame = field(default_factory=pd.DataFrame) + __z_score_matrix: pd.DataFrame = field(default_factory=pd.DataFrame) + __high_confidence_entrez_gene_ids: list[str] = field(default=list) + def __post_init__(self): - self.library_prep = RNAPrepMethod.from_string(str(self.library_prep)) - self.filtering_technique = FilteringTechnique.from_string(str(self.filtering_technique)) + for layout in self.layout: + if layout not in LayoutMethod: + raise ValueError(f"Layout must be 'paired-end' or 'single-end'; got: {layout}") - if self.minimum_cutoff is None: - if self.filtering_technique == FilteringTechnique.tpm: - self.minimum_cutoff = 25 - elif self.filtering_technique == FilteringTechnique.cpm: - self.minimum_cutoff = "default" - elif self.filtering_technique == FilteringTechnique.zfpkm: - self.minimum_cutoff = -3 + @property + def normalization_matrix(self) -> pd.DataFrame: + return self.__normalization_matrix + @normalization_matrix.setter + def normalization_matrix(self, value: pd.DataFrame) -> None: + self.__normalization_matrix = value -async def _handle_context_batch( - config_filename: str, - replicate_ratio: float, - batch_ratio: float, - replicate_ratio_high: float, - batch_ratio_high: float, - technique: FilteringTechnique, - cut_off: int | float | str, - prep: RNAPrepMethod, - taxon: Taxon, -) -> None: - """Iterate through each context type and create rnaseq expression file. 
+ @property + def z_score_matrix(self) -> pd.DataFrame: + return self.__z_score_matrix - :param config_filename: The configuration filename to read - :param replicate_ratio: The percentage of replicates that a gene must - appear in for a gene to be marked as "active" in a batch/study - :param batch_ratio: The percentage of batches that a gene must appear in for a gene to be marked as 'active" - :param replicate_ratio_high: The percentage of replicates that a gene must - appear in for a gene to be marked "highly confident" in its expression in a batch/study - :param batch_ratio_high: The percentage of batches that a gene must - appear in for a gene to be marked "highly confident" in its expression - :param technique: The filtering technique to use - :param cut_off: The cutoff value to use for the provided filtering technique - :param prep: The library preparation method - :param taxon: The NCBI Taxon ID - :return: None + @z_score_matrix.setter + def z_score_matrix(self, value: pd.DataFrame) -> None: + self.__z_score_matrix = value + + @property + def high_confidence_entrez_gene_ids(self) -> list[str]: + return self.__high_confidence_entrez_gene_ids + + @high_confidence_entrez_gene_ids.setter + def high_confidence_entrez_gene_ids(self, values: list[str]) -> None: + self.__high_confidence_entrez_gene_ids = values + + +class _ZFPKMResult(NamedTuple): + zfpkm: pd.Series + density: Density + mu: float + std_dev: float + max_fpkm: float + + +class _ReadMatrixResults(NamedTuple): + metrics: dict[str, _StudyMetrics] + entrez_gene_ids: list[str] + + +Density = namedtuple("Density", ["x", "y"]) +NamedMetrics = dict[str, _StudyMetrics] """ config = Config() From 5a2dd33e8f24f373ad8db2a417678f10d74973c3 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:11:10 -0600 Subject: [PATCH 47/91] feat: added k_over_a calculation --- main/como/rnaseq_gen.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/main/como/rnaseq_gen.py 
b/main/como/rnaseq_gen.py index 6966980a..c9ef8562 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -129,6 +129,22 @@ class _ReadMatrixResults(NamedTuple): Density = namedtuple("Density", ["x", "y"]) NamedMetrics = dict[str, _StudyMetrics] + + +def k_over_a(k: int, a: float) -> Callable[[npt.NDArray], bool]: + """Return a function that filters rows of an array based on the sum of elements being greater than or equal to A at least k times. + + This code is based on the `kOverA` function found in R's `genefilter` package: https://www.rdocumentation.org/packages/genefilter/versions/1.54.2/topics/kOverA + + :param k: The minimum number of times the sum of elements must be greater than or equal to A. + :param a: The value to compare the sum of elements to. + :return: A function that accepts a NumPy array to perform the actual filtering + """ # noqa: E501 + + def filter_func(row: npt.NDArray) -> bool: + return np.sum(row >= a) >= k + + return filter_func """ config = Config() From fbb2b765be8f90e548cc56ef82a884baaf85673e Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:11:24 -0600 Subject: [PATCH 48/91] feat: added genefilter function --- main/como/rnaseq_gen.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index c9ef8562..2ad3390b 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -145,14 +145,27 @@ def filter_func(row: npt.NDArray) -> bool: return np.sum(row >= a) >= k return filter_func + + +def genefilter(data: pd.DataFrame | npt.NDArray, filter_func: Callable[[npt.NDArray], bool]) -> npt.NDArray: + """Apply a filter function to the rows of the data and return the filtered array. 
+ + This code is based on the `genefilter` function found in R's `genefilter` package: https://www.rdocumentation.org/packages/genefilter/versions/1.54.2/topics/genefilter + + :param data: The data to filter + :param filter_func: THe function to filter the data by + :return: A NumPy array of the filtered data. """ - config = Config() + if not isinstance(data, (pd.DataFrame, npt.NDArray)): + raise TypeError("Unsupported data type. Must be a Pandas DataFrame or a NumPy array.") + + return ( + data.apply(filter_func, axis=1).values + if isinstance(data, pd.DataFrame) + else np.apply_along_axis(filter_func, axis=1, arr=data) + ) + - config_filepath = config.config_dir / config_filename - if not config_filepath.exists(): - raise FileNotFoundError(f"Unable to find '{config_filename}' at the path: '{config_filepath}'") - xl = pd.ExcelFile(config_filepath) - sheet_names = xl.sheet_names logger.info(f"Reading config file: {config_filepath}") From 1de1717baad4bb3d029b0bd243a0523b887caf5e Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:12:08 -0600 Subject: [PATCH 49/91] refactor: create separate read_counts matrix Reduce complexity of building matrix results --- main/como/rnaseq_gen.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 2ad3390b..795e21d3 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -166,8 +166,22 @@ def genefilter(data: pd.DataFrame | npt.NDArray, filter_func: Callable[[npt.NDAr ) +async def _read_counts(path: Path) -> pd.DataFrame: + if path.suffix not in {".csv", ".h5ad"}: + raise ValueError(f"Unknown file extension '{path.suffix}'. 
Valid options are '.csv' or '.h5ad'.") + + matrix: pd.DataFrame + if path.suffix == ".csv": + logger.debug(f"Reading CSV file at '{path}'") + matrix = pd.read_csv(path, header=0) + elif path.suffix == ".h5ad": + logger.debug(f"Reading h5ad file at '{path}'") + # Make sample names the columns and gene data the index + matrix = sc.read_h5ad(path).to_df().T + + return matrix + - logger.info(f"Reading config file: {config_filepath}") for context_name in sheet_names: logger.debug(f"Starting '{context_name}'") From 279f7cc513e9bfb2501f432ede90f4f42a470024 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:12:44 -0600 Subject: [PATCH 50/91] feat: bring zfpkm_filter from rnaseq.py --- main/como/rnaseq_gen.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 795e21d3..600bfa23 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -222,6 +222,41 @@ async def _read_counts(path: Path) -> pd.DataFrame: logger.success(f"Results saved at '{rnaseq_output_filepath}'") +def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, calcualte_fpkm: bool) -> NamedMetrics: + """Apply zFPKM filtering to the FPKM matrix for a given sample.""" + min_sample_expression = filtering_options.replicate_ratio + high_confidence_sample_expression = filtering_options.high_replicate_ratio + cut_off = filtering_options.cut_off + + if calcualte_fpkm: + metrics = calculate_fpkm(metrics) + + metric: _StudyMetrics + for metric in metrics.values(): + # if fpkm was not calculated, the normalization matrix will be empty; collect the count matrix instead + matrix = metric.count_matrix if metric.normalization_matrix.empty else metric.normalization_matrix + matrix = matrix[matrix.sum(axis=1) > 0] + + minimums = matrix == 0 + results, zfpkm_df = zfpkm_transform(matrix) + zfpkm_df[minimums] = -4 + zfpkm_plot(results) + + # determine which genes are expressed + 
min_samples = round(min_sample_expression * len(zfpkm_df.columns)) + min_func = k_over_a(min_samples, cut_off) + min_genes: npt.NDArray[bool] = genefilter(zfpkm_df, min_func) + metric.entrez_gene_ids = [gene for gene, keep in zip(metric.entrez_gene_ids, min_genes) if keep] + + # determine which genes are confidently expressed + top_samples = round(high_confidence_sample_expression * len(zfpkm_df.columns)) + top_func = k_over_a(top_samples, cut_off) + top_genes: npt.NDArray[bool] = genefilter(zfpkm_df, top_func) + metric.high_confidence_entrez_gene_ids = [gene for gene, keep in zip(metric.entrez_gene_ids, top_genes) if keep] + + return metrics + + async def rnaseq_gen( # config_filepath: Path, config_filename: str, From 7e2fe3d663a91b928a42e5979d33c5f67b71f479 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:13:28 -0600 Subject: [PATCH 51/91] feat: added matrix builder This is functionally equivalent to the _read_counts_matrix function from rnaseq.py --- main/como/rnaseq_gen.py | 60 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 600bfa23..572f3d5c 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -182,9 +182,65 @@ async def _read_counts(path: Path) -> pd.DataFrame: return matrix +async def _build_matrix_results( + *, + matrix: pd.DataFrame, + gene_info: pd.DataFrame, + metadata_df: pd.DataFrame, + taxon: int, +) -> _ReadMatrixResults: + """Read the counts matrix and returns the results. 
+ + :param matrix: The gene counts matrix to process + :param metadata_df: The configuration dataframe related to the current context + :param taxon: The NCBI Taxon ID + :return: A dataclass `ReadMatrixResults` + """ + gene_info = gene_info_migrations(gene_info) + conversion = await ensembl_to_gene_id_and_symbol(ids=matrix["ensembl_gene_id"].tolist(), taxon=taxon) + matrix = matrix.merge(conversion, on="ensembl_gene_id", how="left") + + # Only include Entrez and Ensembl Gene IDs that are present in `gene_info` + matrix["entrez_gene_id"] = matrix["entrez_gene_id"].str.split("//") + matrix = matrix.explode("entrez_gene_id") + matrix = matrix.replace(to_replace="-", value=pd.NA).dropna() + matrix["entrez_gene_id"] = matrix["entrez_gene_id"].astype(int) + + gene_info = gene_info.replace(to_replace="-", value=pd.NA).dropna() + gene_info["entrez_gene_id"] = gene_info["entrez_gene_id"].astype(int) + + counts_matrix = matrix.merge( + gene_info[["entrez_gene_id", "ensembl_gene_id"]], + on=["entrez_gene_id", "ensembl_gene_id"], + how="inner", + ) + gene_info = gene_info.merge( + counts_matrix[["entrez_gene_id", "ensembl_gene_id"]], + on=["entrez_gene_id", "ensembl_gene_id"], + how="inner", + ) + + entrez_gene_ids: list[str] = gene_info["entrez_gene_id"].tolist() + metrics: NamedMetrics = {} + for study in metadata_df["study"].unique().tolist(): + study_sample_names = metadata_df[metadata_df["study"] == study]["sample_name"].tolist() + layouts = metadata_df[metadata_df["study"] == study]["layout"].tolist() + metrics[study] = _StudyMetrics( + count_matrix=counts_matrix[counts_matrix.columns.intersection(study_sample_names)], + fragment_lengths=metadata_df[metadata_df["study"] == study]["fragment_length"].values, + sample_names=study_sample_names, + layout=[LayoutMethod(layout) for layout in layouts], + num_samples=len(study_sample_names), + entrez_gene_ids=entrez_gene_ids, + gene_sizes=np.array(gene_info["size"].values).astype(np.float32), + study=study, + ) + 
metrics[study].fragment_lengths[np.isnan(metrics[study].fragment_lengths)] = 0 + metrics[study].count_matrix.index = pd.Index(entrez_gene_ids, name="entrez_gene_id") + + return _ReadMatrixResults(metrics=metrics, entrez_gene_ids=gene_info["entrez_gene_id"].tolist()) + - for context_name in sheet_names: - logger.debug(f"Starting '{context_name}'") rnaseq_input_filepath = ( config.data_dir / "data_matrices" / context_name / f"gene_counts_matrix_{prep.value}_{context_name}" From 9eaa2acc1b669024abb4ec57a74bc35f25ee912e Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:13:58 -0600 Subject: [PATCH 52/91] feat: added tpm calculation --- main/como/rnaseq_gen.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 572f3d5c..9238eda2 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -241,9 +241,24 @@ async def _build_matrix_results( return _ReadMatrixResults(metrics=metrics, entrez_gene_ids=gene_info["entrez_gene_id"].tolist()) +def calculate_tpm(metrics: NamedMetrics) -> NamedMetrics: + """Calculate the Transcripts Per Million (TPM) for each sample in the metrics dictionary.""" + for sample in metrics: + count_matrix = metrics[sample].count_matrix + + gene_sizes = metrics[sample].gene_sizes + + tpm_matrix = pd.DataFrame(data=None, index=count_matrix.index, columns=count_matrix.columns) + for i in range(len(count_matrix.columns)): + values: pd.Series = count_matrix.iloc[:, i] + 1 # Add 1 to prevent division by 0 + rate = np.log(values.tolist()) - np.log(gene_sizes) + denominator = np.log(np.sum(np.exp(rate))) + tpm_value = np.exp(rate - denominator + np.log(1e6)) + tpm_matrix.iloc[:, i] = tpm_value + metrics[sample].normalization_matrix = tpm_matrix + + return metrics - rnaseq_input_filepath = ( - config.data_dir / "data_matrices" / context_name / f"gene_counts_matrix_{prep.value}_{context_name}" ) if prep == RNAPrepMethod.SCRNA: 
rnaseq_input_filepath = rnaseq_input_filepath.with_suffix(".h5ad") From 3670016c55cc53e7060288b42242b25fe3911185 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:14:11 -0600 Subject: [PATCH 53/91] feat: added fpkm calculation --- main/como/rnaseq_gen.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 9238eda2..e9288bb3 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -259,6 +259,43 @@ def calculate_tpm(metrics: NamedMetrics) -> NamedMetrics: return metrics + +def calculate_fpkm(metrics: NamedMetrics) -> NamedMetrics: + """Calculate the Fragments Per Kilobase of transcript per Million mapped reads (FPKM) for each sample in the metrics dictionary.""" # noqa: E501 + matrix_values = [] + for study in metrics: + for sample in range(metrics[study].num_samples): + layout = metrics[study].layout[sample] + count_matrix: npt.NDArray = metrics[study].count_matrix.iloc[:, sample].values + gene_size = metrics[study].gene_sizes + + count_matrix = count_matrix.astype(np.float32) + gene_size = gene_size.astype(np.float32) + + match layout: + case LayoutMethod.paired_end: # FPKM + mean_fragment_lengths = metrics[study].fragment_lengths[sample] + # Ensure non-negative value + effective_length = [max(0, size - (mean_fragment_lengths + 1)) for size in gene_size] + n = count_matrix.sum() + fpkm = ((count_matrix + 1) * 1e9) / (np.array(effective_length) * n) + matrix_values.append(fpkm) + case LayoutMethod.single_end: # RPKM + # Add a pseudocount before log to ensure log(0) does not happen + rate = np.log(count_matrix + 1) - np.log(gene_size) + exp_rate = np.exp(rate - np.log(np.sum(count_matrix)) + np.log(1e9)) + matrix_values.append(exp_rate) + case _: + raise ValueError("Invalid normalization method specified") + + fpkm_matrix = pd.DataFrame(matrix_values).T # Transpose is needed because values were appended as rows + fpkm_matrix = 
fpkm_matrix[~pd.isna(fpkm_matrix)] + metrics[study].normalization_matrix = fpkm_matrix + + metrics[study].normalization_matrix.columns = metrics[study].count_matrix.columns + + return metrics + ) if prep == RNAPrepMethod.SCRNA: rnaseq_input_filepath = rnaseq_input_filepath.with_suffix(".h5ad") From 1cd4dbb00a523a6a8bf50c5f166714d8d28681bb Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:14:33 -0600 Subject: [PATCH 54/91] feat: added zfpkm transformation and calculation --- main/como/rnaseq_gen.py | 139 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 127 insertions(+), 12 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index e9288bb3..7b88962e 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -296,19 +296,134 @@ def calculate_fpkm(metrics: NamedMetrics) -> NamedMetrics: return metrics + +def _zfpkm_calculation(col: pd.Series, kernel: KernelDensity, peak_parameters: tuple[float, float]) -> _ZFPKMResult: + """Log2 Transformations. + + Stabilize the variance in the data to make the distribution more symmetric; this is helpful for Gaussian fitting + + Kernel Density Estimation (kde) + - Non-parametric method to estimate the probability density function (PDF) of a random variable + - Estimates the distribution of log2-transformed FPKM values + - Bandwidth parameter controls the smoothness of the density estimate + - KDE Explanation + - A way to smooth a histogram to get a better idea of the underlying distribution of the data + - Given a set of data points, we want to understand how they are distributed. 
+ Histograms can be useful, but are sensitive to bin size and number + - The KDE places a "kernel" - a small symmetric function (i.e., Gaussian curve) - at each data point + - The "kernel" acts as a weight, giving more weight to points closer to the center of the kernel, + and less weight to points farther away + - Kernel functions are summed along each point on the x-axis + - A smooth curve is created that represents the estimated density of the data + + Peak Finding + - Identifies that are above a certain height and separated by a minimum distance + - Represent potential local maxima in the distribution + + Peak Selection + - The peak with the highest x-value (from log2-FPKM) is chosen as the mean (mu) + of the "inactive" gene distribution + - The peak representing unexpressed or inactive genes should be at a lower FPKM + value compared to the peak representing expressed genes + + Standard Deviation Estimation + - The mean of log2-FPKM values are greater than the calculated mu + - Standard deviation is estimated based on the assumption that the right tail of the distribution + This represents expressed genes) can be approximated by a half-normal distribution + + zFPKM Transformation + - Centers disbribution around 0 and scales it by the standard deviation. 
+ This makes it easier to compare gene expression across different samples + - Represents the number of standard deviations away from the mean of the "inactive" gene distribution + - Higher zFPKM values indicate higher expression levels relative to the "inactive" peak + - A zFPKM value of 0 represents the mean of the "inactive" distribution + - Research shows that a zFPKM value of -3 or greater can be used as + a threshold for calling a gene as "expressed" + : https://doi.org/10.1186/1471-2164-14-778 + """ + col_log2: npt.NDArray = np.log2(col + 1) + col_log2 = np.nan_to_num(col_log2, nan=0) + refit: KernelDensity = kernel.fit(col_log2.reshape(-1, 1)) # type: ignore + + # kde: KernelDensity = KernelDensity(kernel="gaussian", bandwidth=bandwidth).fit(col_log2.reshape(-1, 1)) + x_range = np.linspace(col_log2.min(), col_log2.max(), 1000) + density = np.exp(refit.score_samples(x_range.reshape(-1, 1))) + peaks, _ = find_peaks(density, height=peak_parameters[0], distance=peak_parameters[1]) + peak_positions = x_range[peaks] + + mu = 0 + max_fpkm = 0 + stddev = 1 + + if len(peaks) != 0: + mu = peak_positions.max() + max_fpkm = density[peaks[np.argmax(peak_positions)]] + u = col_log2[col_log2 > mu].mean() + stddev = (u - mu) * np.sqrt(np.pi / 2) + zfpkm = pd.Series((col_log2 - mu) / stddev, dtype=np.float32, name=col.name) + + return _ZFPKMResult(zfpkm=zfpkm, density=Density(x_range, density), mu=mu, std_dev=stddev, max_fpkm=max_fpkm) + + +def zfpkm_transform( + fpkm_df: pd.DataFrame, + bandwidth: int = 0.5, + peak_parameters: tuple[float, float] = (0.02, 1.0), + update_every_percent: float = 0.1, +) -> tuple[dict[str, _ZFPKMResult], DataFrame]: + """Perform zFPKM calculation/transformation.""" + if update_every_percent > 1: + logger.warning( + f"update_every_percent should be a decimal value between 0 and 1; got: {update_every_percent} - " + f"will convert to percentage" ) - if prep == RNAPrepMethod.SCRNA: - rnaseq_input_filepath = 
rnaseq_input_filepath.with_suffix(".h5ad") - elif prep in {RNAPrepMethod.TOTAL, RNAPrepMethod.MRNA}: - rnaseq_input_filepath = rnaseq_input_filepath.with_suffix(".csv") - - if not rnaseq_input_filepath.exists(): - logger.warning(f"Gene counts matrix not found at {rnaseq_input_filepath}, skipping...") - continue - - gene_info_filepath = config.data_dir / "gene_info.csv" - rnaseq_output_filepath = ( - config.result_dir / context_name / prep.value / f"rnaseq_{prep.value}_{context_name}.csv" + update_every_percent /= 100 + + total = len(fpkm_df.columns) + update_per_step: int = int(np.ceil(total * update_every_percent)) + cores = multiprocessing.cpu_count() - 2 + logger.debug(f"Processing {total:,} samples through zFPKM transform using {cores} cores") + logger.debug( + f"Will update every {update_per_step:,} steps as this is approximately " + f"{update_every_percent:.1%} of {total:,}" + ) + + with Pool(processes=cores) as pool: + kernel = KernelDensity(kernel="gaussian", bandwidth=bandwidth) + chunksize = int(math.ceil(len(fpkm_df.columns) / (4 * cores))) + partial_func = partial(_zfpkm_calculation, kernel=kernel, peak_parameters=peak_parameters) + chunk_time = time.time() + start_time = time.time() + + log_padding = len(str(f"{total:,}")) + zfpkm_df = pd.DataFrame(data=0, index=fpkm_df.index, columns=fpkm_df.columns) + results: dict[str, _ZFPKMResult] = {} + result: _ZFPKMResult + for i, result in enumerate( + pool.imap( + partial_func, + (fpkm_df[col] for col in fpkm_df.columns), + chunksize=chunksize, + ) + ): + key = str(result.zfpkm.name) + results[key] = result + zfpkm_df[key] = result.zfpkm + + # show updates every X% and at the end, but skip on first iteration + if i != 0 and (i % update_per_step == 0 or i == total): + current_time = time.time() + chunk = current_time - chunk_time + total_time = current_time - start_time + formatted = f"{i:,}" + logger.debug( + f"Processed {formatted:>{log_padding}} of {total:,} - " + f"chunk took {chunk:.1f} seconds - " + 
f"running for {total_time:.1f} seconds" + ) + chunk_time = current_time + return results, zfpkm_df + ) rnaseq_output_filepath.parent.mkdir(parents=True, exist_ok=True) From dd8698e3a9b7ca1311b1d0beda96150d8d45ea77 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:14:50 -0600 Subject: [PATCH 55/91] feat: added zfpkm plotting --- main/como/rnaseq_gen.py | 67 ++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 17 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 7b88962e..7c35ea3b 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -424,25 +424,58 @@ def zfpkm_transform( chunk_time = current_time return results, zfpkm_df + +def zfpkm_plot(results, *, plot_xfloor: int = -4, subplot_titles: bool = True): + """Plot the log2(FPKM) density and fitted Gaussian for each sample. + + :param results: A dictionary of intermediate results from zfpkm_transform. + :param: subplot_titles: Whether to display facet titles (sample names). + :param plot_xfloor: Lower limit for the x-axis. + :param subplot_titles: Whether to display facet titles (sample names). 
+ """ + mega_df = pd.DataFrame(columns=["sample_name", "log2fpkm", "fpkm_density", "fitted_density_scaled"]) + for name, result in results.items(): + stddev = result.std_dev + x = np.array(result.density.x) + y = np.array(result.density.y) + + fitted = np.exp(-0.5 * ((x - result.mu) / stddev) ** 2) / (stddev * np.sqrt(2 * np.pi)) + max_fpkm = y.max() + max_fitted = fitted.max() + scale_fitted = fitted * (max_fpkm / max_fitted) + + df = pd.DataFrame( + { + "sample_name": [name] * len(x), + "log2fpkm": x, + "fpkm_density": y, + "fitted_density_scaled": scale_fitted, + } ) - rnaseq_output_filepath.parent.mkdir(parents=True, exist_ok=True) - - await save_rnaseq_tests( - context_name=context_name, - counts_matrix_filepath=rnaseq_input_filepath, - config_filepath=config_filepath, - output_filepath=rnaseq_output_filepath.as_posix(), - gene_info_filepath=gene_info_filepath, - prep=prep, - replicate_ratio=replicate_ratio, - batch_ratio=batch_ratio, - high_replicate_ratio=replicate_ratio_high, - high_batch_ratio=batch_ratio_high, - technique=technique, - cut_off=cut_off, - taxon_id=taxon, + mega_df = pd.concat([mega_df, df], ignore_index=True) + + mega_df = mega_df.melt(id_vars=["log2fpkm", "sample_name"], var_name="source", value_name="density") + subplot_titles = list(results.keys()) if subplot_titles else None + fig = make_subplots( + rows=len(results), + cols=1, + subplot_titles=subplot_titles, + vertical_spacing=min(0.05, (1 / (len(results) - 1))), + ) + + for i, (name, group) in enumerate(mega_df.groupby("sample_name"), start=1): + fig.add_trace( + trace=go.Scatter(x=group["log2fpkm"], y=group["density"], mode="lines", name=name, legendgroup=name), + row=i, + col=1, ) - logger.success(f"Results saved at '{rnaseq_output_filepath}'") + fig.update_xaxes(title_text="log2(FPKM)", range=[plot_xfloor, max(group["log2fpkm"].tolist())], row=i, col=1) + fig.update_yaxes(title_text="density [scaled]", row=i, col=1) + fig.update_layout(legend_tracegroupgap=0) + + 
fig.update_layout(height=600 * len(results), width=1000, title_text="zFPKM Plots", showlegend=True) + fig.write_image("zfpkm_plot.png") + def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, calcualte_fpkm: bool) -> NamedMetrics: From 01db1efc7fbe76bb49579b7b68a52b2ab6b7c36e Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:15:03 -0600 Subject: [PATCH 56/91] feat: aded calculate z score --- main/como/rnaseq_gen.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 7c35ea3b..6b57dd55 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -477,6 +477,16 @@ def zfpkm_plot(results, *, plot_xfloor: int = -4, subplot_titles: bool = True): fig.write_image("zfpkm_plot.png") +def calculate_z_score(metrics: NamedMetrics) -> NamedMetrics: + """Calculate the z-score for each sample in the metrics dictionary.""" + for sample in metrics: + log_matrix = np.log(metrics[sample].normalization_matrix) + z_matrix = pd.DataFrame( + data=sklearn.preprocessing.scale(log_matrix, axis=1), columns=metrics[sample].sample_names + ) + metrics[sample].z_score_matrix = z_matrix + return metrics + def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, calcualte_fpkm: bool) -> NamedMetrics: """Apply zFPKM filtering to the FPKM matrix for a given sample.""" From a47f752059e68241588dd3bbcc6a34a119729615 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:15:15 -0600 Subject: [PATCH 57/91] feat: added cpm filtering --- main/como/rnaseq_gen.py | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 6b57dd55..4d04dcef 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -488,6 +488,54 @@ def calculate_z_score(metrics: NamedMetrics) -> NamedMetrics: return metrics +def cpm_filter( + *, + context_name: str, + metrics: 
NamedMetrics, + filtering_options: _FilteringOptions, + prep: RNAPrepMethod, +) -> NamedMetrics: + """Apply Counts Per Million (CPM) filtering to the count matrix for a given sample.""" + config = Config() + n_exp = filtering_options.replicate_ratio + n_top = filtering_options.high_replicate_ratio + cut_off = filtering_options.cut_off + + sample: str + metric: _StudyMetrics + for sample, metric in metrics.items(): + counts: pd.DataFrame = metric.count_matrix + entrez_ids: list[str] = metric.entrez_gene_ids + library_size: pd.DataFrame = counts.sum(axis=1) + + # For library_sizes equal to 0, add 1 to prevent divide by 0 + # This will not impact the final counts per million calculation because the original counts are still 0 + # thus, (0 / 1) * 1_000_000 = 0 + library_size[library_size == 0] = 1 + + output_filepath = config.result_dir / context_name / prep.value / f"CPM_Matrix_{prep.value}_{sample}.csv" + output_filepath.parent.mkdir(parents=True, exist_ok=True) + counts_per_million: pd.DataFrame = (counts / library_size) * 1_000_000 + counts_per_million.insert(0, "entrez_gene_ids", pd.Series(entrez_ids)) + logger.debug(f"Writing CPM matrix to {output_filepath}") + counts_per_million.to_csv(output_filepath, index=False) + + # TODO: Counts per million is adding ~61,500 columns (equal to the number of genes) for some reason. 
+ # Most likely due to multiplying by 1_000_000, not exactly sure why + + min_samples = round(n_exp * len(counts.columns)) # noqa: F841 + top_samples = round(n_top * len(counts.columns)) # noqa: F841 + test_bools = pd.DataFrame({"entrez_gene_ids": entrez_ids}) + for i in range(len(counts_per_million.columns)): + cutoff = ( + 10e6 / (np.median(np.sum(counts[:, i]))) + if cut_off == "default" + else 1e6 * cut_off / np.median(np.sum(counts[:, i])) + ) + test_bools = test_bools.merge(counts_per_million[counts_per_million.iloc[:, i] > cutoff]) + + return metrics + def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, calcualte_fpkm: bool) -> NamedMetrics: """Apply zFPKM filtering to the FPKM matrix for a given sample.""" min_sample_expression = filtering_options.replicate_ratio From b6460c22de205a9932872e19806b062f5d05aeeb Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:15:35 -0600 Subject: [PATCH 58/91] feat: added tpm quantile filtering --- main/como/rnaseq_gen.py | 48 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 4d04dcef..6ec871e3 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -536,6 +536,54 @@ def cpm_filter( return metrics + +def tpm_quantile_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions) -> NamedMetrics: + """Apply quantile-based filtering to the TPM matrix for a given sample.""" + # TODO: Write the TPM matrix to disk + + n_exp = filtering_options.replicate_ratio + n_top = filtering_options.high_replicate_ratio + cut_off = filtering_options.cut_off + metrics = calculate_tpm(metrics) + + sample: str + metric: _StudyMetrics + for sample, metric in metrics.items(): + entrez_ids = metric.entrez_gene_ids + gene_size = metric.gene_sizes + tpm_matrix: pd.DataFrame = metric.normalization_matrix + + min_samples = round(n_exp * len(tpm_matrix.columns)) + top_samples = round(n_top * 
len(tpm_matrix.columns)) + + tpm_quantile = tpm_matrix[tpm_matrix > 0] + quantile_cutoff = np.quantile( + a=tpm_quantile.values, q=1 - (cut_off / 100), axis=0 + ) # Compute quantile across columns + boolean_expression = pd.DataFrame( + data=tpm_matrix > quantile_cutoff, index=tpm_matrix.index, columns=tpm_matrix.columns + ).astype(int) + + min_func = k_over_a(min_samples, 0.9) + top_func = k_over_a(top_samples, 0.9) + + min_genes: npt.NDArray[bool] = genefilter(boolean_expression, min_func) + top_genes: npt.NDArray[bool] = genefilter(boolean_expression, top_func) + + # Only keep `entrez_gene_ids` that pass `min_genes` + metric.entrez_gene_ids = [gene for gene, keep in zip(entrez_ids, min_genes) if keep] + metric.gene_sizes = [gene for gene, keep in zip(gene_size, min_genes) if keep] + metric.count_matrix = metric.count_matrix.iloc[min_genes, :] + metric.normalization_matrix = metrics[sample].normalization_matrix.iloc[min_genes, :] + + keep_top_genes = [gene for gene, keep in zip(entrez_ids, top_genes) if keep] + metric.high_confidence_entrez_gene_ids = [gene for gene, keep in zip(entrez_ids, keep_top_genes) if keep] + + metrics = calculate_z_score(metrics) + + return metrics + + def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, calcualte_fpkm: bool) -> NamedMetrics: """Apply zFPKM filtering to the FPKM matrix for a given sample.""" min_sample_expression = filtering_options.replicate_ratio From e0c98b430e4b1c14801b131efdfd406b8a36d470 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:15:53 -0600 Subject: [PATCH 59/91] feat: added root filtering logic --- main/como/rnaseq_gen.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 6ec871e3..745d519b 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -619,9 +619,29 @@ def zfpkm_filter(*, metrics: NamedMetrics, filtering_options: _FilteringOptions, return 
metrics -async def rnaseq_gen( - # config_filepath: Path, - config_filename: str, +def filter_counts( + *, + context_name: str, + metrics: NamedMetrics, + technique: FilteringTechnique, + filtering_options: _FilteringOptions, + prep: RNAPrepMethod, +) -> NamedMetrics: + """Filter the count matrix based on the specified technique.""" + match technique: + case FilteringTechnique.cpm: + return cpm_filter( + context_name=context_name, metrics=metrics, filtering_options=filtering_options, prep=prep + ) + case FilteringTechnique.tpm: + return tpm_quantile_filter(metrics=metrics, filtering_options=filtering_options) + case FilteringTechnique.zfpkm: + return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=True) + case FilteringTechnique.umi: + return zfpkm_filter(metrics=metrics, filtering_options=filtering_options, calcualte_fpkm=False) + case _: + raise ValueError(f"Technique must be one of {FilteringTechnique}") + prep: RNAPrepMethod, taxon_id: int | str | Taxon, replicate_ratio: float = 0.5, From a4d6f2f94099639b8fea828d06e6b3f5ab9545e5 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:16:15 -0600 Subject: [PATCH 60/91] feat: added logic for performing statistical tests --- main/como/rnaseq_gen.py | 76 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 745d519b..b430c916 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -642,8 +642,82 @@ def filter_counts( case _: raise ValueError(f"Technique must be one of {FilteringTechnique}") + +async def _save_rnaseq_tests( + context_name: str, + rnaseq_matrix: pd.DataFrame, + metadata_df: pd.DataFrame, + gene_info_df: pd.DataFrame, + output_filepath: Path, prep: RNAPrepMethod, - taxon_id: int | str | Taxon, + taxon: int, + replicate_ratio: float, + batch_ratio: float, + high_replicate_ratio: float, + high_batch_ratio: float, + technique: 
FilteringTechnique, + cut_off: int | float, +): + """Save the results of the RNA-Seq tests to a CSV file.""" + filtering_options = _FilteringOptions( + replicate_ratio=replicate_ratio, + batch_ratio=batch_ratio, + cut_off=cut_off, + high_replicate_ratio=high_replicate_ratio, + high_batch_ratio=high_batch_ratio, + ) + + read_counts_results: _ReadMatrixResults = await _build_matrix_results( + matrix=rnaseq_matrix, + gene_info=gene_info_df, + metadata_df=metadata_df, + taxon=taxon, + ) + metrics = read_counts_results.metrics + entrez_gene_ids = read_counts_results.entrez_gene_ids + + metrics = filter_counts( + context_name=context_name, + metrics=metrics, + technique=technique, + filtering_options=filtering_options, + prep=prep, + ) + + expressed_genes: list[str] = [] + top_genes: list[str] = [] + for metric in metrics.values(): + expressed_genes.extend(metric.entrez_gene_ids) + top_genes.extend(metric.high_confidence_entrez_gene_ids) + + expression_frequency = pd.Series(expressed_genes).value_counts() + expression_df = pd.DataFrame( + {"entrez_gene_id": expression_frequency.index, "frequency": expression_frequency.values} + ) + expression_df["prop"] = expression_df["frequency"] / len(metrics) + expression_df = expression_df[expression_df["prop"] >= filtering_options.batch_ratio] + + top_frequency = pd.Series(top_genes).value_counts() + top_df = pd.DataFrame({"entrez_gene_id": top_frequency.index, "frequency": top_frequency.values}) + top_df["prop"] = top_df["frequency"] / len(metrics) + top_df = top_df[top_df["prop"] >= filtering_options.high_batch_ratio] + + boolean_matrix = pd.DataFrame(data={"entrez_gene_id": entrez_gene_ids, "expressed": 0, "high": 0}) + for gene in entrez_gene_ids: + if gene in expression_df["entrez_gene_id"]: + boolean_matrix.loc[gene, "expressed"] = 1 + if gene in top_df["entrez_gene_id"]: + boolean_matrix.loc[gene, "high"] = 1 + + expressed_count = len(boolean_matrix[boolean_matrix["expressed"] == 1]) + high_confidence_count = 
len(boolean_matrix[boolean_matrix["high"] == 1]) + + boolean_matrix.to_csv(output_filepath, index=False) + logger.info( + f"{context_name} - Found {expressed_count} expressed and {high_confidence_count} confidently expressed genes" + ) + logger.success(f"Wrote boolean matrix to {output_filepath}") + replicate_ratio: float = 0.5, high_replicate_ratio: float = 1.0, batch_ratio: float = 0.5, From e9665bf168d7d5724b748830040041db96bee79c Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:16:29 -0600 Subject: [PATCH 61/91] feat: create metadata df --- main/como/rnaseq_gen.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index b430c916..f86b1b17 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -718,6 +718,15 @@ async def _save_rnaseq_tests( ) logger.success(f"Wrote boolean matrix to {output_filepath}") + +async def _create_metadata_df(path: Path) -> pd.DataFrame: + if path.suffix not in {".xls", ".xlsx"}: + raise ValueError( + f"Expected an excel file with extension of '.xlsx' or '.xls', got '{path.suffix}'. 
" + f"Attempted to process: {path}" + ) + return pd.read_excel(path) + replicate_ratio: float = 0.5, high_replicate_ratio: float = 1.0, batch_ratio: float = 0.5, From dc7c1fec6ec09beb8712676ad2e808a860525fbc Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:16:54 -0600 Subject: [PATCH 62/91] refactor: allow passing specific filepaths --- main/como/rnaseq_gen.py | 65 +++++++++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 19 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index f86b1b17..13619059 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -727,6 +727,16 @@ async def _create_metadata_df(path: Path) -> pd.DataFrame: ) return pd.read_excel(path) + +async def rnaseq_gen( # noqa: C901, allow complex function + context_name: str, + input_rnaseq_filepath: Path, + input_gene_info_filepath: Path, + output_rnaseq_filepath: Path, + prep: RNAPrepMethod, + taxon: int, + input_metadata_filepath: Path | None = None, + input_metadata_df: pd.DataFrame | None = None, replicate_ratio: float = 0.5, high_replicate_ratio: float = 1.0, batch_ratio: float = 0.5, @@ -740,9 +750,9 @@ async def _create_metadata_df(path: Path) -> pd.DataFrame: then study/batch numbers are checked for consensus according to batch ratios. 
The zFPKM method is outlined here: https://pubmed.ncbi.nlm.nih.gov/24215113/ - :param config_filename: The configuration filename to read + :param metadata_filepath: The configuration filename to read :param prep: The preparation method - :param taxon_id: The NCBI Taxon ID + :param taxon: The NCBI Taxon ID :param replicate_ratio: The percentage of replicates that a gene must appear in for a gene to be marked as "active" in a batch/study :param batch_ratio: The percentage of batches that a gene must appear in for a gene to be marked as 'active" @@ -754,41 +764,58 @@ async def _create_metadata_df(path: Path) -> pd.DataFrame: :param cut_off: The cutoff value to use for the provided filtering technique :return: None """ - if isinstance(technique, str): - technique = FilteringTechnique(technique.lower()) - if isinstance(taxon_id, (str, int)): - taxon_id = Taxon.from_string(str(taxon_id)) + if not input_metadata_df and not input_metadata_filepath: + raise ValueError("At least one of input_metadata_filepath or input_metadata_df must be provided") + + technique = ( + FilteringTechnique.from_string(str(technique.lower())) if isinstance(technique, (str, int)) else technique + ) match technique: case FilteringTechnique.tpm: - cut_off = 25 if cut_off is None else cut_off + cut_off = cut_off or 25 if cut_off < 1 or cut_off > 100: raise ValueError("Quantile must be between 1 - 100") case FilteringTechnique.cpm: - if cut_off is not None and cut_off < 0: + if cut_off and cut_off < 0: raise ValueError("Cutoff must be greater than 0") - elif cut_off is None: + elif cut_off: cut_off = "default" case FilteringTechnique.zfpkm: - cut_off = "default" if cut_off is None else cut_off + cut_off = "default" if cut_off else cut_off case FilteringTechnique.umi: pass case _: raise ValueError(f"Technique must be one of {FilteringTechnique}") - await _handle_context_batch( - config_filename=config_filename, + if not input_rnaseq_filepath.exists(): + raise FileNotFoundError(f"Input RNA-seq file 
not found! Searching for: '{input_rnaseq_filepath}'") + + if prep == RNAPrepMethod.SCRNA: + technique = FilteringTechnique.umi + logger.warning( + "Single cell filtration does not normalize and assumes " + "gene counts are counted with Unique Molecular Identifiers (UMIs). " + "Setting filtering technique to UMI now." + ) + + logger.debug(f"Starting '{context_name}'") + output_rnaseq_filepath.parent.mkdir(parents=True, exist_ok=True) + + await _save_rnaseq_tests( + context_name=context_name, + rnaseq_matrix=await _read_counts(input_rnaseq_filepath), + metadata_df=input_metadata_df or await _create_metadata_df(input_metadata_filepath), + gene_info_df=pd.read_csv(input_gene_info_filepath), + output_filepath=output_rnaseq_filepath, + prep=prep, + taxon=taxon, replicate_ratio=replicate_ratio, - replicate_ratio_high=high_replicate_ratio, batch_ratio=batch_ratio, - batch_ratio_high=high_batch_ratio, + high_replicate_ratio=high_replicate_ratio, + high_batch_ratio=high_batch_ratio, technique=technique, cut_off=cut_off, - prep=prep, - taxon=taxon_id, - ) - - ) From f35e09a263ee552d42e188ab71e2c5c52dc686c2 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Mon, 9 Dec 2024 15:17:25 -0600 Subject: [PATCH 63/91] refactor: rename variable names for easier reuse --- main/COMO.ipynb | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/main/COMO.ipynb b/main/COMO.ipynb index d361f9b2..b721bbbb 100644 --- a/main/COMO.ipynb +++ b/main/COMO.ipynb @@ -276,22 +276,22 @@ "from como.types import RNAPrepMethod\n", "\n", "context_names = [\"naiveB\"]\n", - "output_gene_info_filepaths = [Path(f\"data/results/{context}/gene_info.csv\") for context in context_names]\n", - "como_context_dirs = [Path(f\"data/COMO_input/{context}\") for context in context_names]\n", - "output_trna_filepaths = [Path(f\"data/results/{context}/total-rna/totalrna_{context}.csv\") for context in context_names]\n", - "output_polya_filepaths = 
[Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names]\n", + "gene_info_filepath = [Path(f\"data/results/{context}/gene_info.csv\") for context in context_names]\n", + "como_context_dir = [Path(f\"data/COMO_input/{context}\") for context in context_names]\n", + "trna_matrix_filepath = [Path(f\"data/results/{context}/total-rna/totalrna_{context}.csv\") for context in context_names]\n", + "polya_matrix_filepath = [Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names]\n", "\n", "\n", "for i in range(len(context_names)):\n", " await rnaseq_preprocess(\n", " context_name=context_names[i],\n", " taxon=9606,\n", - " output_gene_info_filepath=output_gene_info_filepaths[i],\n", - " como_context_dir=como_context_dirs[i],\n", + " output_gene_info_filepath=gene_info_filepath[i],\n", + " como_context_dir=como_context_dir[i],\n", " output_trna_config_filepath=Path(\"./data/config_sheets/trna_config.xlsx\"),\n", - " output_trna_count_matrix_filepath=output_trna_filepaths[i],\n", + " output_trna_count_matrix_filepath=trna_matrix_filepath[i],\n", " output_polya_config_filepath=Path(\"./data/config_sheets/mrna_config.xlsx\"),\n", - " output_polya_count_matrix_filepath=output_polya_filepaths[i],\n", + " output_polya_count_matrix_filepath=polya_matrix_filepath[i],\n", " cache=True,\n", " log_level=\"INFO\",\n", " )" @@ -373,7 +373,7 @@ "metadata": {}, "outputs": [], "source": [ - "# step 2.2 RNA-seq Analysis for Total RNA-seq library preparation\n", + "from como.rnaseq_gen import rnaseq_gen\n", "\n", "trnaseq_config_file = \"trnaseq_data_inputs_auto.xlsx\"\n", "rep_ratio = 0.75\n", @@ -438,6 +438,11 @@ "minimum_cutoff = -3\n", "taxon_id = \"human\"\n", "\n", + "await rnaseq_gen(\n", + " context_name=\"naiveB\",\n", + " input_rnaseq_filepath=\n", + ")\n", + "\n", "# fmt: off\n", "cmd = \" \".join(\n", " [\n", From fc803bcc5298023bd563b29a1547fd488c9e4944 Mon Sep 17 00:00:00 2001 From: Josh Loecker 
Date: Tue, 10 Dec 2024 10:17:35 -0600 Subject: [PATCH 64/91] style: update log message, more pythonic code --- main/COMO.ipynb | 195 +++++++++++++++++---------------- main/como/rnaseq_preprocess.py | 8 +- 2 files changed, 104 insertions(+), 99 deletions(-) diff --git a/main/COMO.ipynb b/main/COMO.ipynb index b721bbbb..24d4b83c 100644 --- a/main/COMO.ipynb +++ b/main/COMO.ipynb @@ -223,6 +223,32 @@ "" ] }, + { + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-09T22:01:53.960204Z", + "start_time": "2024-12-09T22:01:53.957844Z" + } + }, + "cell_type": "code", + "source": [ + "from pathlib import Path\n", + "\n", + "taxon_id = 9606\n", + "context_names = [\"naiveB\"]\n", + "gene_info_filepath = [Path(f\"data/results/{context}/gene_info.csv\") for context in context_names]\n", + "como_context_dir = [Path(f\"data/COMO_input/{context}\") for context in context_names]\n", + "trna_matrix_filepath = [Path(f\"data/results/{context}/trna-rna/trna_{context}.csv\") for context in context_names]\n", + "polya_matrix_filepath = [Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names]\n", + "\n", + "# No single-cell data is provided by default; COMO accepts single-cell data in CSV or h5ad format\n", + "# If you are using single-cell data, adjust the following lines accordingly\n", + "scrna_matrix_filepath = [Path(f\"data/results/{context}/scrna/scrna_{context}.csv\") for context in context_names]\n", + "# scrna_matrix_filepath = [Path(f\"data/results/{context}/scrna/scrna_{context}.h5ad\") for context in context_names]\n" + ], + "outputs": [], + "execution_count": 3 + }, { "cell_type": "markdown", "metadata": {}, @@ -270,27 +296,19 @@ } ], "source": [ - "from pathlib import Path\n", - "\n", "from como.rnaseq_preprocess import rnaseq_preprocess\n", "from como.types import RNAPrepMethod\n", "\n", - "context_names = [\"naiveB\"]\n", - "gene_info_filepath = [Path(f\"data/results/{context}/gene_info.csv\") for context in context_names]\n", 
- "como_context_dir = [Path(f\"data/COMO_input/{context}\") for context in context_names]\n", - "trna_matrix_filepath = [Path(f\"data/results/{context}/total-rna/totalrna_{context}.csv\") for context in context_names]\n", - "polya_matrix_filepath = [Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names]\n", - "\n", "\n", "for i in range(len(context_names)):\n", " await rnaseq_preprocess(\n", " context_name=context_names[i],\n", - " taxon=9606,\n", + " taxon=taxon_id,\n", " output_gene_info_filepath=gene_info_filepath[i],\n", " como_context_dir=como_context_dir[i],\n", " output_trna_config_filepath=Path(\"./data/config_sheets/trna_config.xlsx\"),\n", " output_trna_count_matrix_filepath=trna_matrix_filepath[i],\n", - " output_polya_config_filepath=Path(\"./data/config_sheets/mrna_config.xlsx\"),\n", + " output_polya_config_filepath=Path(\"./data/config_sheets/polya_config.xlsx\"),\n", " output_polya_count_matrix_filepath=polya_matrix_filepath[i],\n", " cache=True,\n", " log_level=\"INFO\",\n", @@ -373,35 +391,31 @@ "metadata": {}, "outputs": [], "source": [ - "from como.rnaseq_gen import rnaseq_gen\n", + "from como.rnaseq_gen import rnaseq_gen, FilteringTechnique\n", "\n", - "trnaseq_config_file = \"trnaseq_data_inputs_auto.xlsx\"\n", - "rep_ratio = 0.75\n", - "group_ratio = 0.75\n", - "rep_ratio_h = 1.0\n", - "group_ratio_h = 1.0\n", - "technique = \"zFPKM\"\n", - "minimum_cutoff = -3\n", - "taxon_id = \"human\"\n", - "\n", - "# fmt: off\n", - "cmd = \" \".join(\n", - " [\n", - " \"python3\", \"como/rnaseq_gen.py\",\n", - " \"--config-file\", trnaseq_config_file,\n", - " \"--replicate-ratio\", str(rep_ratio),\n", - " \"--batch-ratio\", str(group_ratio),\n", - " \"--high-replicate-ratio\", str(rep_ratio_h),\n", - " \"--high-batch-ratio\", str(group_ratio_h),\n", - " \"--minimum-cutoff\", str(minimum_cutoff),\n", - " \"--filt-technique\", f\"{technique}\",\n", - " \"--library-prep\", \"total\",\n", - " \"--taxon-id\", 
taxon_id\n", - " ]\n", - ")\n", - "# fmt: on\n", - "\n", - "!{cmd}" + "replicate_ratio = 0.75\n", + "high_confidence_replicate_ratio = 1.0\n", + "batch_ratio = 0.75\n", + "high_confidence_batch_ratio = 1.0\n", + "technique = FilteringTechnique.zfpkm\n", + "cutoff = -3\n", + "\n", + "for i, context in enumerate(context_names):\n", + " await rnaseq_gen(\n", + " context_name=context,\n", + " input_rnaseq_filepath=trna_matrix_filepath[i],\n", + " input_gene_info_filepath=gene_info_filepath[i],\n", + " output_rnaseq_filepath=trna_matrix_filepath[i],\n", + " prep=RNAPrepMethod.TOTAL,\n", + " taxon=taxon_id,\n", + " input_metadata_filepath=Path(\"./data/config_sheets/trna_config.xlsx\"),\n", + " replicate_ratio=replicate_ratio,\n", + " high_replicate_ratio=high_confidence_replicate_ratio,\n", + " batch_ratio=batch_ratio,\n", + " high_batch_ratio=high_confidence_batch_ratio,\n", + " technique=technique,\n", + " cutoff=cutoff\n", + " )" ] }, { @@ -429,38 +443,31 @@ "metadata": {}, "outputs": [], "source": [ - "mrnaseq_config_file = \"mrnaseq_data_inputs_auto.xlsx\"\n", - "rep_ratio = 0.75\n", - "group_ratio = 0.75\n", - "rep_ratio_h = 1.0\n", - "group_ratio_h = 1.0\n", - "technique = \"zfpkm\"\n", - "minimum_cutoff = -3\n", - "taxon_id = \"human\"\n", - "\n", - "await rnaseq_gen(\n", - " context_name=\"naiveB\",\n", - " input_rnaseq_filepath=\n", - ")\n", + "from como.rnaseq_gen import rnaseq_gen, FilteringTechnique\n", "\n", - "# fmt: off\n", - "cmd = \" \".join(\n", - " [\n", - " \"python3\", \"como/rnaseq_gen.py\",\n", - " \"--config-file\", mrnaseq_config_file,\n", - " \"--replicate-ratio\", str(rep_ratio),\n", - " \"--batch-ratio\", str(group_ratio),\n", - " \"--high-replicate-ratio\", str(rep_ratio_h),\n", - " \"--high-batch-ratio\", str(group_ratio_h),\n", - " \"--minimum-cutoff\", str(minimum_cutoff),\n", - " \"--filt-technique\", f\"{technique}\",\n", - " \"--library-prep\", \"mrna\",\n", - " \"--taxon-id\", taxon_id\n", - " ]\n", - ")\n", - "# fmt: on\n", - "\n", 
- "!{cmd}" + "replicate_ratio = 0.75\n", + "high_confidence_replicate_ratio = 1.0\n", + "batch_ratio = 0.75\n", + "high_confidence_batch_ratio = 1.0\n", + "technique = FilteringTechnique.zfpkm\n", + "cutoff = -3\n", + "\n", + "for i, context in enumerate(context_names):\n", + " await rnaseq_gen(\n", + " context_name=context,\n", + " input_rnaseq_filepath=polya_matrix_filepath[i],\n", + " input_gene_info_filepath=gene_info_filepath[i],\n", + " output_rnaseq_filepath=polya_matrix_filepath[i],\n", + " prep=RNAPrepMethod.MRNA,\n", + " taxon=taxon_id,\n", + " input_metadata_filepath=Path(\"./data/config_sheets/mrna_config.xlsx\"),\n", + " replicate_ratio=replicate_ratio,\n", + " high_replicate_ratio=high_confidence_replicate_ratio,\n", + " batch_ratio=batch_ratio,\n", + " high_batch_ratio=high_confidence_batch_ratio,\n", + " technique=technique,\n", + " cutoff=cutoff\n", + " )" ] }, { @@ -488,33 +495,31 @@ "metadata": {}, "outputs": [], "source": [ - "scrnaseq_config_file = \"scrnaseq_data_inputs_auto.xlsx\"\n", - "rep_ratio = 0.75\n", - "group_ratio = 0.75\n", - "rep_ratio_h = 1.0\n", - "group_ratio_h = 1.0\n", - "quantile = 50\n", - "minimum_cutoff = -3\n", - "taxon_id = \"human\"\n", - "\n", - "# fmt: off\n", - "cmd = \" \".join(\n", - " [\n", - " \"python3\", \"como/rnaseq_gen.py\",\n", - " \"--config-file\", scrnaseq_config_file,\n", - " \"--replicate-ratio\", str(rep_ratio),\n", - " \"--batch-ratio\", str(group_ratio),\n", - " \"--high-replicate-ratio\", str(rep_ratio_h),\n", - " \"--high-batch-ratio\", str(group_ratio_h),\n", - " \"--minimum-cutoff\", str(minimum_cutoff),\n", - " \"--filt-technique\", \"umi\",\n", - " \"--library-prep\", \"scrna\",\n", - " \"--taxon-id\", taxon_id\n", - " ]\n", - ")\n", - "# fmt: on\n", + "from como.rnaseq_gen import rnaseq_gen, FilteringTechnique\n", "\n", - "!{cmd}" + "replicate_ratio = 0.75\n", + "high_confidence_replicate_ratio = 1.0\n", + "batch_ratio = 0.75\n", + "high_confidence_batch_ratio = 1.0\n", + "technique = 
FilteringTechnique.umi\n", + "cutoff = -3\n", + "\n", + "for i, context in enumerate(context_names):\n", + " await rnaseq_gen(\n", + " context_name=context,\n", + " input_rnaseq_filepath=scrna_matrix_filepath[i],\n", + " input_gene_info_filepath=gene_info_filepath[i],\n", + " output_rnaseq_filepath=scrna_matrix_filepath[i],\n", + " prep=RNAPrepMethod.SCRNA,\n", + " taxon=taxon_id,\n", + " input_metadata_filepath=Path(\"./data/config_sheets/scrna_config.xlsx\"),\n", + " replicate_ratio=replicate_ratio,\n", + " high_replicate_ratio=high_confidence_replicate_ratio,\n", + " batch_ratio=batch_ratio,\n", + " high_batch_ratio=high_confidence_batch_ratio,\n", + " technique=technique,\n", + " cutoff=cutoff\n", + " )" ] }, { diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index ae8bcdea..bfa19bc7 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -448,11 +448,11 @@ async def read_counts(file: Path) -> list[str]: ) return conversion["entrez_gene_id"].tolist() - logger.info("Fetching gene info (this may take 1-5 minutes)") + logger.info( + "Fetching gene info (this may take 1-5 minutes depending on the number of genes and your internet connection)" + ) genes = set(chain.from_iterable(await asyncio.gather(*[read_counts(f) for f in counts_matrix_filepaths]))) - - mygene = MyGene(cache=cache) - gene_data = await mygene.query(items=list(genes), taxon=taxon, scopes="entrezgene") + gene_data = await MyGene(cache=cache).query(items=list(genes), taxon=taxon, scopes="entrezgene") gene_info: pd.DataFrame = pd.DataFrame( data=None, columns=pd.Index(data=["ensembl_gene_id", "gene_symbol", "entrez_gene_id", "start_position", "end_position"]), From 8bdddd911680e2ab4a2865aef8dfef25c6707ba0 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 10:17:49 -0600 Subject: [PATCH 65/91] style: variable rename --- main/como/rnaseq_gen.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git 
a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 13619059..05f185f3 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -742,7 +742,7 @@ async def rnaseq_gen( # noqa: C901, allow complex function batch_ratio: float = 0.5, high_batch_ratio: float = 1.0, technique: FilteringTechnique | str = FilteringTechnique.tpm, - cut_off: int | float | None = None, + cutoff: int | float | None = None, ) -> None: """Generate a list of active and high-confidence genes from a gene count matrix. @@ -761,7 +761,7 @@ async def rnaseq_gen( # noqa: C901, allow complex function :param high_batch_ratio: The percentage of batches that a gene must appear in for a gene to be marked "highly confident" in its expression :param technique: The filtering technique to use - :param cut_off: The cutoff value to use for the provided filtering technique + :param cutoff: The cutoff value to use for the provided filtering technique :return: None """ if not input_metadata_df and not input_metadata_filepath: @@ -773,18 +773,18 @@ async def rnaseq_gen( # noqa: C901, allow complex function match technique: case FilteringTechnique.tpm: - cut_off = cut_off or 25 - if cut_off < 1 or cut_off > 100: + cutoff = cutoff or 25 + if cutoff < 1 or cutoff > 100: raise ValueError("Quantile must be between 1 - 100") case FilteringTechnique.cpm: - if cut_off and cut_off < 0: + if cutoff and cutoff < 0: raise ValueError("Cutoff must be greater than 0") - elif cut_off: - cut_off = "default" + elif cutoff: + cutoff = "default" case FilteringTechnique.zfpkm: - cut_off = "default" if cut_off else cut_off + cutoff = "default" if cutoff else cutoff case FilteringTechnique.umi: pass case _: @@ -817,5 +817,5 @@ async def rnaseq_gen( # noqa: C901, allow complex function high_replicate_ratio=high_replicate_ratio, high_batch_ratio=high_batch_ratio, technique=technique, - cut_off=cut_off, + cut_off=cutoff, ) From e0d84bee1fbf4ea3a197bdd6af89f67d501d4872 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 
10 Dec 2024 10:18:31 -0600 Subject: [PATCH 66/91] feat: update to match new approach --- main/COMO.ipynb | 123 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 87 insertions(+), 36 deletions(-) diff --git a/main/COMO.ipynb b/main/COMO.ipynb index 24d4b83c..758d719b 100644 --- a/main/COMO.ipynb +++ b/main/COMO.ipynb @@ -2,7 +2,11 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, "source": [ "# COMO: Constraint-based Optomization of Metabolic Objectives\n", "\n", @@ -56,7 +60,11 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, "source": [ "# Step 1: Data Preprocessing and Analysis\n", "\n", @@ -224,30 +232,33 @@ ] }, { + "cell_type": "code", + "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2024-12-09T22:01:53.960204Z", - "start_time": "2024-12-09T22:01:53.957844Z" + "end_time": "2024-12-09T22:10:43.421233Z", + "start_time": "2024-12-09T22:10:43.418100Z" } }, - "cell_type": "code", + "outputs": [], "source": [ "from pathlib import Path\n", "\n", + "from como.rnaseq_preprocess import rnaseq_preprocess\n", + "from como.types import RNAPrepMethod\n", + "\n", "taxon_id = 9606\n", "context_names = [\"naiveB\"]\n", "gene_info_filepath = [Path(f\"data/results/{context}/gene_info.csv\") for context in context_names]\n", "como_context_dir = [Path(f\"data/COMO_input/{context}\") for context in context_names]\n", - "trna_matrix_filepath = [Path(f\"data/results/{context}/trna-rna/trna_{context}.csv\") for context in context_names]\n", + "trna_matrix_filepath = [Path(f\"data/results/{context}/total-rna/totalrna_{context}.csv\") for context in context_names]\n", "polya_matrix_filepath = [Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names]\n", "\n", "# No single-cell data is provided by default; COMO accepts single-cell data in CSV or h5ad format\n", "# If you are 
using single-cell data, adjust the following lines accordingly\n", "scrna_matrix_filepath = [Path(f\"data/results/{context}/scrna/scrna_{context}.csv\") for context in context_names]\n", "# scrna_matrix_filepath = [Path(f\"data/results/{context}/scrna/scrna_{context}.h5ad\") for context in context_names]\n" - ], - "outputs": [], - "execution_count": 3 + ] }, { "cell_type": "markdown", @@ -262,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2024-12-07T03:30:27.253112Z", @@ -274,32 +285,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001B[32m2024-12-06 23:12:10\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m629\u001B[0m - \u001B[1mTEST\u001B[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001B[32m2024-12-06 23:12:11\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'total' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001B[0m\n", - "\u001B[32m2024-12-06 23:12:11\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'polya' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/polya-rna/polyarna_naiveB.csv'\u001B[0m\n", - "\u001B[32m2024-12-06 23:12:11\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m451\u001B[0m - \u001B[1mFetching gene info (this may take 1-5 minutes)\u001B[0m\n", - "\u001B[32m2024-12-06 23:13:04\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m488\u001B[0m - \u001B[32m\u001B[1mGene Info file written at 
'/Users/joshl/Projects/COMO/main/data/results/naiveB/gene_info.csv'\u001B[0m\n" + "\u001b[32m2024-12-09 16:23:55\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mcomo.rnaseq_preprocess\u001b[0m:\u001b[36m274\u001b[0m - \u001b[32m\u001b[1mWrote gene count matrix for 'polya' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/polya-rna/polyarna_naiveB.csv'\u001b[0m\n", + "\u001b[32m2024-12-09 16:23:55\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mcomo.rnaseq_preprocess\u001b[0m:\u001b[36m274\u001b[0m - \u001b[32m\u001b[1mWrote gene count matrix for 'total' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001b[0m\n", + "\u001b[32m2024-12-09 16:23:55\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcomo.rnaseq_preprocess\u001b[0m:\u001b[36m451\u001b[0m - \u001b[1mFetching gene info (this may take 1-5 minutes)\u001b[0m\n", + "\u001b[32m2024-12-09 16:24:13\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mcomo.rnaseq_preprocess\u001b[0m:\u001b[36m488\u001b[0m - \u001b[32m\u001b[1mGene Info file written at '/Users/joshl/Projects/COMO/main/data/results/naiveB/gene_info.csv'\u001b[0m\n" ] } ], "source": [ - "from como.rnaseq_preprocess import rnaseq_preprocess\n", - "from como.types import RNAPrepMethod\n", - "\n", - "\n", "for i in range(len(context_names)):\n", " await rnaseq_preprocess(\n", " context_name=context_names[i],\n", @@ -387,9 +380,67 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "metadata": { + "ExecuteTime": { + "end_time": "2024-12-09T22:13:42.060657Z", + "start_time": "2024-12-09T22:13:41.740347Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[32m2024-12-09 16:18:43.958\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcomo.rnaseq_gen\u001b[0m:\u001b[36mrnaseq_gen\u001b[0m:\u001b[36m805\u001b[0m - \u001b[34m\u001b[1mStarting 'naiveB'\u001b[0m\n", + 
"\u001b[32m2024-12-09 16:18:43.961\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcomo.rnaseq_gen\u001b[0m:\u001b[36m_read_counts\u001b[0m:\u001b[36m175\u001b[0m - \u001b[34m\u001b[1mReading CSV file at 'data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " entrez_gene_id expressed high\n", + "0 7105 0 0\n", + "1 64102 0 0\n", + "2 8813 0 0\n", + "3 57147 0 0\n", + "4 55732 0 0\n", + "... ... ... ...\n", + "34396 124901321 0 0\n", + "34397 124902403 0 0\n", + "34398 101929614 0 0\n", + "34399 107984888 0 0\n", + "34400 124900697 0 0\n", + "\n", + "[34401 rows x 3 columns]\n" + ] + }, + { + "ename": "KeyError", + "evalue": "'ensembl_gene_id'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001b[0m, in 
\u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'ensembl_gene_id'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[4], line 11\u001b[0m\n\u001b[1;32m 8\u001b[0m cutoff \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m3\u001b[39m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, context \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(context_names):\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m rnaseq_gen( \u001b[38;5;66;03m# noqa\u001b[39;00m\n\u001b[1;32m 12\u001b[0m context_name\u001b[38;5;241m=\u001b[39mcontext,\n\u001b[1;32m 13\u001b[0m input_rnaseq_filepath\u001b[38;5;241m=\u001b[39mtrna_matrix_filepath[i],\n\u001b[1;32m 14\u001b[0m input_gene_info_filepath\u001b[38;5;241m=\u001b[39mgene_info_filepath[i],\n\u001b[1;32m 15\u001b[0m output_rnaseq_filepath\u001b[38;5;241m=\u001b[39mtrna_matrix_filepath[i],\n\u001b[1;32m 16\u001b[0m prep\u001b[38;5;241m=\u001b[39mRNAPrepMethod\u001b[38;5;241m.\u001b[39mTOTAL,\n\u001b[1;32m 17\u001b[0m taxon\u001b[38;5;241m=\u001b[39mtaxon_id,\n\u001b[1;32m 18\u001b[0m input_metadata_filepath\u001b[38;5;241m=\u001b[39mPath(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./data/config_sheets/trna_config.xlsx\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 19\u001b[0m replicate_ratio\u001b[38;5;241m=\u001b[39mreplicate_ratio,\n\u001b[1;32m 20\u001b[0m high_replicate_ratio\u001b[38;5;241m=\u001b[39mhigh_confidence_replicate_ratio,\n\u001b[1;32m 21\u001b[0m batch_ratio\u001b[38;5;241m=\u001b[39mbatch_ratio,\n\u001b[1;32m 22\u001b[0m 
high_batch_ratio\u001b[38;5;241m=\u001b[39mhigh_confidence_batch_ratio,\n\u001b[1;32m 23\u001b[0m technique\u001b[38;5;241m=\u001b[39mtechnique,\n\u001b[1;32m 24\u001b[0m cutoff\u001b[38;5;241m=\u001b[39mcutoff\n\u001b[1;32m 25\u001b[0m )\n", + "File \u001b[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:808\u001b[0m, in \u001b[0;36mrnaseq_gen\u001b[0;34m(context_name, input_rnaseq_filepath, input_gene_info_filepath, output_rnaseq_filepath, prep, taxon, input_metadata_filepath, input_metadata_df, replicate_ratio, high_replicate_ratio, batch_ratio, high_batch_ratio, technique, cutoff)\u001b[0m\n\u001b[1;32m 805\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStarting \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcontext_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 806\u001b[0m output_rnaseq_filepath\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m--> 808\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m _save_rnaseq_tests(\n\u001b[1;32m 809\u001b[0m context_name\u001b[38;5;241m=\u001b[39mcontext_name,\n\u001b[1;32m 810\u001b[0m rnaseq_matrix\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mawait\u001b[39;00m _read_counts(input_rnaseq_filepath),\n\u001b[1;32m 811\u001b[0m metadata_df\u001b[38;5;241m=\u001b[39minput_metadata_df \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m _create_metadata_df(input_metadata_filepath),\n\u001b[1;32m 812\u001b[0m gene_info_df\u001b[38;5;241m=\u001b[39mpd\u001b[38;5;241m.\u001b[39mread_csv(input_gene_info_filepath),\n\u001b[1;32m 813\u001b[0m output_filepath\u001b[38;5;241m=\u001b[39moutput_rnaseq_filepath,\n\u001b[1;32m 814\u001b[0m prep\u001b[38;5;241m=\u001b[39mprep,\n\u001b[1;32m 815\u001b[0m 
taxon\u001b[38;5;241m=\u001b[39mtaxon,\n\u001b[1;32m 816\u001b[0m replicate_ratio\u001b[38;5;241m=\u001b[39mreplicate_ratio,\n\u001b[1;32m 817\u001b[0m batch_ratio\u001b[38;5;241m=\u001b[39mbatch_ratio,\n\u001b[1;32m 818\u001b[0m high_replicate_ratio\u001b[38;5;241m=\u001b[39mhigh_replicate_ratio,\n\u001b[1;32m 819\u001b[0m high_batch_ratio\u001b[38;5;241m=\u001b[39mhigh_batch_ratio,\n\u001b[1;32m 820\u001b[0m technique\u001b[38;5;241m=\u001b[39mtechnique,\n\u001b[1;32m 821\u001b[0m cut_off\u001b[38;5;241m=\u001b[39mcutoff,\n\u001b[1;32m 822\u001b[0m )\n", + "File \u001b[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:671\u001b[0m, in \u001b[0;36m_save_rnaseq_tests\u001b[0;34m(context_name, rnaseq_matrix, metadata_df, gene_info_df, output_filepath, prep, taxon, replicate_ratio, batch_ratio, high_replicate_ratio, high_batch_ratio, technique, cut_off)\u001b[0m\n\u001b[1;32m 662\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Save the results of the RNA-Seq tests to a CSV file.\"\"\"\u001b[39;00m\n\u001b[1;32m 663\u001b[0m filtering_options \u001b[38;5;241m=\u001b[39m _FilteringOptions(\n\u001b[1;32m 664\u001b[0m replicate_ratio\u001b[38;5;241m=\u001b[39mreplicate_ratio,\n\u001b[1;32m 665\u001b[0m batch_ratio\u001b[38;5;241m=\u001b[39mbatch_ratio,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 668\u001b[0m high_batch_ratio\u001b[38;5;241m=\u001b[39mhigh_batch_ratio,\n\u001b[1;32m 669\u001b[0m )\n\u001b[0;32m--> 671\u001b[0m read_counts_results: _ReadMatrixResults \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m _build_matrix_results(\n\u001b[1;32m 672\u001b[0m matrix\u001b[38;5;241m=\u001b[39mrnaseq_matrix,\n\u001b[1;32m 673\u001b[0m gene_info\u001b[38;5;241m=\u001b[39mgene_info_df,\n\u001b[1;32m 674\u001b[0m metadata_df\u001b[38;5;241m=\u001b[39mmetadata_df,\n\u001b[1;32m 675\u001b[0m taxon\u001b[38;5;241m=\u001b[39mtaxon,\n\u001b[1;32m 676\u001b[0m )\n\u001b[1;32m 677\u001b[0m metrics \u001b[38;5;241m=\u001b[39m 
read_counts_results\u001b[38;5;241m.\u001b[39mmetrics\n\u001b[1;32m 678\u001b[0m entrez_gene_ids \u001b[38;5;241m=\u001b[39m read_counts_results\u001b[38;5;241m.\u001b[39mentrez_gene_ids\n", + "File \u001b[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:201\u001b[0m, in \u001b[0;36m_build_matrix_results\u001b[0;34m(matrix, gene_info, metadata_df, taxon)\u001b[0m\n\u001b[1;32m 199\u001b[0m gene_info \u001b[38;5;241m=\u001b[39m gene_info_migrations(gene_info)\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28mprint\u001b[39m(matrix)\n\u001b[0;32m--> 201\u001b[0m conversion \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m ensembl_to_gene_id_and_symbol(ids\u001b[38;5;241m=\u001b[39m\u001b[43mmatrix\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mensembl_gene_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mtolist(), taxon\u001b[38;5;241m=\u001b[39mtaxon)\n\u001b[1;32m 202\u001b[0m matrix \u001b[38;5;241m=\u001b[39m matrix\u001b[38;5;241m.\u001b[39mmerge(conversion, on\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mensembl_gene_id\u001b[39m\u001b[38;5;124m\"\u001b[39m, how\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mleft\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 204\u001b[0m \u001b[38;5;66;03m# Only include Entrez and Ensembl Gene IDs that are present in `gene_info`\u001b[39;00m\n", + "File \u001b[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/frame.py:4102\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 4100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 4101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 4102\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m 4104\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n", + "File \u001b[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n", + "\u001b[0;31mKeyError\u001b[0m: 'ensembl_gene_id'" + ] + } + ], "source": [ "from como.rnaseq_gen import rnaseq_gen, FilteringTechnique\n", "\n", @@ -401,7 +452,7 @@ "cutoff = -3\n", "\n", "for i, context in enumerate(context_names):\n", - " await rnaseq_gen(\n", + " await rnaseq_gen( # noqa\n", " context_name=context,\n", " input_rnaseq_filepath=trna_matrix_filepath[i],\n", " input_gene_info_filepath=gene_info_filepath[i],\n", From 967445906d260cff83429a6ef1c1fc43694ef929 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 14:48:32 -0600 Subject: [PATCH 67/91] revert: use mrna instead of polya --- main/como/rnaseq_preprocess.py | 26 +++++++++++++------------- main/como/types.py | 2 +- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index bfa19bc7..d033f752 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -523,7 +523,7 @@ async def _process( if output_trna_config_filepath: rna_types.append(("total", output_trna_config_filepath, output_trna_matrix_filepath)) if output_mrna_config_filepath: - rna_types.append(("polya", output_mrna_config_filepath, output_mrna_matrix_filepath)) + rna_types.append(("mrna", output_mrna_config_filepath, output_mrna_matrix_filepath)) # if provided, iterate through como-input specific directories tasks = [] @@ -563,9 +563,9 @@ async def rnaseq_preprocess( input_matrix_filepath: Path | list[Path] | None = None, preparation_method: RNAPrepMethod | list[RNAPrepMethod] | None = None, output_trna_config_filepath: Path | None = None, - output_polya_config_filepath: Path | None = None, + output_mrna_config_filepath: Path | None = None, output_trna_count_matrix_filepath: Path | 
None = None, - output_polya_count_matrix_filepath: Path | None = None, + output_mrna_count_matrix_filepath: Path | None = None, cache: bool = True, log_level: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO", log_location: str | TextIOWrapper = sys.stderr, @@ -579,9 +579,9 @@ async def rnaseq_preprocess( :param taxon: The NCBI taxonomy ID :param output_gene_info_filepath: Path to the output gene information CSV file :param output_trna_config_filepath: Path to the output tRNA config file (if in "create" mode) - :param output_polya_config_filepath: Path to the output mRNA config file (if in "create" mode) + :param output_mrna_config_filepath: Path to the output mRNA config file (if in "create" mode) :param output_trna_count_matrix_filepath: The path to write total RNA count matrices - :param output_polya_count_matrix_filepath: The path to write messenger RNA count matrices + :param output_mrna_count_matrix_filepath: The path to write messenger RNA count matrices :param como_context_dir: If in "create" mode, the input path(s) to the COMO_input directory of the current context i.e., the directory containing "fragmentSizes", "geneCounts", "insertSizeMetrics", etc. 
directories :param input_matrix_filepath: If in "provide" mode, the path(s) to the count matrices to be processed @@ -604,18 +604,18 @@ async def rnaseq_preprocess( output_trna_config_filepath = ( output_trna_config_filepath.resolve() if output_trna_config_filepath else output_trna_config_filepath ) - output_polya_config_filepath = ( - output_polya_config_filepath.resolve() if output_polya_config_filepath else output_polya_config_filepath + output_mrna_config_filepath = ( + output_mrna_config_filepath.resolve() if output_mrna_config_filepath else output_mrna_config_filepath ) output_trna_count_matrix_filepath = ( output_trna_count_matrix_filepath.resolve() if output_trna_count_matrix_filepath else output_trna_count_matrix_filepath ) - output_polya_count_matrix_filepath = ( - output_polya_count_matrix_filepath.resolve() - if output_polya_count_matrix_filepath - else output_polya_count_matrix_filepath + output_mrna_count_matrix_filepath = ( + output_mrna_count_matrix_filepath.resolve() + if output_mrna_count_matrix_filepath + else output_mrna_count_matrix_filepath ) input_matrix_filepath = _listify(input_matrix_filepath) @@ -633,8 +633,8 @@ async def rnaseq_preprocess( input_matrix_filepath=input_matrix_filepath, output_gene_info_filepath=output_gene_info_filepath, output_trna_config_filepath=output_trna_config_filepath, - output_mrna_config_filepath=output_polya_config_filepath, + output_mrna_config_filepath=output_mrna_config_filepath, output_trna_matrix_filepath=output_trna_count_matrix_filepath, - output_mrna_matrix_filepath=output_polya_count_matrix_filepath, + output_mrna_matrix_filepath=output_mrna_count_matrix_filepath, cache=cache, ) diff --git a/main/como/types.py b/main/como/types.py index ebe44ecf..42908252 100644 --- a/main/como/types.py +++ b/main/como/types.py @@ -47,4 +47,4 @@ def from_string(value: str) -> RNAPrepMethod: type_path = str | Path -type_rna = Literal["total", "polya"] +type_rna = Literal["total", "mrna"] From 
cde66061b1f410b56f5ada7955870763c8247a7a Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 14:50:40 -0600 Subject: [PATCH 68/91] refactor: check files returned Instead of checking whether the stems of the directories match, check that count and strandedness files are actually returned for each study. Because the directories are sorted (and this comes from FastqToGeneCounts), they should always match. --- main/como/rnaseq_preprocess.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index d033f752..f7e65040 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -138,16 +138,18 @@ def _organize_gene_counts_files(data_dir: Path) -> list[_StudyMetrics]: # For each study, collect gene count files, fragment files, insert size files, layouts, and strandedness information study_metrics: list[_StudyMetrics] = [] for gene_dir, strand_dir in zip(gene_counts_directories, strandedness_directories): - if gene_dir.stem != strand_dir.stem: - raise ValueError( - f"Gene directory name of '{gene_dir.stem}' does not match stranded directory name of '{strand_dir.stem}'" # noqa: E501 - ) + count_files = list(gene_dir.glob("*.tab")) + strand_files = list(strand_dir.glob("*.txt")) + if len(count_files) == 0: + raise ValueError(f"No count files found for study '{gene_dir.stem}'.") + if len(strand_files) == 0: + raise ValueError(f"No strandedness files found for study '{gene_dir.stem}'.") study_metrics.append( _StudyMetrics( study_name=gene_dir.stem, - count_files=list(gene_dir.glob("*.tab")), - strand_files=list(strand_dir.glob("*.txt")), + count_files=count_files, + strand_files=strand_files, ) ) return study_metrics @@ -262,6 +264,7 @@ async def _write_counts_matrix( counts: list[pd.DataFrame] = await asyncio.gather( *[_create_sample_counts_matrix(metric) for metric in study_metrics] ) + final_matrix = pd.DataFrame() for count in counts: final_matrix = count if final_matrix.empty else
pd.merge(final_matrix, count, on="ensembl_gene_id", how="outer") From b14d3f9b8c26699346f710c73f3e1cec91327fa5 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 14:51:10 -0600 Subject: [PATCH 69/91] feat: allow specifying specific directories --- main/como/rnaseq_preprocess.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index f7e65040..9da1a5e2 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -278,13 +278,28 @@ async def _write_counts_matrix( return final_matrix -async def _create_config_df(context_name: str, /, como_input_dir: Path) -> pd.DataFrame: # noqa: C901 +async def _create_config_df( + context_name: str, + /, + como_context_dir: Path, + gene_count_dirname: str = "geneCounts", + layout_dirname: str = "layouts", + strandedness_dirname: str = "strandedness", + fragment_sizes_dirname: str = "fragmentSizes", + prep_method_dirname: str = "prepMethods", +) -> pd.DataFrame: """Create configuration sheet. The configuration file created is based on the gene counts matrix. 
If using zFPKM normalization technique, mean fragment lengths will be fetched """ - gene_counts_files = list(Path(como_input_dir, context_name, "geneCounts").rglob("*.tab")) + gene_counts_dir = como_context_dir / gene_count_dirname + layout_dir = como_context_dir / layout_dirname + strandedness_dir = como_context_dir / strandedness_dirname + fragment_sizes_dir = como_context_dir / fragment_sizes_dirname + prep_method_dir = como_context_dir / prep_method_dirname + + gene_counts_files = list(gene_counts_dir.rglob("*.tab")) sample_names: list[str] = [] fragment_lengths: list[int | float] = [] layouts: list[str] = [] @@ -292,6 +307,9 @@ async def _create_config_df(context_name: str, /, como_input_dir: Path) -> pd.Da groups: list[str] = [] preparation_method: list[str] = [] + if len(gene_counts_files) == 0: + raise FileNotFoundError(f"No gene count files found in '{gene_counts_dir}'.") + for gene_count_filename in sorted(gene_counts_files): try: # Match S___R___r___ From d52b0ee1c766d6c15aa31ac98518590f03b3960b Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 14:52:01 -0600 Subject: [PATCH 70/91] style: use more descriptive variable names --- main/como/rnaseq_preprocess.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 9da1a5e2..98a283f2 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -311,27 +311,25 @@ async def _create_config_df( raise FileNotFoundError(f"No gene count files found in '{gene_counts_dir}'.") for gene_count_filename in sorted(gene_counts_files): - try: - # Match S___R___r___ - # \d{1,3} matches 1-3 digits - # (?:r\d{1,3})? matches an option "r" followed by three digits - label = re.findall(r"S\d{1,3}R\d{1,3}(?:r\d{1,3})?", gene_count_filename.as_posix())[0] - - except IndexError as e: - raise IndexError( + # Match S___R___r___ + # \d{1,3} matches 1-3 digits + # (?:r\d{1,3})? 
optionally matches a "r" followed by three digits + label = re.findall(r"S\d{1,3}R\d{1,3}(?:r\d{1,3})?", gene_count_filename.as_posix())[0] + if not label: + raise ValueError( f"\n\nFilename of '{gene_count_filename}' is not valid. " f"Should be 'contextName_SXRYrZ.tab', where X is the study/batch number, Y is the replicate number, " f"and Z is the run number." "\n\nIf not a multi-run sample, exclude 'rZ' from the filename." - ) from e + ) study_number = re.findall(r"S\d{1,3}", label)[0] rep_number = re.findall(r"R\d{1,3}", label)[0] - run = re.findall(r"r\d{1,3}", label) + run_number = re.findall(r"r\d{1,3}", label) multi_flag = 0 - if len(run) > 0: - if run[0] != "r1": + if len(run_number) > 0: + if run_number[0] != "r1": continue else: label_glob = study_number + rep_number + "r*" From fe1d406c4bd3571394b588a1ead61c64526a4555 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 14:52:35 -0600 Subject: [PATCH 71/91] refactor: use early continue Removes an extra indentation --- main/como/rnaseq_preprocess.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 98a283f2..0a0c7a35 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -331,23 +331,21 @@ async def _create_config_df( if len(run_number) > 0: if run_number[0] != "r1": continue - else: - label_glob = study_number + rep_number + "r*" - runs = [run for run in gene_counts_files if re.search(label_glob, run.as_posix())] - multi_flag = 1 - frag_files = [] - - for r in runs: - r_label = re.findall(r"r\d{1,3}", r.as_posix())[0] - R_label = re.findall(r"R\d{1,3}", r.as_posix())[0] # noqa: N806 - frag_filename = "".join([context_name, "_", study_number, R_label, r_label, "_fragment_size.txt"]) - frag_files.append(como_input_dir / context_name / "fragmentSizes" / study_number / frag_filename) - - context_path = como_input_dir / context_name - layout_files: 
list[Path] = list((context_path / "layouts").rglob(f"{context_name}_{label}_layout.txt")) - strand_files: list[Path] = list((context_path / "strandedness").rglob(f"{context_name}_{label}_strandedness.txt")) # fmt: skip # noqa: E501 - frag_files: list[Path] = list((context_path / "fragmentSizes").rglob(f"{context_name}_{label}_fragment_size.txt")) # fmt: skip # noqa: E501 - prep_files: list[Path] = list((context_path / "prepMethods").rglob(f"{context_name}_{label}_prep_method.txt")) + label_glob = f"{study_number}{rep_number}r*" # S__R__r* + runs = [run for run in gene_counts_files if re.search(label_glob, run.as_posix())] + multi_flag = 1 + frag_files = [] + + for run in runs: + run_number = re.findall(r"R\d{1,3}", run.as_posix())[0] + replicate = re.findall(r"r\d{1,3}", run.as_posix())[0] + frag_filename = "".join([context_name, "_", study_number, run_number, replicate, "_fragment_size.txt"]) + frag_files.append(como_context_dir / fragment_sizes_dirname / study_number / frag_filename) + + layout_files: list[Path] = list(layout_dir.rglob(f"{context_name}_{label}_layout.txt")) + strand_files: list[Path] = list(strandedness_dir.rglob(f"{context_name}_{label}_strandedness.txt")) + frag_files: list[Path] = list(fragment_sizes_dir.rglob(f"{context_name}_{label}_fragment_size.txt")) + prep_files: list[Path] = list(prep_method_dir.rglob(f"{context_name}_{label}_prep_method.txt")) layout = "UNKNOWN" if len(layout_files) == 0: From 8f1d02782550c70c23f447f61d7ed9b01906c7e4 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 14:52:50 -0600 Subject: [PATCH 72/91] style: update warning messages --- main/como/rnaseq_preprocess.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 0a0c7a35..5c2d8125 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -351,7 +351,7 @@ async def _create_config_df( if len(layout_files) == 0: logger.warning( 
f"No layout file found for {label}, writing as 'UNKNOWN', " - f"this should be defined by user if using zFPKM or rnaseq_gen.py will not run" + f"this should be defined if you are using zFPKM or downstream 'rnaseq_gen.py' will not run" ) elif len(layout_files) == 1: with layout_files[0].open("r") as file: @@ -380,7 +380,7 @@ async def _create_config_df( prep = "total" if len(prep_files) == 0: - logger.warning(f"No prep file found for {label}, assuming 'total' as in Total RNA library preparation") + logger.warning(f"No prep file found for {label}, assuming 'total', as in 'Total RNA' library preparation") elif len(prep_files) == 1: with prep_files[0].open("r") as file: prep = file.read().strip().lower() @@ -393,10 +393,10 @@ async def _create_config_df( ) mean_fragment_size = 100 - if len(frag_files) == 0: + if len(frag_files) == 0 and prep != RNAPrepMethod.TOTAL.value: logger.warning( f"No fragment file found for {label}, using '100'. " - f"This must be defined by the user in order to use zFPKM normalization" + "You should define this if you are going to use downstream zFPKM normalization" ) elif len(frag_files) == 1: if layout == "single-end": @@ -512,7 +512,7 @@ async def _create_matrix_file( output_counts_matrix_filepath: Path, rna: type_rna, ) -> None: - config_df = await _create_config_df(context_name, como_input_dir=como_context_dir) + config_df = await _create_config_df(context_name, como_context_dir=como_context_dir) await _write_counts_matrix( config_df=config_df, como_context_dir=como_context_dir, From 0ef736e34b5479fa87d17c1db48002a32a38910d Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 14:53:20 -0600 Subject: [PATCH 73/91] style: rename variables --- main/COMO.ipynb | 50 ++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/main/COMO.ipynb b/main/COMO.ipynb index 758d719b..52d7fb88 100644 --- a/main/COMO.ipynb +++ b/main/COMO.ipynb @@ -233,7 +233,7 @@ }, { "cell_type": "code", 
- "execution_count": 9, + "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2024-12-09T22:10:43.421233Z", @@ -285,10 +285,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-12-09 16:23:55\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mcomo.rnaseq_preprocess\u001b[0m:\u001b[36m274\u001b[0m - \u001b[32m\u001b[1mWrote gene count matrix for 'polya' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/polya-rna/polyarna_naiveB.csv'\u001b[0m\n", - "\u001b[32m2024-12-09 16:23:55\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mcomo.rnaseq_preprocess\u001b[0m:\u001b[36m274\u001b[0m - \u001b[32m\u001b[1mWrote gene count matrix for 'total' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001b[0m\n", - "\u001b[32m2024-12-09 16:23:55\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mcomo.rnaseq_preprocess\u001b[0m:\u001b[36m451\u001b[0m - \u001b[1mFetching gene info (this may take 1-5 minutes)\u001b[0m\n", - "\u001b[32m2024-12-09 16:24:13\u001b[0m | \u001b[32m\u001b[1mSUCCESS \u001b[0m | \u001b[36mcomo.rnaseq_preprocess\u001b[0m:\u001b[36m488\u001b[0m - \u001b[32m\u001b[1mGene Info file written at '/Users/joshl/Projects/COMO/main/data/results/naiveB/gene_info.csv'\u001b[0m\n" + "\u001B[32m2024-12-09 16:23:55\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'polya' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/polya-rna/polyarna_naiveB.csv'\u001B[0m\n", + "\u001B[32m2024-12-09 16:23:55\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'total' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001B[0m\n", + "\u001B[32m2024-12-09 16:23:55\u001B[0m | \u001B[1mINFO \u001B[0m | 
\u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m451\u001B[0m - \u001B[1mFetching gene info (this may take 1-5 minutes)\u001B[0m\n", + "\u001B[32m2024-12-09 16:24:13\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m488\u001B[0m - \u001B[32m\u001B[1mGene Info file written at '/Users/joshl/Projects/COMO/main/data/results/naiveB/gene_info.csv'\u001B[0m\n" ] } ], @@ -301,8 +301,8 @@ " como_context_dir=como_context_dir[i],\n", " output_trna_config_filepath=Path(\"./data/config_sheets/trna_config.xlsx\"),\n", " output_trna_count_matrix_filepath=trna_matrix_filepath[i],\n", - " output_polya_config_filepath=Path(\"./data/config_sheets/polya_config.xlsx\"),\n", - " output_polya_count_matrix_filepath=polya_matrix_filepath[i],\n", + " output_mrna_config_filepath=Path(\"./data/config_sheets/polya_config.xlsx\"),\n", + " output_mrna_count_matrix_filepath=polya_matrix_filepath[i],\n", " cache=True,\n", " log_level=\"INFO\",\n", " )" @@ -392,8 +392,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2024-12-09 16:18:43.958\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcomo.rnaseq_gen\u001b[0m:\u001b[36mrnaseq_gen\u001b[0m:\u001b[36m805\u001b[0m - \u001b[34m\u001b[1mStarting 'naiveB'\u001b[0m\n", - "\u001b[32m2024-12-09 16:18:43.961\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mcomo.rnaseq_gen\u001b[0m:\u001b[36m_read_counts\u001b[0m:\u001b[36m175\u001b[0m - \u001b[34m\u001b[1mReading CSV file at 'data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001b[0m\n" + "\u001B[32m2024-12-09 16:18:43.958\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mcomo.rnaseq_gen\u001B[0m:\u001B[36mrnaseq_gen\u001B[0m:\u001B[36m805\u001B[0m - \u001B[34m\u001B[1mStarting 'naiveB'\u001B[0m\n", + "\u001B[32m2024-12-09 16:18:43.961\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mcomo.rnaseq_gen\u001B[0m:\u001B[36m_read_counts\u001B[0m:\u001B[36m175\u001B[0m - \u001B[34m\u001B[1mReading CSV 
file at 'data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001B[0m\n" ] }, { @@ -421,23 +421,23 @@ "evalue": "'ensembl_gene_id'", "output_type": "error", "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3804\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 3805\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcasted_key\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3806\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", - "File \u001b[0;32mindex.pyx:167\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mindex.pyx:196\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: 'ensembl_gene_id'", + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)", + "File \u001B[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, 
key)\u001B[0m\n\u001B[1;32m 3804\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 3805\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcasted_key\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 3806\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n", + "File \u001B[0;32mindex.pyx:167\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n", + "File \u001B[0;32mindex.pyx:196\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n", + "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n", + "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n", + "\u001B[0;31mKeyError\u001B[0m: 'ensembl_gene_id'", "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[4], line 11\u001b[0m\n\u001b[1;32m 8\u001b[0m cutoff \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m3\u001b[39m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, context \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(context_names):\n\u001b[0;32m---> 11\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m rnaseq_gen( \u001b[38;5;66;03m# noqa\u001b[39;00m\n\u001b[1;32m 12\u001b[0m context_name\u001b[38;5;241m=\u001b[39mcontext,\n\u001b[1;32m 13\u001b[0m input_rnaseq_filepath\u001b[38;5;241m=\u001b[39mtrna_matrix_filepath[i],\n\u001b[1;32m 14\u001b[0m 
input_gene_info_filepath\u001b[38;5;241m=\u001b[39mgene_info_filepath[i],\n\u001b[1;32m 15\u001b[0m output_rnaseq_filepath\u001b[38;5;241m=\u001b[39mtrna_matrix_filepath[i],\n\u001b[1;32m 16\u001b[0m prep\u001b[38;5;241m=\u001b[39mRNAPrepMethod\u001b[38;5;241m.\u001b[39mTOTAL,\n\u001b[1;32m 17\u001b[0m taxon\u001b[38;5;241m=\u001b[39mtaxon_id,\n\u001b[1;32m 18\u001b[0m input_metadata_filepath\u001b[38;5;241m=\u001b[39mPath(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./data/config_sheets/trna_config.xlsx\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 19\u001b[0m replicate_ratio\u001b[38;5;241m=\u001b[39mreplicate_ratio,\n\u001b[1;32m 20\u001b[0m high_replicate_ratio\u001b[38;5;241m=\u001b[39mhigh_confidence_replicate_ratio,\n\u001b[1;32m 21\u001b[0m batch_ratio\u001b[38;5;241m=\u001b[39mbatch_ratio,\n\u001b[1;32m 22\u001b[0m high_batch_ratio\u001b[38;5;241m=\u001b[39mhigh_confidence_batch_ratio,\n\u001b[1;32m 23\u001b[0m technique\u001b[38;5;241m=\u001b[39mtechnique,\n\u001b[1;32m 24\u001b[0m cutoff\u001b[38;5;241m=\u001b[39mcutoff\n\u001b[1;32m 25\u001b[0m )\n", - "File \u001b[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:808\u001b[0m, in \u001b[0;36mrnaseq_gen\u001b[0;34m(context_name, input_rnaseq_filepath, input_gene_info_filepath, output_rnaseq_filepath, prep, taxon, input_metadata_filepath, input_metadata_df, replicate_ratio, high_replicate_ratio, batch_ratio, high_batch_ratio, technique, cutoff)\u001b[0m\n\u001b[1;32m 805\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mStarting \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcontext_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 806\u001b[0m output_rnaseq_filepath\u001b[38;5;241m.\u001b[39mparent\u001b[38;5;241m.\u001b[39mmkdir(parents\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, 
exist_ok\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m--> 808\u001b[0m \u001b[38;5;28;01mawait\u001b[39;00m _save_rnaseq_tests(\n\u001b[1;32m 809\u001b[0m context_name\u001b[38;5;241m=\u001b[39mcontext_name,\n\u001b[1;32m 810\u001b[0m rnaseq_matrix\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mawait\u001b[39;00m _read_counts(input_rnaseq_filepath),\n\u001b[1;32m 811\u001b[0m metadata_df\u001b[38;5;241m=\u001b[39minput_metadata_df \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m _create_metadata_df(input_metadata_filepath),\n\u001b[1;32m 812\u001b[0m gene_info_df\u001b[38;5;241m=\u001b[39mpd\u001b[38;5;241m.\u001b[39mread_csv(input_gene_info_filepath),\n\u001b[1;32m 813\u001b[0m output_filepath\u001b[38;5;241m=\u001b[39moutput_rnaseq_filepath,\n\u001b[1;32m 814\u001b[0m prep\u001b[38;5;241m=\u001b[39mprep,\n\u001b[1;32m 815\u001b[0m taxon\u001b[38;5;241m=\u001b[39mtaxon,\n\u001b[1;32m 816\u001b[0m replicate_ratio\u001b[38;5;241m=\u001b[39mreplicate_ratio,\n\u001b[1;32m 817\u001b[0m batch_ratio\u001b[38;5;241m=\u001b[39mbatch_ratio,\n\u001b[1;32m 818\u001b[0m high_replicate_ratio\u001b[38;5;241m=\u001b[39mhigh_replicate_ratio,\n\u001b[1;32m 819\u001b[0m high_batch_ratio\u001b[38;5;241m=\u001b[39mhigh_batch_ratio,\n\u001b[1;32m 820\u001b[0m technique\u001b[38;5;241m=\u001b[39mtechnique,\n\u001b[1;32m 821\u001b[0m cut_off\u001b[38;5;241m=\u001b[39mcutoff,\n\u001b[1;32m 822\u001b[0m )\n", - "File \u001b[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:671\u001b[0m, in \u001b[0;36m_save_rnaseq_tests\u001b[0;34m(context_name, rnaseq_matrix, metadata_df, gene_info_df, output_filepath, prep, taxon, replicate_ratio, batch_ratio, high_replicate_ratio, high_batch_ratio, technique, cut_off)\u001b[0m\n\u001b[1;32m 662\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Save the results of the RNA-Seq tests to a CSV file.\"\"\"\u001b[39;00m\n\u001b[1;32m 663\u001b[0m filtering_options \u001b[38;5;241m=\u001b[39m 
_FilteringOptions(\n\u001b[1;32m 664\u001b[0m replicate_ratio\u001b[38;5;241m=\u001b[39mreplicate_ratio,\n\u001b[1;32m 665\u001b[0m batch_ratio\u001b[38;5;241m=\u001b[39mbatch_ratio,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 668\u001b[0m high_batch_ratio\u001b[38;5;241m=\u001b[39mhigh_batch_ratio,\n\u001b[1;32m 669\u001b[0m )\n\u001b[0;32m--> 671\u001b[0m read_counts_results: _ReadMatrixResults \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m _build_matrix_results(\n\u001b[1;32m 672\u001b[0m matrix\u001b[38;5;241m=\u001b[39mrnaseq_matrix,\n\u001b[1;32m 673\u001b[0m gene_info\u001b[38;5;241m=\u001b[39mgene_info_df,\n\u001b[1;32m 674\u001b[0m metadata_df\u001b[38;5;241m=\u001b[39mmetadata_df,\n\u001b[1;32m 675\u001b[0m taxon\u001b[38;5;241m=\u001b[39mtaxon,\n\u001b[1;32m 676\u001b[0m )\n\u001b[1;32m 677\u001b[0m metrics \u001b[38;5;241m=\u001b[39m read_counts_results\u001b[38;5;241m.\u001b[39mmetrics\n\u001b[1;32m 678\u001b[0m entrez_gene_ids \u001b[38;5;241m=\u001b[39m read_counts_results\u001b[38;5;241m.\u001b[39mentrez_gene_ids\n", - "File \u001b[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:201\u001b[0m, in \u001b[0;36m_build_matrix_results\u001b[0;34m(matrix, gene_info, metadata_df, taxon)\u001b[0m\n\u001b[1;32m 199\u001b[0m gene_info \u001b[38;5;241m=\u001b[39m gene_info_migrations(gene_info)\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28mprint\u001b[39m(matrix)\n\u001b[0;32m--> 201\u001b[0m conversion \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m ensembl_to_gene_id_and_symbol(ids\u001b[38;5;241m=\u001b[39m\u001b[43mmatrix\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mensembl_gene_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mtolist(), taxon\u001b[38;5;241m=\u001b[39mtaxon)\n\u001b[1;32m 202\u001b[0m matrix \u001b[38;5;241m=\u001b[39m matrix\u001b[38;5;241m.\u001b[39mmerge(conversion, 
on\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mensembl_gene_id\u001b[39m\u001b[38;5;124m\"\u001b[39m, how\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mleft\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 204\u001b[0m \u001b[38;5;66;03m# Only include Entrez and Ensembl Gene IDs that are present in `gene_info`\u001b[39;00m\n", - "File \u001b[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/frame.py:4102\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 4100\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mnlevels \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 4101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_getitem_multilevel(key)\n\u001b[0;32m-> 4102\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_loc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4103\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_integer(indexer):\n\u001b[1;32m 4104\u001b[0m indexer \u001b[38;5;241m=\u001b[39m [indexer]\n", - "File \u001b[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3807\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(casted_key, \u001b[38;5;28mslice\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m 3808\u001b[0m \u001b[38;5;28misinstance\u001b[39m(casted_key, abc\u001b[38;5;241m.\u001b[39mIterable)\n\u001b[1;32m 3809\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(x, 
\u001b[38;5;28mslice\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m x \u001b[38;5;129;01min\u001b[39;00m casted_key)\n\u001b[1;32m 3810\u001b[0m ):\n\u001b[1;32m 3811\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m InvalidIndexError(key)\n\u001b[0;32m-> 3812\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(key) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m 3813\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[1;32m 3814\u001b[0m \u001b[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3815\u001b[0m \u001b[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3816\u001b[0m \u001b[38;5;66;03m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3817\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_indexing_error(key)\n", - "\u001b[0;31mKeyError\u001b[0m: 'ensembl_gene_id'" + "\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)", + "Cell \u001B[0;32mIn[4], line 11\u001B[0m\n\u001B[1;32m 8\u001B[0m cutoff \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m3\u001B[39m\n\u001B[1;32m 10\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m i, context \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(context_names):\n\u001B[0;32m---> 11\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m rnaseq_gen( \u001B[38;5;66;03m# noqa\u001B[39;00m\n\u001B[1;32m 12\u001B[0m context_name\u001B[38;5;241m=\u001B[39mcontext,\n\u001B[1;32m 13\u001B[0m input_rnaseq_filepath\u001B[38;5;241m=\u001B[39mtrna_matrix_filepath[i],\n\u001B[1;32m 14\u001B[0m input_gene_info_filepath\u001B[38;5;241m=\u001B[39mgene_info_filepath[i],\n\u001B[1;32m 15\u001B[0m output_rnaseq_filepath\u001B[38;5;241m=\u001B[39mtrna_matrix_filepath[i],\n\u001B[1;32m 16\u001B[0m 
prep\u001B[38;5;241m=\u001B[39mRNAPrepMethod\u001B[38;5;241m.\u001B[39mTOTAL,\n\u001B[1;32m 17\u001B[0m taxon\u001B[38;5;241m=\u001B[39mtaxon_id,\n\u001B[1;32m 18\u001B[0m input_metadata_filepath\u001B[38;5;241m=\u001B[39mPath(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m./data/config_sheets/trna_config.xlsx\u001B[39m\u001B[38;5;124m\"\u001B[39m),\n\u001B[1;32m 19\u001B[0m replicate_ratio\u001B[38;5;241m=\u001B[39mreplicate_ratio,\n\u001B[1;32m 20\u001B[0m high_replicate_ratio\u001B[38;5;241m=\u001B[39mhigh_confidence_replicate_ratio,\n\u001B[1;32m 21\u001B[0m batch_ratio\u001B[38;5;241m=\u001B[39mbatch_ratio,\n\u001B[1;32m 22\u001B[0m high_batch_ratio\u001B[38;5;241m=\u001B[39mhigh_confidence_batch_ratio,\n\u001B[1;32m 23\u001B[0m technique\u001B[38;5;241m=\u001B[39mtechnique,\n\u001B[1;32m 24\u001B[0m cutoff\u001B[38;5;241m=\u001B[39mcutoff\n\u001B[1;32m 25\u001B[0m )\n", + "File \u001B[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:808\u001B[0m, in \u001B[0;36mrnaseq_gen\u001B[0;34m(context_name, input_rnaseq_filepath, input_gene_info_filepath, output_rnaseq_filepath, prep, taxon, input_metadata_filepath, input_metadata_df, replicate_ratio, high_replicate_ratio, batch_ratio, high_batch_ratio, technique, cutoff)\u001B[0m\n\u001B[1;32m 805\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mStarting \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mcontext_name\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 806\u001B[0m output_rnaseq_filepath\u001B[38;5;241m.\u001B[39mparent\u001B[38;5;241m.\u001B[39mmkdir(parents\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, exist_ok\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[0;32m--> 808\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m _save_rnaseq_tests(\n\u001B[1;32m 809\u001B[0m context_name\u001B[38;5;241m=\u001B[39mcontext_name,\n\u001B[1;32m 
810\u001B[0m rnaseq_matrix\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mawait\u001B[39;00m _read_counts(input_rnaseq_filepath),\n\u001B[1;32m 811\u001B[0m metadata_df\u001B[38;5;241m=\u001B[39minput_metadata_df \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m _create_metadata_df(input_metadata_filepath),\n\u001B[1;32m 812\u001B[0m gene_info_df\u001B[38;5;241m=\u001B[39mpd\u001B[38;5;241m.\u001B[39mread_csv(input_gene_info_filepath),\n\u001B[1;32m 813\u001B[0m output_filepath\u001B[38;5;241m=\u001B[39moutput_rnaseq_filepath,\n\u001B[1;32m 814\u001B[0m prep\u001B[38;5;241m=\u001B[39mprep,\n\u001B[1;32m 815\u001B[0m taxon\u001B[38;5;241m=\u001B[39mtaxon,\n\u001B[1;32m 816\u001B[0m replicate_ratio\u001B[38;5;241m=\u001B[39mreplicate_ratio,\n\u001B[1;32m 817\u001B[0m batch_ratio\u001B[38;5;241m=\u001B[39mbatch_ratio,\n\u001B[1;32m 818\u001B[0m high_replicate_ratio\u001B[38;5;241m=\u001B[39mhigh_replicate_ratio,\n\u001B[1;32m 819\u001B[0m high_batch_ratio\u001B[38;5;241m=\u001B[39mhigh_batch_ratio,\n\u001B[1;32m 820\u001B[0m technique\u001B[38;5;241m=\u001B[39mtechnique,\n\u001B[1;32m 821\u001B[0m cut_off\u001B[38;5;241m=\u001B[39mcutoff,\n\u001B[1;32m 822\u001B[0m )\n", + "File \u001B[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:671\u001B[0m, in \u001B[0;36m_save_rnaseq_tests\u001B[0;34m(context_name, rnaseq_matrix, metadata_df, gene_info_df, output_filepath, prep, taxon, replicate_ratio, batch_ratio, high_replicate_ratio, high_batch_ratio, technique, cut_off)\u001B[0m\n\u001B[1;32m 662\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"Save the results of the RNA-Seq tests to a CSV file.\"\"\"\u001B[39;00m\n\u001B[1;32m 663\u001B[0m filtering_options \u001B[38;5;241m=\u001B[39m _FilteringOptions(\n\u001B[1;32m 664\u001B[0m replicate_ratio\u001B[38;5;241m=\u001B[39mreplicate_ratio,\n\u001B[1;32m 665\u001B[0m batch_ratio\u001B[38;5;241m=\u001B[39mbatch_ratio,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 668\u001B[0m 
high_batch_ratio\u001B[38;5;241m=\u001B[39mhigh_batch_ratio,\n\u001B[1;32m 669\u001B[0m )\n\u001B[0;32m--> 671\u001B[0m read_counts_results: _ReadMatrixResults \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m _build_matrix_results(\n\u001B[1;32m 672\u001B[0m matrix\u001B[38;5;241m=\u001B[39mrnaseq_matrix,\n\u001B[1;32m 673\u001B[0m gene_info\u001B[38;5;241m=\u001B[39mgene_info_df,\n\u001B[1;32m 674\u001B[0m metadata_df\u001B[38;5;241m=\u001B[39mmetadata_df,\n\u001B[1;32m 675\u001B[0m taxon\u001B[38;5;241m=\u001B[39mtaxon,\n\u001B[1;32m 676\u001B[0m )\n\u001B[1;32m 677\u001B[0m metrics \u001B[38;5;241m=\u001B[39m read_counts_results\u001B[38;5;241m.\u001B[39mmetrics\n\u001B[1;32m 678\u001B[0m entrez_gene_ids \u001B[38;5;241m=\u001B[39m read_counts_results\u001B[38;5;241m.\u001B[39mentrez_gene_ids\n", + "File \u001B[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:201\u001B[0m, in \u001B[0;36m_build_matrix_results\u001B[0;34m(matrix, gene_info, metadata_df, taxon)\u001B[0m\n\u001B[1;32m 199\u001B[0m gene_info \u001B[38;5;241m=\u001B[39m gene_info_migrations(gene_info)\n\u001B[1;32m 200\u001B[0m \u001B[38;5;28mprint\u001B[39m(matrix)\n\u001B[0;32m--> 201\u001B[0m conversion \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m ensembl_to_gene_id_and_symbol(ids\u001B[38;5;241m=\u001B[39m\u001B[43mmatrix\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mensembl_gene_id\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[38;5;241m.\u001B[39mtolist(), taxon\u001B[38;5;241m=\u001B[39mtaxon)\n\u001B[1;32m 202\u001B[0m matrix \u001B[38;5;241m=\u001B[39m matrix\u001B[38;5;241m.\u001B[39mmerge(conversion, on\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mensembl_gene_id\u001B[39m\u001B[38;5;124m\"\u001B[39m, how\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mleft\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 204\u001B[0m \u001B[38;5;66;03m# Only 
include Entrez and Ensembl Gene IDs that are present in `gene_info`\u001B[39;00m\n", + "File \u001B[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/frame.py:4102\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 4100\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[1;32m 4101\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[0;32m-> 4102\u001B[0m indexer \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 4103\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[1;32m 4104\u001B[0m indexer \u001B[38;5;241m=\u001B[39m [indexer]\n", + "File \u001B[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 3807\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(casted_key, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m (\n\u001B[1;32m 3808\u001B[0m \u001B[38;5;28misinstance\u001B[39m(casted_key, abc\u001B[38;5;241m.\u001B[39mIterable)\n\u001B[1;32m 3809\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(x, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m casted_key)\n\u001B[1;32m 3810\u001B[0m ):\n\u001B[1;32m 3811\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m InvalidIndexError(key)\n\u001B[0;32m-> 3812\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m 
\u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[1;32m 3813\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[1;32m 3814\u001B[0m \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[1;32m 3815\u001B[0m \u001B[38;5;66;03m# InvalidIndexError. Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[1;32m 3816\u001B[0m \u001B[38;5;66;03m# the TypeError.\u001B[39;00m\n\u001B[1;32m 3817\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n", + "\u001B[0;31mKeyError\u001B[0m: 'ensembl_gene_id'" ] } ], From 3825aa259e4d3c71a648700c31dfa32962b5f45e Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 15:48:06 -0600 Subject: [PATCH 74/91] refactor: move filtering technique to types --- main/como/rnaseq_gen.py | 27 +-------------------------- main/como/types.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 05f185f3..5d2e364e 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -27,7 +27,7 @@ from como.migrations import gene_info_migrations from como.project import Config -from como.types import RNAPrepMethod +from como.types import FilteringTechnique, RNAPrepMethod class _FilteringOptions(NamedTuple): @@ -38,31 +38,6 @@ class _FilteringOptions(NamedTuple): high_batch_ratio: float -class FilteringTechnique(Enum): - """RNA sequencing filtering capabilities.""" - - cpm = "cpm" - zfpkm = "zfpkm" - tpm = "quantile" - umi = "umi" - - @staticmethod - def from_string(value: str) -> FilteringTechnique: - """Create a filtering technique object from a string.""" - match value.lower(): - case "cpm": - return FilteringTechnique.cpm - case "zfpkm": - return FilteringTechnique.zfpkm - case "quantile": - return FilteringTechnique.tpm - case 
"umi": - return FilteringTechnique.umi - case _: - possible_values = [t.value for t in FilteringTechnique] - raise ValueError(f"Got a filtering technique of '{value}'; should be one of: {possible_values}") - - class LayoutMethod(Enum): """RNA sequencing layout method.""" diff --git a/main/como/types.py b/main/como/types.py index 42908252..916d3a25 100644 --- a/main/como/types.py +++ b/main/como/types.py @@ -46,5 +46,30 @@ def from_string(value: str) -> RNAPrepMethod: raise ValueError(f"Filtering technique must be one of {possible_values}; got: {value}") +class FilteringTechnique(Enum): + """RNA sequencing filtering capabilities.""" + + cpm = "cpm" + zfpkm = "zfpkm" + tpm = "quantile" + umi = "umi" + + @staticmethod + def from_string(value: str) -> FilteringTechnique: + """Create a filtering technique object from a string.""" + match value.lower(): + case "cpm": + return FilteringTechnique.cpm + case "zfpkm": + return FilteringTechnique.zfpkm + case "quantile": + return FilteringTechnique.tpm + case "umi": + return FilteringTechnique.umi + case _: + possible_values = [t.value for t in FilteringTechnique] + raise ValueError(f"Got a filtering technique of '{value}'; should be one of: {possible_values}") + + type_path = str | Path type_rna = Literal["total", "mrna"] From b0fa60eb3d80f1ebe994dad4c26043099aefaa59 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Tue, 10 Dec 2024 15:48:17 -0600 Subject: [PATCH 75/91] fix: do not use more cores than necessary --- main/como/rnaseq_gen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 5d2e364e..8274a0d3 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -356,7 +356,7 @@ def zfpkm_transform( total = len(fpkm_df.columns) update_per_step: int = int(np.ceil(total * update_every_percent)) - cores = multiprocessing.cpu_count() - 2 + cores = min(multiprocessing.cpu_count() - 2, total) logger.debug(f"Processing {total:,} samples through 
zFPKM transform using {cores} cores") logger.debug( f"Will update every {update_per_step:,} steps as this is approximately " From 6bd06e1875e8c37747bb8adf4eddfbb45608c240 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 14:37:58 -0600 Subject: [PATCH 76/91] fix: use parenthesis to validate calculations Signed-off-by: Josh Loecker --- main/como/rnaseq_gen.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/main/como/rnaseq_gen.py b/main/como/rnaseq_gen.py index 8274a0d3..364bd4c3 100644 --- a/main/como/rnaseq_gen.py +++ b/main/como/rnaseq_gen.py @@ -505,7 +505,7 @@ def cpm_filter( cutoff = ( 10e6 / (np.median(np.sum(counts[:, i]))) if cut_off == "default" - else 1e6 * cut_off / np.median(np.sum(counts[:, i])) + else (1e6 * cut_off) / np.median(np.sum(counts[:, i])) ) test_bools = test_bools.merge(counts_per_million[counts_per_million.iloc[:, i] > cutoff]) @@ -637,7 +637,7 @@ async def _save_rnaseq_tests( filtering_options = _FilteringOptions( replicate_ratio=replicate_ratio, batch_ratio=batch_ratio, - cut_off=cut_off, + cut_off=float(cut_off), high_replicate_ratio=high_replicate_ratio, high_batch_ratio=high_batch_ratio, ) @@ -725,9 +725,14 @@ async def rnaseq_gen( # noqa: C901, allow complex function then study/batch numbers are checked for consensus according to batch ratios. 
The zFPKM method is outlined here: https://pubmed.ncbi.nlm.nih.gov/24215113/ - :param metadata_filepath: The configuration filename to read + :param context_name: The name of the context being processed + :param input_rnaseq_filepath: The filepath to the gene count matrix + :param input_gene_info_filepath: The filepath to the gene info file + :param output_rnaseq_filepath: The filepath to write the output gene count matrix :param prep: The preparation method :param taxon: The NCBI Taxon ID + :param input_metadata_filepath: The filepath to the metadata file + :param input_metadata_df: The metadata dataframe :param replicate_ratio: The percentage of replicates that a gene must appear in for a gene to be marked as "active" in a batch/study :param batch_ratio: The percentage of batches that a gene must appear in for a gene to be marked as 'active" @@ -759,7 +764,7 @@ async def rnaseq_gen( # noqa: C901, allow complex function cutoff = "default" case FilteringTechnique.zfpkm: - cutoff = "default" if cutoff else cutoff + cutoff = cutoff or -3 case FilteringTechnique.umi: pass case _: From f24347d14e00f6817114ef049ab6b5a09cd659b4 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 14:41:54 -0600 Subject: [PATCH 77/91] refactor: ignore missing variables for now Signed-off-by: Josh Loecker --- main/COMO.ipynb | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/main/COMO.ipynb b/main/COMO.ipynb index 52d7fb88..bbaeb152 100644 --- a/main/COMO.ipynb +++ b/main/COMO.ipynb @@ -442,7 +442,7 @@ } ], "source": [ - "from como.rnaseq_gen import rnaseq_gen, FilteringTechnique\n", + "from como.rnaseq_gen import FilteringTechnique, rnaseq_gen\n", "\n", "replicate_ratio = 0.75\n", "high_confidence_replicate_ratio = 1.0\n", @@ -452,7 +452,7 @@ "cutoff = -3\n", "\n", "for i, context in enumerate(context_names):\n", - " await rnaseq_gen( # noqa\n", + " await rnaseq_gen(\n", " context_name=context,\n", " 
input_rnaseq_filepath=trna_matrix_filepath[i],\n", " input_gene_info_filepath=gene_info_filepath[i],\n", @@ -494,7 +494,7 @@ "metadata": {}, "outputs": [], "source": [ - "from como.rnaseq_gen import rnaseq_gen, FilteringTechnique\n", + "from como.rnaseq_gen import FilteringTechnique, rnaseq_gen\n", "\n", "replicate_ratio = 0.75\n", "high_confidence_replicate_ratio = 1.0\n", @@ -546,7 +546,7 @@ "metadata": {}, "outputs": [], "source": [ - "from como.rnaseq_gen import rnaseq_gen, FilteringTechnique\n", + "from como.rnaseq_gen import FilteringTechnique, rnaseq_gen\n", "\n", "replicate_ratio = 0.75\n", "high_confidence_replicate_ratio = 1.0\n", @@ -744,8 +744,8 @@ " [\n", " \"python3\", \"como/merge_xomics.py\",\n", " \"--merge-zfpkm-distribution\",\n", - " \"--total-rnaseq-config-file\", trnaseq_config_file,\n", - " \"--mrnaseq-config-file\", mrnaseq_config_file,\n", + " \"--total-rnaseq-config-file\", trnaseq_config_file, # noqa: F821\n", + " \"--mrnaseq-config-file\", mrnaseq_config_file, # noqa: F821\n", " # \"--scrnaseq-config-file\", scrnaseq_config_file, # If using single-cell data, uncomment the start of this line\n", " # \"--proteomics-config-file\", proteomics_config_file, # If using proteomics data, uncomment the start of this line\n", " \"--requirement-adjust\", requirement_adjust,\n", From 1956721275442f45eae829d592671810647f0832 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 14:43:14 -0600 Subject: [PATCH 78/91] chore: ignore complex function Signed-off-by: Josh Loecker --- main/como/rnaseq_preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 5c2d8125..02702b38 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -278,7 +278,7 @@ async def _write_counts_matrix( return final_matrix -async def _create_config_df( +async def _create_config_df( # noqa: C901 context_name: str, /, como_context_dir: Path, From 
65bd2a3a43cf868d4abf0d505d70bf6bf5a7ef17 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:12:21 -0600 Subject: [PATCH 79/91] chore: remove unused imports --- main/como/proteomics_gen.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index b1e17b34..1b62bac2 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -1,5 +1,6 @@ -import argparse -import asyncio +from __future__ import annotations + +from pathlib import Path import numpy as np import pandas as pd @@ -12,7 +13,7 @@ # Load Proteomics -def load_proteomics_data(datafilename, context_name): +def process_proteomics_data(path: Path) -> pd.DataFrame: """Load proteomics data from a given context and filename.""" config = Config() data_path = config.data_dir / "data_matrices" / context_name / datafilename @@ -45,7 +46,7 @@ def load_proteomics_data(datafilename, context_name): # read map to convert to entrez -async def load_gene_symbol_map(gene_symbols: list[str]): +async def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None = None): """Add descirption....""" config = Config() filepath = config.data_dir / "proteomics_entrez_map.csv" @@ -62,7 +63,15 @@ async def load_gene_symbol_map(gene_symbols: list[str]): return df[~df.index.duplicated()] -def abundance_to_bool_group(context_name, group_name, abundance_matrix, rep_ratio, hi_rep_ratio, quantile): +def abundance_to_bool_group( + context_name, + group_name, + abundance_matrix, + replicate_ratio, + high_confidence_replicate_ratio, + quantile, + output_boolean_filepath: Path, +): """Descrioption....""" config = Config() output_dir = config.result_dir / context_name / "proteomics" @@ -156,11 +165,15 @@ def load_empty_dict(): async def proteomics_gen( - config_file: str, - rep_ratio: float = 0.5, - group_ratio: float = 0.5, - hi_rep_ratio: float = 0.5, - hi_group_ratio: float = 0.5, + 
context_name: str, + config_filepath: Path, + matrix_filepath: Path, + output_boolean_filepath: Path, + input_entrez_map: Path | None = None, + replicate_ratio: float = 0.5, + batch_ratio: float = 0.5, + high_confidence_replicate_ratio: float = 0.7, + high_confience_batch_ratio: float = 0.7, quantile: int = 25, ): """Generate proteomics data.""" From 3cc9bec93f893af7f35786ecb4f4d1ad58795744 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:14:13 -0600 Subject: [PATCH 80/91] refactor: remove command line interface --- main/como/proteomics_gen.py | 75 ------------------------------------- 1 file changed, 75 deletions(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index 1b62bac2..366f7bf5 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -218,80 +218,5 @@ async def proteomics_gen( to_bool_context(context_name, group_ratio, hi_group_ratio, groups) -def _main(): - parser = argparse.ArgumentParser( - prog="proteomics_gen.py", - description="Description goes here", - epilog="For additional help, please post questions/issues in the MADRID GitHub repo at " - "https://github.com/HelikarLab/MADRID or email babessell@gmail.com", - ) - parser.add_argument( - "-c", - "--config-file", - type=str, - required=True, - dest="config_file", - help="The configuration file for proteomics", - ) - parser.add_argument( - "-r", - "--replicate-ratio", - type=float, - required=False, - default=0.5, - dest="rep_ratio", - help="Ratio of replicates required for a gene to be considered active in that group", - ) - parser.add_argument( - "-b", - "--batch-ratio", - type=float, - required=False, - default=0.5, - dest="group_ratio", - help="Ratio of groups (batches or studies) required for a gene to be considered active in a context", - ) - parser.add_argument( - "-hr", - "--high-replicate-ratio", - type=float, - required=False, - default=0.5, - dest="hi_rep_ratio", - help="Ratio of replicates required for a gene to be 
considered high-confidence in that group", - ) - parser.add_argument( - "-hb", - "--high-batch-ratio", - type=float, - required=False, - default=0.5, - dest="hi_group_ratio", - help="Ratio of groups (batches or studies) required for a gene to be considered high-confidence in a context", - ) - - parser.add_argument( - "-q", - "--quantile", - type=int, - required=False, - default=25, - dest="quantile", - help="The quantile of genes to accept. This should be an integer from 0% (no proteins pass) " - "to 100% (all proteins pass).", - ) - args = parser.parse_args() - asyncio.run( - proteomics_gen( - args.config_file, - args.rep_ratio, - args.group_ratio, - args.hi_rep_ratio, - args.hi_group_ratio, - args.quantile, ) ) - - -if __name__ == "__main__": - _main() From 0f9bd3a35ab27b6aeed9f7d4735c441b732055fb Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:14:44 -0600 Subject: [PATCH 81/91] feat: process input data more pythonically --- main/como/proteomics_gen.py | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index 366f7bf5..d71ded3f 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -15,34 +15,15 @@ # Load Proteomics def process_proteomics_data(path: Path) -> pd.DataFrame: """Load proteomics data from a given context and filename.""" - config = Config() - data_path = config.data_dir / "data_matrices" / context_name / datafilename - logger.info(f"Data Matrix Path: {data_path}") - - if data_path.exists(): - proteomics_data = pd.read_csv(data_path, header=0) - else: - logger.error(f"Error: file not found: {data_path}") - - return None - # Preprocess data, drop na, duplicate ';' in symbol, - proteomics_data["gene_symbol"] = proteomics_data["gene_symbol"].astype(str) - proteomics_data.dropna(subset=["gene_symbol"], inplace=True) - pluralnames = proteomics_data[proteomics_data["gene_symbol"].str.contains(";") == True] 
# noqa: E712 - - for idx, row in pluralnames.iterrows(): - names = row["gene_symbol"].split(";") - rows = [] - - for name in names: - rowcopy = row.copy() - rowcopy["gene_symbol"] = name - rows.append(rowcopy) - proteomics_data.drop(index=idx, inplace=True) - proteomics_data = pd.concat([proteomics_data, pd.DataFrame(rows)], ignore_index=True) - - return proteomics_data + matrix: pd.DataFrame = pd.read_csv(path) + if "gene_symbol" not in matrix.columns: + raise ValueError("No gene_symbol column found in proteomics data.") + + matrix["gene_symbol"] = matrix["gene_symbol"].astype(str) + matrix.dropna(subset=["gene_symbol"], inplace=True) + matrix = matrix.assign(gene_symbol=matrix["gene_symbol"].str.split(";")).explode("gene_symbol") + return matrix # read map to convert to entrez From 1402016337ac0d2a1ad8dc74db21615d40e4947f Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:15:39 -0600 Subject: [PATCH 82/91] refactor: do not use hardcoded filepaths --- main/como/proteomics_gen.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index d71ded3f..8f91f41a 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -29,10 +29,8 @@ def process_proteomics_data(path: Path) -> pd.DataFrame: # read map to convert to entrez async def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None = None): """Add descirption....""" - config = Config() - filepath = config.data_dir / "proteomics_entrez_map.csv" - if filepath.exists(): - df = pd.read_csv(filepath, index_col="gene_symbol") + if entrez_map and entrez_map.exists(): + df = pd.read_csv(entrez_map, index_col="gene_symbol") else: biodbnet = BioDBNet() df = await biodbnet.async_db2db( @@ -158,15 +156,20 @@ async def proteomics_gen( quantile: int = 25, ): """Generate proteomics data.""" - config = Config() - if not config_file: - raise ValueError("Config file must be provided") + if 
not config_filepath.exists(): + raise FileNotFoundError(f"Config file not found at {config_filepath}") + if config_filepath.suffix not in (".xlsx", ".xls"): + raise FileNotFoundError(f"Config file must be an xlsx or xls file at {config_filepath}") + + if not matrix_filepath.exists(): + raise FileNotFoundError(f"Matrix file not found at {matrix_filepath}") + if matrix_filepath.suffix not in {".csv"}: + raise FileNotFoundError(f"Matrix file must be a csv file at {matrix_filepath}") if quantile < 0 or quantile > 100: raise ValueError("Quantile must be an integer from 0 to 100") quantile /= 100 - prot_config_filepath = config.data_dir / "config_sheets" / config_file logger.info(f"Config file is at '{prot_config_filepath}'") xl = pd.ExcelFile(prot_config_filepath) From 450c9e954e2d195b549bfe0706592c86ca010111 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:16:01 -0600 Subject: [PATCH 83/91] chore: ruff formatting --- main/como/proteomics_gen.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index 8f91f41a..4f2b7475 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -34,7 +34,9 @@ async def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None else: biodbnet = BioDBNet() df = await biodbnet.async_db2db( - values=gene_symbols, input_db=Input.GENE_SYMBOL, output_db=[Output.GENE_ID, Output.ENSEMBL_GENE_ID] + values=gene_symbols, + input_db=Input.GENE_SYMBOL, + output_db=[Output.GENE_ID, Output.ENSEMBL_GENE_ID], ) df.loc[df["gene_id"] == "-", ["gene_id"]] = np.nan df.to_csv(filepath, index_label="gene_symbol") From 01b31dfe1ad1e0bfba3ac5b66b99fb85f36694a6 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:16:31 -0600 Subject: [PATCH 84/91] style: use better variable name & reorganize --- main/como/proteomics_gen.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git 
a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index 4f2b7475..e649cd8b 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -39,7 +39,7 @@ async def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None output_db=[Output.GENE_ID, Output.ENSEMBL_GENE_ID], ) df.loc[df["gene_id"] == "-", ["gene_id"]] = np.nan - df.to_csv(filepath, index_label="gene_symbol") + df.to_csv(entrez_map, index_label="gene_symbol") return df[~df.index.duplicated()] @@ -75,12 +75,11 @@ def abundance_to_bool_group( abundance_matrix["pos"] = (abundance_matrix > 0).sum(axis=1) / abundance_matrix.count(axis=1) abundance_matrix["expressed"] = 0 - abundance_matrix.loc[(abundance_matrix["pos"] >= rep_ratio), ["expressed"]] = 1 abundance_matrix["high"] = 0 - abundance_matrix.loc[(abundance_matrix["pos"] >= hi_rep_ratio), ["high"]] = 1 + abundance_matrix.loc[(abundance_matrix["pos"] >= replicate_ratio), ["expressed"]] = 1 + abundance_matrix.loc[(abundance_matrix["pos"] >= high_confidence_replicate_ratio), ["high"]] = 1 - bool_filepath = output_dir / f"bool_prot_Matrix_{context_name}_{group_name}.csv" - abundance_matrix.to_csv(bool_filepath, index_label="entrez_gene_id") + abundance_matrix.to_csv(output_boolean_filepath, index_label="entrez_gene_id") def to_bool_context(context_name, group_ratio, hi_group_ratio, group_names): From 06e0932f398db82d429b522663d648ce67ae25df Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:16:45 -0600 Subject: [PATCH 85/91] feat: process the provided file paths --- main/como/proteomics_gen.py | 63 ++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 29 deletions(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index e649cd8b..50f1cfbb 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -171,37 +171,42 @@ async def proteomics_gen( raise ValueError("Quantile must be an integer from 0 to 100") quantile /= 100 - logger.info(f"Config 
file is at '{prot_config_filepath}'") + config_df = pd.read_excel(config_filepath, sheet_name=context_name) + matrix: pd.DataFrame = process_proteomics_data(matrix_filepath) - xl = pd.ExcelFile(prot_config_filepath) - sheet_names = xl.sheet_names - - for context_name in sheet_names: - datafilename = "".join(["protein_abundance_", context_name, ".csv"]) - config_sheet = pd.read_excel(prot_config_filepath, sheet_name=context_name) - groups = config_sheet["group"].unique().tolist() - - for group in groups: - group_idx = np.where([g == group for g in config_sheet["group"].tolist()]) - cols = [*np.take(config_sheet["sample_name"].to_numpy(), group_idx).ravel().tolist(), "gene_symbol"] - - proteomics_data = load_proteomics_data(datafilename, context_name) - proteomics_data = proteomics_data.loc[:, cols] - - symbols_to_ids = await load_gene_symbol_map(gene_symbols=proteomics_data["gene_symbol"].tolist()) - proteomics_data.dropna(subset=["gene_symbol"], inplace=True) - if "uniprot" in proteomics_data.columns: - proteomics_data.drop(columns=["uniprot"], inplace=True) - - proteomics_data = proteomics_data.groupby(["gene_symbol"]).agg("max") - proteomics_data["entrez_gene_id"] = symbols_to_ids["gene_id"] - proteomics_data.dropna(subset=["entrez_gene_id"], inplace=True) - proteomics_data.set_index("entrez_gene_id", inplace=True) - - # save proteomics data by test - abundance_to_bool_group(context_name, group, proteomics_data, rep_ratio, hi_rep_ratio, quantile) - to_bool_context(context_name, group_ratio, hi_group_ratio, groups) + groups = config_df["group"].unique().tolist() + for group in groups: + indices = np.where([g == group for g in config_df["group"]]) + sample_columns = [*np.take(config_df["sample_name"].to_numpy(), indices).ravel().tolist(), "gene_symbol"] + matrix = matrix.loc[:, sample_columns] + symbols_to_gene_ids = await load_gene_symbol_map( + gene_symbols=matrix["gene_symbol"].tolist(), + entrez_map=input_entrez_map, + ) + matrix.dropna(subset=["gene_symbol"], 
inplace=True) + if "uniprot" in matrix.columns: + matrix.drop(columns=["uniprot"], inplace=True) + + matrix = matrix.groupby(["gene_symbol"]).agg("max") + matrix["entrez_gene_id"] = symbols_to_gene_ids["gene_id"] + matrix.dropna(subset=["entrez_gene_id"], inplace=True) + matrix.set_index("entrez_gene_id", inplace=True) + + # bool_filepath = output_dir / f"bool_prot_Matrix_{context_name}_{group_name}.csv" + abundance_to_bool_group( + context_name=context_name, + group_name=group, + abundance_matrix=matrix, + replicate_ratio=replicate_ratio, + high_confidence_replicate_ratio=high_confidence_replicate_ratio, + quantile=quantile, + output_boolean_filepath=output_boolean_filepath, ) + to_bool_context( + context_name=context_name, + group_ratio=batch_ratio, + hi_group_ratio=high_confience_batch_ratio, + group_names=groups, ) From 4baf95a393e925d4f865b2d7f3f8a9608459ce4c Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:17:19 -0600 Subject: [PATCH 86/91] feat: write to provided filepath --- main/como/proteomics_preprocessing.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/main/como/proteomics_preprocessing.py b/main/como/proteomics_preprocessing.py index 9b215439..2fee5290 100644 --- a/main/como/proteomics_preprocessing.py +++ b/main/como/proteomics_preprocessing.py @@ -129,12 +129,12 @@ def plot_gaussian_fit(z_results: ZResult, facet_titles: bool = True, x_min: int # Main function for protein abundance transformation -def protein_transform_main(abundance_df: pd.DataFrame | str | Path, out_dir: str | Path, group_name: str) -> None: +def protein_transform_main( + abundance_df: pd.DataFrame | str | Path, + output_gaussian_img_filepath: Path, + output_z_score_matrix_filepath: Path, +) -> None: """Transform protein abundance data.""" - out_dir: Path = Path(out_dir) - output_figure_directory = out_dir / "figures" - output_figure_directory.mkdir(parents=True, exist_ok=True) - abundance_df: pd.DataFrame = ( 
pd.read_csv(abundance_df) if isinstance(abundance_df, (str, Path)) else abundance_df.fillna(0) ) @@ -142,10 +142,10 @@ def protein_transform_main(abundance_df: pd.DataFrame | str | Path, out_dir: str z_transform: ZResult = z_score_calc(abundance_df, min_thresh=0) fig = plot_gaussian_fit(z_results=z_transform, facet_titles=True, x_min=-4) - fig.write_image(out_dir / "gaussian_fit.png") - fig.write_html(out_dir / "gaussian_fit.html") - logger.info(f"Wrote image to {out_dir / 'gaussian_fit.png'}") + fig.write_image(output_gaussian_img_filepath.with_suffix(".png")) + fig.write_html(output_gaussian_img_filepath.with_suffix(".html")) + logger.info(f"Gaussian fit figure written to {output_gaussian_img_filepath}") z_transformed_abundances = z_transform.zfpkm z_transformed_abundances[abundance_df == 0] = -4 - z_transformed_abundances.to_csv(out_dir / f"protein_zscore_Matrix_{group_name}.csv", index=False) + z_transformed_abundances.to_csv(output_z_score_matrix_filepath, index=False) From 9f9700163ae5b7dcef98faff70435c5114a46ccc Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:26:04 -0600 Subject: [PATCH 87/91] refactor: remove hardcoded filepaths --- main/como/proteomics_gen.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index 50f1cfbb..9f216d6d 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -53,17 +53,13 @@ def abundance_to_bool_group( quantile, output_boolean_filepath: Path, ): - """Descrioption....""" - config = Config() - output_dir = config.result_dir / context_name / "proteomics" - output_dir.mkdir(parents=True, exist_ok=True) - - # write group abundances to individual files - abundance_filepath = ( - config.result_dir / context_name / "proteomics" / "".join(["protein_abundance_", group_name, ".csv"]) - ) + """Convert proteomic data to boolean expression.""" abundance_matrix.to_csv(abundance_filepath, 
index_label="entrez_gene_id") - protein_transform_main(abundance_matrix, output_dir, group_name) + protein_transform_main( + abundance_df=abundance_matrix, + output_gaussian_img_filepath=output_gaussian_img_filepath, + output_z_score_matrix_filepath=output_z_score_matrix_filepath, + ) # Logical Calculation abundance_matrix_nozero = abundance_matrix.replace(0, np.nan) From bfe8d7e4ec483704765c73599277caa58fc82fa5 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:26:24 -0600 Subject: [PATCH 88/91] refactor: pythonic approach to processing --- main/como/proteomics_gen.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index 9f216d6d..2c312553 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -64,14 +64,14 @@ def abundance_to_bool_group( # Logical Calculation abundance_matrix_nozero = abundance_matrix.replace(0, np.nan) thresholds = abundance_matrix_nozero.quantile(quantile, axis=0) - testbool = pd.DataFrame(0, columns=list(abundance_matrix), index=abundance_matrix.index) + testbool = pd.DataFrame(0, columns=abundance_matrix.columns, index=abundance_matrix.index) - for col in list(abundance_matrix): + for col in abundance_matrix.columns: testbool.loc[abundance_matrix[col] > thresholds[col], [col]] = 1 - abundance_matrix["pos"] = (abundance_matrix > 0).sum(axis=1) / abundance_matrix.count(axis=1) abundance_matrix["expressed"] = 0 abundance_matrix["high"] = 0 + abundance_matrix["pos"] = abundance_matrix[abundance_matrix > 0].sum(axis=1) / abundance_matrix.count(axis=1) abundance_matrix.loc[(abundance_matrix["pos"] >= replicate_ratio), ["expressed"]] = 1 abundance_matrix.loc[(abundance_matrix["pos"] >= high_confidence_replicate_ratio), ["high"]] = 1 From c40169611068898ba36b26cad39b51016236b8e8 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 15:26:37 -0600 Subject: [PATCH 89/91] fix: arguments to write data --- 
main/como/proteomics_gen.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index 2c312553..9a53304e 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -46,11 +46,13 @@ async def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None def abundance_to_bool_group( context_name, - group_name, - abundance_matrix, - replicate_ratio, - high_confidence_replicate_ratio, - quantile, + abundance_filepath: Path, + output_gaussian_img_filepath: Path, + output_z_score_matrix_filepath: Path, + abundance_matrix: pd.DataFrame, + replicate_ratio: float, + high_confidence_replicate_ratio: float, + quantile: float, output_boolean_filepath: Path, ): """Convert proteomic data to boolean expression.""" @@ -145,6 +147,8 @@ async def proteomics_gen( config_filepath: Path, matrix_filepath: Path, output_boolean_filepath: Path, + output_gaussian_img_filepath: Path, + output_z_score_matrix_filepath: Path, input_entrez_map: Path | None = None, replicate_ratio: float = 0.5, batch_ratio: float = 0.5, @@ -193,12 +197,14 @@ async def proteomics_gen( # bool_filepath = output_dir / f"bool_prot_Matrix_{context_name}_{group_name}.csv" abundance_to_bool_group( context_name=context_name, - group_name=group, + abundance_filepath=matrix_filepath, abundance_matrix=matrix, replicate_ratio=replicate_ratio, high_confidence_replicate_ratio=high_confidence_replicate_ratio, quantile=quantile, output_boolean_filepath=output_boolean_filepath, + output_gaussian_img_filepath=output_gaussian_img_filepath, + output_z_score_matrix_filepath=output_z_score_matrix_filepath, ) to_bool_context( context_name=context_name, From c7da9718068eb054410d49e842332e1b4b847964 Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 16:03:18 -0600 Subject: [PATCH 90/91] feat: allow specifiying files to process --- main/COMO.ipynb | 222 +++++++++----------------- main/como/proteomics_gen.py 
| 16 +- main/como/proteomics_preprocessing.py | 13 +- 3 files changed, 96 insertions(+), 155 deletions(-) diff --git a/main/COMO.ipynb b/main/COMO.ipynb index bbaeb152..3cbebec1 100644 --- a/main/COMO.ipynb +++ b/main/COMO.ipynb @@ -232,15 +232,10 @@ ] }, { + "metadata": {}, "cell_type": "code", - "execution_count": 1, - "metadata": { - "ExecuteTime": { - "end_time": "2024-12-09T22:10:43.421233Z", - "start_time": "2024-12-09T22:10:43.418100Z" - } - }, "outputs": [], + "execution_count": null, "source": [ "from pathlib import Path\n", "\n", @@ -249,14 +244,20 @@ "\n", "taxon_id = 9606\n", "context_names = [\"naiveB\"]\n", - "gene_info_filepath = [Path(f\"data/results/{context}/gene_info.csv\") for context in context_names]\n", - "como_context_dir = [Path(f\"data/COMO_input/{context}\") for context in context_names]\n", - "trna_matrix_filepath = [Path(f\"data/results/{context}/total-rna/totalrna_{context}.csv\") for context in context_names]\n", - "polya_matrix_filepath = [Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names]\n", + "\n", + "gene_info_filepath = {context: Path(f\"data/results/{context}/gene_info.csv\") for context in context_names}\n", + "como_context_dir = {context: Path(f\"data/COMO_input/{context}\") for context in context_names}\n", + "trna_matrix_filepath = {context: Path(f\"data/results/{context}/total-rna/totalrna_{context}.csv\") for context in context_names}\n", + "mrna_matrix_filepath = {context: Path(f\"data/results/{context}/polya-rna/polyarna_{context}.csv\") for context in context_names}\n", + "proteomics_matrix_filepath = {context: Path(f\"data/data_matrices/{context}/protein_abundance_{context}.csv\") for context in context_names}\n", + "\n", + "trna_metadata_filepath = Path(\"data/config_sheets/trna_config.xlsx\")\n", + "mrna_metadata_filepath = Path(\"data/config_sheets/mrna_config.xlsx\")\n", + "proteomics_metadata_filepath = Path(\"data/config_sheets/proteomics_config.xlsx\")\n", "\n", "# 
No single-cell data is provided by default; COMO accepts single-cell data in CSV or h5ad format\n", "# If you are using single-cell data, adjust the following lines accordingly\n", - "scrna_matrix_filepath = [Path(f\"data/results/{context}/scrna/scrna_{context}.csv\") for context in context_names]\n", + "scrna_matrix_filepath = {context: Path(f\"data/results/{context}/scrna/scrna_{context}.csv\") for context in context_names}\n", "# scrna_matrix_filepath = [Path(f\"data/results/{context}/scrna/scrna_{context}.h5ad\") for context in context_names]\n" ] }, @@ -272,37 +273,24 @@ ] }, { + "metadata": {}, "cell_type": "code", - "execution_count": 10, - "metadata": { - "ExecuteTime": { - "end_time": "2024-12-07T03:30:27.253112Z", - "start_time": "2024-12-07T03:30:27.236557Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001B[32m2024-12-09 16:23:55\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'polya' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/polya-rna/polyarna_naiveB.csv'\u001B[0m\n", - "\u001B[32m2024-12-09 16:23:55\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m274\u001B[0m - \u001B[32m\u001B[1mWrote gene count matrix for 'total' RNA at '/Users/joshl/Projects/COMO/main/data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001B[0m\n", - "\u001B[32m2024-12-09 16:23:55\u001B[0m | \u001B[1mINFO \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m451\u001B[0m - \u001B[1mFetching gene info (this may take 1-5 minutes)\u001B[0m\n", - "\u001B[32m2024-12-09 16:24:13\u001B[0m | \u001B[32m\u001B[1mSUCCESS \u001B[0m | \u001B[36mcomo.rnaseq_preprocess\u001B[0m:\u001B[36m488\u001B[0m - \u001B[32m\u001B[1mGene Info file written at '/Users/joshl/Projects/COMO/main/data/results/naiveB/gene_info.csv'\u001B[0m\n" - ] - } - ], + "outputs": [], + 
"execution_count": null, "source": [ - "for i in range(len(context_names)):\n", + "for context in context_names:\n", + " if context not in {*trna_matrix_filepath, *mrna_matrix_filepath}:\n", + " continue\n", " await rnaseq_preprocess(\n", - " context_name=context_names[i],\n", + " context_name=context,\n", " taxon=taxon_id,\n", - " output_gene_info_filepath=gene_info_filepath[i],\n", - " como_context_dir=como_context_dir[i],\n", - " output_trna_config_filepath=Path(\"./data/config_sheets/trna_config.xlsx\"),\n", - " output_trna_count_matrix_filepath=trna_matrix_filepath[i],\n", - " output_mrna_config_filepath=Path(\"./data/config_sheets/polya_config.xlsx\"),\n", - " output_mrna_count_matrix_filepath=polya_matrix_filepath[i],\n", + " como_context_dir=como_context_dir[context],\n", + " input_matrix_filepath=None,\n", + " output_gene_info_filepath=gene_info_filepath[context],\n", + " output_trna_config_filepath=trna_metadata_filepath,\n", + " output_trna_count_matrix_filepath=trna_matrix_filepath[context],\n", + " output_mrna_config_filepath=mrna_metadata_filepath,\n", + " output_mrna_count_matrix_filepath=mrna_matrix_filepath[context],\n", " cache=True,\n", " log_level=\"INFO\",\n", " )" @@ -379,68 +367,10 @@ ] }, { + "metadata": {}, "cell_type": "code", - "execution_count": 4, - "metadata": { - "ExecuteTime": { - "end_time": "2024-12-09T22:13:42.060657Z", - "start_time": "2024-12-09T22:13:41.740347Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001B[32m2024-12-09 16:18:43.958\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mcomo.rnaseq_gen\u001B[0m:\u001B[36mrnaseq_gen\u001B[0m:\u001B[36m805\u001B[0m - \u001B[34m\u001B[1mStarting 'naiveB'\u001B[0m\n", - "\u001B[32m2024-12-09 16:18:43.961\u001B[0m | \u001B[34m\u001B[1mDEBUG \u001B[0m | \u001B[36mcomo.rnaseq_gen\u001B[0m:\u001B[36m_read_counts\u001B[0m:\u001B[36m175\u001B[0m - \u001B[34m\u001B[1mReading CSV file at 
'data/results/naiveB/total-rna/totalrna_naiveB.csv'\u001B[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " entrez_gene_id expressed high\n", - "0 7105 0 0\n", - "1 64102 0 0\n", - "2 8813 0 0\n", - "3 57147 0 0\n", - "4 55732 0 0\n", - "... ... ... ...\n", - "34396 124901321 0 0\n", - "34397 124902403 0 0\n", - "34398 101929614 0 0\n", - "34399 107984888 0 0\n", - "34400 124900697 0 0\n", - "\n", - "[34401 rows x 3 columns]\n" - ] - }, - { - "ename": "KeyError", - "evalue": "'ensembl_gene_id'", - "output_type": "error", - "traceback": [ - "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)", - "File \u001B[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3805\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 3804\u001B[0m \u001B[38;5;28;01mtry\u001B[39;00m:\n\u001B[0;32m-> 3805\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43m_engine\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mcasted_key\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 3806\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m \u001B[38;5;28;01mas\u001B[39;00m err:\n", - "File \u001B[0;32mindex.pyx:167\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n", - "File \u001B[0;32mindex.pyx:196\u001B[0m, in \u001B[0;36mpandas._libs.index.IndexEngine.get_loc\u001B[0;34m()\u001B[0m\n", - "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7081\u001B[0m, in \u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n", - "File \u001B[0;32mpandas/_libs/hashtable_class_helper.pxi:7089\u001B[0m, in 
\u001B[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001B[0;34m()\u001B[0m\n", - "\u001B[0;31mKeyError\u001B[0m: 'ensembl_gene_id'", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001B[0;31mKeyError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[0;32mIn[4], line 11\u001B[0m\n\u001B[1;32m 8\u001B[0m cutoff \u001B[38;5;241m=\u001B[39m \u001B[38;5;241m-\u001B[39m\u001B[38;5;241m3\u001B[39m\n\u001B[1;32m 10\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m i, context \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(context_names):\n\u001B[0;32m---> 11\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m rnaseq_gen( \u001B[38;5;66;03m# noqa\u001B[39;00m\n\u001B[1;32m 12\u001B[0m context_name\u001B[38;5;241m=\u001B[39mcontext,\n\u001B[1;32m 13\u001B[0m input_rnaseq_filepath\u001B[38;5;241m=\u001B[39mtrna_matrix_filepath[i],\n\u001B[1;32m 14\u001B[0m input_gene_info_filepath\u001B[38;5;241m=\u001B[39mgene_info_filepath[i],\n\u001B[1;32m 15\u001B[0m output_rnaseq_filepath\u001B[38;5;241m=\u001B[39mtrna_matrix_filepath[i],\n\u001B[1;32m 16\u001B[0m prep\u001B[38;5;241m=\u001B[39mRNAPrepMethod\u001B[38;5;241m.\u001B[39mTOTAL,\n\u001B[1;32m 17\u001B[0m taxon\u001B[38;5;241m=\u001B[39mtaxon_id,\n\u001B[1;32m 18\u001B[0m input_metadata_filepath\u001B[38;5;241m=\u001B[39mPath(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m./data/config_sheets/trna_config.xlsx\u001B[39m\u001B[38;5;124m\"\u001B[39m),\n\u001B[1;32m 19\u001B[0m replicate_ratio\u001B[38;5;241m=\u001B[39mreplicate_ratio,\n\u001B[1;32m 20\u001B[0m high_replicate_ratio\u001B[38;5;241m=\u001B[39mhigh_confidence_replicate_ratio,\n\u001B[1;32m 21\u001B[0m batch_ratio\u001B[38;5;241m=\u001B[39mbatch_ratio,\n\u001B[1;32m 22\u001B[0m high_batch_ratio\u001B[38;5;241m=\u001B[39mhigh_confidence_batch_ratio,\n\u001B[1;32m 23\u001B[0m technique\u001B[38;5;241m=\u001B[39mtechnique,\n\u001B[1;32m 24\u001B[0m cutoff\u001B[38;5;241m=\u001B[39mcutoff\n\u001B[1;32m 
25\u001B[0m )\n", - "File \u001B[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:808\u001B[0m, in \u001B[0;36mrnaseq_gen\u001B[0;34m(context_name, input_rnaseq_filepath, input_gene_info_filepath, output_rnaseq_filepath, prep, taxon, input_metadata_filepath, input_metadata_df, replicate_ratio, high_replicate_ratio, batch_ratio, high_batch_ratio, technique, cutoff)\u001B[0m\n\u001B[1;32m 805\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124mf\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mStarting \u001B[39m\u001B[38;5;124m'\u001B[39m\u001B[38;5;132;01m{\u001B[39;00mcontext_name\u001B[38;5;132;01m}\u001B[39;00m\u001B[38;5;124m'\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 806\u001B[0m output_rnaseq_filepath\u001B[38;5;241m.\u001B[39mparent\u001B[38;5;241m.\u001B[39mmkdir(parents\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m, exist_ok\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mTrue\u001B[39;00m)\n\u001B[0;32m--> 808\u001B[0m \u001B[38;5;28;01mawait\u001B[39;00m _save_rnaseq_tests(\n\u001B[1;32m 809\u001B[0m context_name\u001B[38;5;241m=\u001B[39mcontext_name,\n\u001B[1;32m 810\u001B[0m rnaseq_matrix\u001B[38;5;241m=\u001B[39m\u001B[38;5;28;01mawait\u001B[39;00m _read_counts(input_rnaseq_filepath),\n\u001B[1;32m 811\u001B[0m metadata_df\u001B[38;5;241m=\u001B[39minput_metadata_df \u001B[38;5;129;01mor\u001B[39;00m \u001B[38;5;28;01mawait\u001B[39;00m _create_metadata_df(input_metadata_filepath),\n\u001B[1;32m 812\u001B[0m gene_info_df\u001B[38;5;241m=\u001B[39mpd\u001B[38;5;241m.\u001B[39mread_csv(input_gene_info_filepath),\n\u001B[1;32m 813\u001B[0m output_filepath\u001B[38;5;241m=\u001B[39moutput_rnaseq_filepath,\n\u001B[1;32m 814\u001B[0m prep\u001B[38;5;241m=\u001B[39mprep,\n\u001B[1;32m 815\u001B[0m taxon\u001B[38;5;241m=\u001B[39mtaxon,\n\u001B[1;32m 816\u001B[0m replicate_ratio\u001B[38;5;241m=\u001B[39mreplicate_ratio,\n\u001B[1;32m 817\u001B[0m batch_ratio\u001B[38;5;241m=\u001B[39mbatch_ratio,\n\u001B[1;32m 
818\u001B[0m high_replicate_ratio\u001B[38;5;241m=\u001B[39mhigh_replicate_ratio,\n\u001B[1;32m 819\u001B[0m high_batch_ratio\u001B[38;5;241m=\u001B[39mhigh_batch_ratio,\n\u001B[1;32m 820\u001B[0m technique\u001B[38;5;241m=\u001B[39mtechnique,\n\u001B[1;32m 821\u001B[0m cut_off\u001B[38;5;241m=\u001B[39mcutoff,\n\u001B[1;32m 822\u001B[0m )\n", - "File \u001B[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:671\u001B[0m, in \u001B[0;36m_save_rnaseq_tests\u001B[0;34m(context_name, rnaseq_matrix, metadata_df, gene_info_df, output_filepath, prep, taxon, replicate_ratio, batch_ratio, high_replicate_ratio, high_batch_ratio, technique, cut_off)\u001B[0m\n\u001B[1;32m 662\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"Save the results of the RNA-Seq tests to a CSV file.\"\"\"\u001B[39;00m\n\u001B[1;32m 663\u001B[0m filtering_options \u001B[38;5;241m=\u001B[39m _FilteringOptions(\n\u001B[1;32m 664\u001B[0m replicate_ratio\u001B[38;5;241m=\u001B[39mreplicate_ratio,\n\u001B[1;32m 665\u001B[0m batch_ratio\u001B[38;5;241m=\u001B[39mbatch_ratio,\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 668\u001B[0m high_batch_ratio\u001B[38;5;241m=\u001B[39mhigh_batch_ratio,\n\u001B[1;32m 669\u001B[0m )\n\u001B[0;32m--> 671\u001B[0m read_counts_results: _ReadMatrixResults \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m _build_matrix_results(\n\u001B[1;32m 672\u001B[0m matrix\u001B[38;5;241m=\u001B[39mrnaseq_matrix,\n\u001B[1;32m 673\u001B[0m gene_info\u001B[38;5;241m=\u001B[39mgene_info_df,\n\u001B[1;32m 674\u001B[0m metadata_df\u001B[38;5;241m=\u001B[39mmetadata_df,\n\u001B[1;32m 675\u001B[0m taxon\u001B[38;5;241m=\u001B[39mtaxon,\n\u001B[1;32m 676\u001B[0m )\n\u001B[1;32m 677\u001B[0m metrics \u001B[38;5;241m=\u001B[39m read_counts_results\u001B[38;5;241m.\u001B[39mmetrics\n\u001B[1;32m 678\u001B[0m entrez_gene_ids \u001B[38;5;241m=\u001B[39m read_counts_results\u001B[38;5;241m.\u001B[39mentrez_gene_ids\n", - "File 
\u001B[0;32m~/Projects/COMO/main/como/rnaseq_gen.py:201\u001B[0m, in \u001B[0;36m_build_matrix_results\u001B[0;34m(matrix, gene_info, metadata_df, taxon)\u001B[0m\n\u001B[1;32m 199\u001B[0m gene_info \u001B[38;5;241m=\u001B[39m gene_info_migrations(gene_info)\n\u001B[1;32m 200\u001B[0m \u001B[38;5;28mprint\u001B[39m(matrix)\n\u001B[0;32m--> 201\u001B[0m conversion \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mawait\u001B[39;00m ensembl_to_gene_id_and_symbol(ids\u001B[38;5;241m=\u001B[39m\u001B[43mmatrix\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[38;5;124;43mensembl_gene_id\u001B[39;49m\u001B[38;5;124;43m\"\u001B[39;49m\u001B[43m]\u001B[49m\u001B[38;5;241m.\u001B[39mtolist(), taxon\u001B[38;5;241m=\u001B[39mtaxon)\n\u001B[1;32m 202\u001B[0m matrix \u001B[38;5;241m=\u001B[39m matrix\u001B[38;5;241m.\u001B[39mmerge(conversion, on\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mensembl_gene_id\u001B[39m\u001B[38;5;124m\"\u001B[39m, how\u001B[38;5;241m=\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mleft\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 204\u001B[0m \u001B[38;5;66;03m# Only include Entrez and Ensembl Gene IDs that are present in `gene_info`\u001B[39;00m\n", - "File \u001B[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/frame.py:4102\u001B[0m, in \u001B[0;36mDataFrame.__getitem__\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 4100\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcolumns\u001B[38;5;241m.\u001B[39mnlevels \u001B[38;5;241m>\u001B[39m \u001B[38;5;241m1\u001B[39m:\n\u001B[1;32m 4101\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_getitem_multilevel(key)\n\u001B[0;32m-> 4102\u001B[0m indexer \u001B[38;5;241m=\u001B[39m 
\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcolumns\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_loc\u001B[49m\u001B[43m(\u001B[49m\u001B[43mkey\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 4103\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m is_integer(indexer):\n\u001B[1;32m 4104\u001B[0m indexer \u001B[38;5;241m=\u001B[39m [indexer]\n", - "File \u001B[0;32m~/Projects/COMO/.venv/lib/python3.10/site-packages/pandas/core/indexes/base.py:3812\u001B[0m, in \u001B[0;36mIndex.get_loc\u001B[0;34m(self, key)\u001B[0m\n\u001B[1;32m 3807\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28misinstance\u001B[39m(casted_key, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;129;01mor\u001B[39;00m (\n\u001B[1;32m 3808\u001B[0m \u001B[38;5;28misinstance\u001B[39m(casted_key, abc\u001B[38;5;241m.\u001B[39mIterable)\n\u001B[1;32m 3809\u001B[0m \u001B[38;5;129;01mand\u001B[39;00m \u001B[38;5;28many\u001B[39m(\u001B[38;5;28misinstance\u001B[39m(x, \u001B[38;5;28mslice\u001B[39m) \u001B[38;5;28;01mfor\u001B[39;00m x \u001B[38;5;129;01min\u001B[39;00m casted_key)\n\u001B[1;32m 3810\u001B[0m ):\n\u001B[1;32m 3811\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m InvalidIndexError(key)\n\u001B[0;32m-> 3812\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mKeyError\u001B[39;00m(key) \u001B[38;5;28;01mfrom\u001B[39;00m \u001B[38;5;21;01merr\u001B[39;00m\n\u001B[1;32m 3813\u001B[0m \u001B[38;5;28;01mexcept\u001B[39;00m \u001B[38;5;167;01mTypeError\u001B[39;00m:\n\u001B[1;32m 3814\u001B[0m \u001B[38;5;66;03m# If we have a listlike key, _check_indexing_error will raise\u001B[39;00m\n\u001B[1;32m 3815\u001B[0m \u001B[38;5;66;03m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001B[39;00m\n\u001B[1;32m 3816\u001B[0m \u001B[38;5;66;03m# the TypeError.\u001B[39;00m\n\u001B[1;32m 3817\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39m_check_indexing_error(key)\n", - "\u001B[0;31mKeyError\u001B[0m: 'ensembl_gene_id'" - ] - } - ], + "outputs": [], + "execution_count": null, "source": [ "from como.rnaseq_gen import FilteringTechnique, rnaseq_gen\n", "\n", @@ -451,15 +381,17 @@ "technique = FilteringTechnique.zfpkm\n", "cutoff = -3\n", "\n", - "for i, context in enumerate(context_names):\n", + "for context in context_names:\n", + " if context not in trna_matrix_filepath:\n", + " continue\n", " await rnaseq_gen(\n", " context_name=context,\n", - " input_rnaseq_filepath=trna_matrix_filepath[i],\n", - " input_gene_info_filepath=gene_info_filepath[i],\n", - " output_rnaseq_filepath=trna_matrix_filepath[i],\n", + " input_rnaseq_filepath=trna_matrix_filepath[context],\n", + " input_gene_info_filepath=gene_info_filepath[context],\n", + " output_rnaseq_filepath=trna_matrix_filepath[context],\n", " prep=RNAPrepMethod.TOTAL,\n", " taxon=taxon_id,\n", - " input_metadata_filepath=Path(\"./data/config_sheets/trna_config.xlsx\"),\n", + " input_metadata_filepath=trna_metadata_filepath,\n", " replicate_ratio=replicate_ratio,\n", " high_replicate_ratio=high_confidence_replicate_ratio,\n", " batch_ratio=batch_ratio,\n", @@ -489,10 +421,10 @@ ] }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ "from como.rnaseq_gen import FilteringTechnique, rnaseq_gen\n", "\n", @@ -503,15 +435,17 @@ "technique = FilteringTechnique.zfpkm\n", "cutoff = -3\n", "\n", - "for i, context in enumerate(context_names):\n", + "for context in context_names:\n", + " if context not in mrna_matrix_filepath:\n", + " continue\n", " await rnaseq_gen(\n", " context_name=context,\n", - " input_rnaseq_filepath=polya_matrix_filepath[i],\n", - " 
input_gene_info_filepath=gene_info_filepath[i],\n", - " output_rnaseq_filepath=polya_matrix_filepath[i],\n", + " input_rnaseq_filepath=mrna_matrix_filepath[context],\n", + " input_gene_info_filepath=gene_info_filepath[context],\n", + " output_rnaseq_filepath=mrna_matrix_filepath[context],\n", " prep=RNAPrepMethod.MRNA,\n", " taxon=taxon_id,\n", - " input_metadata_filepath=Path(\"./data/config_sheets/mrna_config.xlsx\"),\n", + " input_metadata_filepath=mrna_metadata_filepath,\n", " replicate_ratio=replicate_ratio,\n", " high_replicate_ratio=high_confidence_replicate_ratio,\n", " batch_ratio=batch_ratio,\n", @@ -541,10 +475,10 @@ ] }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ "from como.rnaseq_gen import FilteringTechnique, rnaseq_gen\n", "\n", @@ -555,12 +489,14 @@ "technique = FilteringTechnique.umi\n", "cutoff = -3\n", "\n", - "for i, context in enumerate(context_names):\n", + "for context in context_names:\n", + " if context not in scrna_matrix_filepath:\n", + " continue\n", " await rnaseq_gen(\n", " context_name=context,\n", - " input_rnaseq_filepath=scrna_matrix_filepath[i],\n", - " input_gene_info_filepath=gene_info_filepath[i],\n", - " output_rnaseq_filepath=scrna_matrix_filepath[i],\n", + " input_rnaseq_filepath=scrna_matrix_filepath[context],\n", + " input_gene_info_filepath=gene_info_filepath[context],\n", + " output_rnaseq_filepath=scrna_matrix_filepath[context],\n", " prep=RNAPrepMethod.SCRNA,\n", " taxon=taxon_id,\n", " input_metadata_filepath=Path(\"./data/config_sheets/scrna_config.xlsx\"),\n", @@ -590,38 +526,34 @@ ] }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ - "proteomics_config_file = \"proteomics_data_inputs_paper.xlsx\"\n", - "rep_ratio = 0.75\n", - "batch_ratio = 0.75\n", - "high_rep_ratio = 1.0\n", - "high_batch_ratio = 1.0\n", - "quantile 
= 25\n", - "\n", - "# fmt: off\n", - "cmd = \" \".join(\n", - " [\n", - " \"python3\", \"como/proteomics_gen.py\",\n", - " \"--config-file\", proteomics_config_file,\n", - " \"--replicate-ratio\", str(rep_ratio),\n", - " \"--high-replicate-ratio\", str(high_rep_ratio),\n", - " \"--batch-ratio\", str(batch_ratio),\n", - " \"--high-batch-ratio\", str(high_batch_ratio),\n", - " \"--quantile\", str(quantile),\n", - " ]\n", - ")\n", - "# fmt: on\n", - "\n", - "!{cmd}" + "from como.proteomics_gen import proteomics_gen\n", + "\n", + "for context in context_names:\n", + " await proteomics_gen(\n", + " context_name=context_names,\n", + " config_filepath=proteomics_metadata_filepath,\n", + " matrix_filepath=proteomics_matrix_filepath[context],\n", + " output_boolean_filepath=Path(f\"data/results/{context}/proteomics/{context}_proteomics_boolean_matrix.csv\"),\n", + " output_gaussian_png_filepath=Path(f\"data/results/{context}/proteomics/{context}_proteomics_gaussian.png\"),\n", + " output_gaussian_html_filepath=Path(f\"data/results/{context}/proteomics/{context}_proteomics_gaussian.html\"),\n", + " output_z_score_matrix_filepath=Path(f\"data/results/{context}/proteomics/{context}_zscore_matrix.csv\"),\n", + " input_entrez_map=Path(f\"data/results/{context}/proteomics/{context}_entrez_map.csv\"),\n", + " replicate_ratio=0.5,\n", + " batch_ratio=0.5,\n", + " high_confidence_replicate_ratio=0.7,\n", + " high_confidence_batch_ratio=0.7,\n", + " quantile=25,\n", + " )" ] }, { - "cell_type": "markdown", "metadata": {}, + "cell_type": "markdown", "source": [ "# Cluster Sample Data (Optional)\n", "This step is used to cluster the samples based on their expression values. This can be used to determine which samples are more similar to each other. In a perfect world, one cluster would be created for each context type used. 
This is done using the `como/cluster_rnaseq.py` script.\n", @@ -645,10 +577,10 @@ ] }, { - "cell_type": "code", - "execution_count": null, "metadata": {}, + "cell_type": "code", "outputs": [], + "execution_count": null, "source": [ "filt_technique = \"zfpkm\"\n", "cluster_algorithm = \"umap\"\n", diff --git a/main/como/proteomics_gen.py b/main/como/proteomics_gen.py index 9a53304e..d020d71d 100644 --- a/main/como/proteomics_gen.py +++ b/main/como/proteomics_gen.py @@ -47,7 +47,8 @@ async def load_gene_symbol_map(gene_symbols: list[str], entrez_map: Path | None def abundance_to_bool_group( context_name, abundance_filepath: Path, - output_gaussian_img_filepath: Path, + output_gaussian_png_filepath: Path, + output_gaussian_html_filepath: Path, output_z_score_matrix_filepath: Path, abundance_matrix: pd.DataFrame, replicate_ratio: float, @@ -59,7 +60,8 @@ def abundance_to_bool_group( abundance_matrix.to_csv(abundance_filepath, index_label="entrez_gene_id") protein_transform_main( abundance_df=abundance_matrix, - output_gaussian_img_filepath=output_gaussian_img_filepath, + output_gaussian_png_filepath=output_gaussian_png_filepath, + output_gaussian_html_filepath=output_gaussian_html_filepath, output_z_score_matrix_filepath=output_z_score_matrix_filepath, ) @@ -147,13 +149,14 @@ async def proteomics_gen( config_filepath: Path, matrix_filepath: Path, output_boolean_filepath: Path, - output_gaussian_img_filepath: Path, output_z_score_matrix_filepath: Path, + output_gaussian_png_filepath: Path | None = None, + output_gaussian_html_filepath: Path | None = None, input_entrez_map: Path | None = None, replicate_ratio: float = 0.5, batch_ratio: float = 0.5, high_confidence_replicate_ratio: float = 0.7, - high_confience_batch_ratio: float = 0.7, + high_confidence_batch_ratio: float = 0.7, quantile: int = 25, ): """Generate proteomics data.""" @@ -203,12 +206,13 @@ async def proteomics_gen( high_confidence_replicate_ratio=high_confidence_replicate_ratio, quantile=quantile, 
output_boolean_filepath=output_boolean_filepath, - output_gaussian_img_filepath=output_gaussian_img_filepath, + output_gaussian_png_filepath=output_gaussian_png_filepath, + output_gaussian_html_filepath=output_gaussian_html_filepath, output_z_score_matrix_filepath=output_z_score_matrix_filepath, ) to_bool_context( context_name=context_name, group_ratio=batch_ratio, - hi_group_ratio=high_confience_batch_ratio, + hi_group_ratio=high_confidence_batch_ratio, group_names=groups, ) diff --git a/main/como/proteomics_preprocessing.py b/main/como/proteomics_preprocessing.py index 2fee5290..1329406d 100644 --- a/main/como/proteomics_preprocessing.py +++ b/main/como/proteomics_preprocessing.py @@ -131,7 +131,8 @@ def plot_gaussian_fit(z_results: ZResult, facet_titles: bool = True, x_min: int # Main function for protein abundance transformation def protein_transform_main( abundance_df: pd.DataFrame | str | Path, - output_gaussian_img_filepath: Path, + output_gaussian_png_filepath: Path, + output_gaussian_html_filepath: Path, output_z_score_matrix_filepath: Path, ) -> None: """Transform protein abundance data.""" @@ -142,9 +143,13 @@ def protein_transform_main( z_transform: ZResult = z_score_calc(abundance_df, min_thresh=0) fig = plot_gaussian_fit(z_results=z_transform, facet_titles=True, x_min=-4) - fig.write_image(output_gaussian_img_filepath.with_suffix(".png")) - fig.write_html(output_gaussian_img_filepath.with_suffix(".html")) - logger.info(f"Gaussian fit figure written to {output_gaussian_img_filepath}") + + if output_gaussian_png_filepath: + fig.write_image(output_gaussian_png_filepath.with_suffix(".png")) + logger.info(f"PNG gaussian figure written to {output_gaussian_png_filepath}") + if output_gaussian_html_filepath: + fig.write_html(output_gaussian_html_filepath.with_suffix(".html")) + logger.info(f"Interactive HTML gaussian figure written to {output_gaussian_png_filepath}") z_transformed_abundances = z_transform.zfpkm z_transformed_abundances[abundance_df == 0] = -4 
From 4c381ca5def51ba8310d1441eadbafe9c84fb40e Mon Sep 17 00:00:00 2001 From: Josh Loecker Date: Wed, 11 Dec 2024 16:03:31 -0600 Subject: [PATCH 91/91] fix: no longer require rna processing type --- main/como/rnaseq_preprocess.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/main/como/rnaseq_preprocess.py b/main/como/rnaseq_preprocess.py index 02702b38..83eb504f 100644 --- a/main/como/rnaseq_preprocess.py +++ b/main/como/rnaseq_preprocess.py @@ -578,7 +578,6 @@ async def rnaseq_preprocess( output_gene_info_filepath: Path, como_context_dir: Path | None = None, input_matrix_filepath: Path | list[Path] | None = None, - preparation_method: RNAPrepMethod | list[RNAPrepMethod] | None = None, output_trna_config_filepath: Path | None = None, output_mrna_config_filepath: Path | None = None, output_trna_count_matrix_filepath: Path | None = None, @@ -601,8 +600,7 @@ async def rnaseq_preprocess( :param output_mrna_count_matrix_filepath: The path to write messenger RNA count matrices :param como_context_dir: If in "create" mode, the input path(s) to the COMO_input directory of the current context i.e., the directory containing "fragmentSizes", "geneCounts", "insertSizeMetrics", etc. directories - :param input_matrix_filepath: If in "provide" mode, the path(s) to the count matrices to be processed - :param preparation_method: The preparation method + :param input_matrix_filepath: If in "provide" mode, the path(s) to the count matrices to be processed~ :param cache: Should HTTP requests be cached :param log_level: The logging level :param log_location: The logging location @@ -636,13 +634,6 @@ async def rnaseq_preprocess( ) input_matrix_filepath = _listify(input_matrix_filepath) - preparation_method = _listify(preparation_method) - - if len(input_matrix_filepath) != len(preparation_method): - raise ValueError( - "input_matrix_filepath (--input-matrix-filepath) and " - "preparation_method (--preparation-method) must be the same length." 
- ) await _process( context_name=context_name, taxon=taxon,