Skip to content

Commit

Permalink
add docs
Browse files Browse the repository at this point in the history
  • Loading branch information
ToryDeng committed Dec 22, 2023
1 parent 6cc3f1b commit 0c8c000
Show file tree
Hide file tree
Showing 59 changed files with 3,601 additions and 231 deletions.
165 changes: 161 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,163 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

/scGeneClust/.idea/
/data/.ipynb_checkpoints/
/test.py
/LEGEND/.idea/
# Distribution / packaging
.Python
build/
develop-eggs/
# dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


data/*
2 changes: 1 addition & 1 deletion LEGEND/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
# @File : __init__.py
# @Software: PyCharm
from ._model import GeneClust, integrate
from ._utils import load_PBMC3k, load_simulated_data, load_mouse_brain
from ._utils import load_PBMC3k, load_simulated_data, load_mouse_brain, load_mouse_cortex
145 changes: 95 additions & 50 deletions LEGEND/_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# @File : _model.py
# @Software: PyCharm
import os
from typing import Literal, Optional, Union, Tuple
from typing import Literal, Optional, Tuple, Union

import anndata as ad
import numpy as np
Expand All @@ -13,28 +13,29 @@

import LEGEND.pp as pp
import LEGEND.tl as tl

from ._utils import set_logger
from ._validation import check_args, check_all_genes_selected
from ._validation import check_all_genes_selected, check_args


def GeneClust(
adata: ad.AnnData,
image: np.ndarray = None,
n_var_clusters: int = None,
n_obs_clusters: int = None,
n_components: int = 10,
relevant_gene_pct: int = 20,
post_hoc_filtering: bool = True,
version: Literal['fast', 'ps'] = 'fast',
modality: Literal['sc', 'st'] = 'sc',
shape: Literal['hexagon', 'square'] = 'hexagon',
alpha: float = 0.3,
return_info: bool = False,
subset: bool = False,
max_workers: int = os.cpu_count() - 1,
log_path: Optional[Union[os.PathLike, str]] = None,
verbosity: Literal[0, 1, 2] = 1,
random_state: int = 0
adata: ad.AnnData,
image: np.ndarray = None,
n_var_clusters: int = None,
n_obs_clusters: int = None,
n_components: int = 10,
relevant_gene_pct: int = 20,
post_hoc_filtering: bool = True,
version: Literal["fast", "ps"] = "fast",
modality: Literal["sc", "st"] = "sc",
shape: Literal["hexagon", "square"] = "hexagon",
alpha: float = 0.3,
return_info: bool = False,
subset: bool = False,
max_workers: int = os.cpu_count() - 1,
log_path: Optional[Union[os.PathLike, str]] = None,
verbosity: Literal[0, 1, 2] = 1,
random_state: int = 0,
) -> Optional[Union[Tuple[ad.AnnData, np.ndarray], np.ndarray]]:
"""
This function is the common interface for *GeneClust-fast* and *GeneClust-ps*.
Expand All @@ -51,7 +52,7 @@ def GeneClust(
n_var_clusters : int
The number of clusters in gene clustering. Only valid in GeneClust-fast.
n_obs_clusters : int
The number of clusters in cell clustering used to find high-confidence cells. Only valid in GeneClust-ps.
The number of clusters in cell/spot clustering used to find high-confidence cells/spots. Only valid in GeneClust-ps.
n_components : int, default=10
The number of principal components used along with the first component. Only valid in GeneClust-ps.
relevant_gene_pct: int, default=20
Expand Down Expand Up @@ -102,6 +103,7 @@ def GeneClust(
Gene relevance values are in `copied_adata.var['relevance']`. Irrelevant genes are filtered.
Gene redundancy values are in `copied_adata.varp['redundancy']`.
MST of relevant genes is in `copied_adata.uns['MST']`.
Gene outlier scores are in `copied_adata.var['outlier_score']`.
Representative genes are indicated by `copied_adata.var['representative']`.
selected_genes : ndarray
Names of selected genes.
Expand All @@ -120,8 +122,21 @@ def GeneClust(

# check arguments
do_norm = check_args(
adata, image, version, n_var_clusters, n_obs_clusters, n_components, relevant_gene_pct, post_hoc_filtering,
modality, shape, alpha, return_info, subset, max_workers, random_state
adata,
image,
version,
n_var_clusters,
n_obs_clusters,
n_components,
relevant_gene_pct,
post_hoc_filtering,
modality,
shape,
alpha,
return_info,
subset,
max_workers,
random_state,
)

# feature selection starts
Expand All @@ -139,16 +154,30 @@ def GeneClust(
pp.reduce_dim(copied_adata, version, random_state)
# gene clustering
tl.cluster_genes(
copied_adata, image, version, modality, shape, alpha, n_var_clusters, n_obs_clusters, n_components,
relevant_gene_pct, max_workers, random_state
copied_adata,
image,
version,
modality,
shape,
alpha,
n_var_clusters,
n_obs_clusters,
n_components,
relevant_gene_pct,
max_workers,
random_state,
)
# select features from gene clusters
selected_genes = tl.select_from_clusters(copied_adata, version, modality, 20, post_hoc_filtering, random_state)
selected_genes = tl.select_from_clusters(
copied_adata, version, modality, 20, post_hoc_filtering, random_state
)
check_all_genes_selected(copied_adata, selected_genes)

if subset:
adata._inplace_subset_var(selected_genes)
logger.opt(colors=True).info(f"<magenta>GeneClust-{version}</magenta> finished.")
logger.opt(colors=True).info(
f"<magenta>GeneClust-{version}</magenta> finished."
)
return None

logger.opt(colors=True).info(f"<magenta>GeneClust-{version}</magenta> finished.")
Expand All @@ -159,16 +188,16 @@ def GeneClust(


def integrate(
adata_rna: ad.AnnData,
adata_st: ad.AnnData,
rna_weight: float = 0.5,
rel_pct: int = 20,
post_hoc_filtering: bool = True,
return_info: bool = False,
max_workers: int = os.cpu_count() - 1,
log_path: Optional[Union[os.PathLike, str]] = None,
verbosity: Literal[0, 1, 2] = 1,
random_state: int = 0
adata_rna: ad.AnnData,
adata_st: ad.AnnData,
rna_weight: float = 0.5,
rel_pct: int = 20,
post_hoc_filtering: bool = True,
return_info: bool = False,
max_workers: int = os.cpu_count() - 1,
log_path: Optional[Union[os.PathLike, str]] = None,
verbosity: Literal[0, 1, 2] = 1,
random_state: int = 0,
):
"""
Integrate information from multimodal data to identify co-expressed genes.
Expand Down Expand Up @@ -220,23 +249,39 @@ def integrate(
pseudo_adata = ad.AnnData(np.zeros((1, common_genes.shape[0])), dtype=float)
pseudo_adata.var_names = common_genes

comb_redundancy = rna_weight * adata_rna.varp['redundancy'] + (1 - rna_weight) * adata_st.varp['redundancy']
comb_relevance = rna_weight * adata_rna.var['relevance'] + (1 - rna_weight) * adata_st.var['relevance']
comb_redundancy = (
rna_weight * adata_rna.varp["redundancy"]
+ (1 - rna_weight) * adata_st.varp["redundancy"]
)
comb_relevance = (
rna_weight * adata_rna.var["relevance"]
+ (1 - rna_weight) * adata_st.var["relevance"]
)
comb_MST = tl.information.build_MST(-comb_redundancy)
adata_st.uns['MST'], adata_rna.uns['MST'] = comb_MST, comb_MST
logger.opt(colors=True).info(f"Start to compute complementarity on <magenta>SRT</magenta> data...")
st_complm = tl.information.compute_gene_complementarity(adata_st, max_workers, random_state)
logger.opt(colors=True).info(f"Start to compute complementarity on <magenta>scRNA-seq</magenta> data...")
rna_complm = tl.information.compute_gene_complementarity(adata_rna, max_workers, random_state)
comb_MST.es['complm'] = rna_weight * st_complm + (1 - rna_weight) * rna_complm

pseudo_adata.uns['MST'] = comb_MST
pseudo_adata.var['relevance'] = comb_relevance
pseudo_adata.var['relevance_rna'] = adata_rna.var['relevance']
pseudo_adata.var['relevance_st'] = adata_st.var['relevance']
adata_st.uns["MST"], adata_rna.uns["MST"] = comb_MST, comb_MST
logger.opt(colors=True).info(
f"Start to compute complementarity on <magenta>SRT</magenta> data..."
)
st_complm = tl.information.compute_gene_complementarity(
adata_st, max_workers, random_state
)
logger.opt(colors=True).info(
f"Start to compute complementarity on <magenta>scRNA-seq</magenta> data..."
)
rna_complm = tl.information.compute_gene_complementarity(
adata_rna, max_workers, random_state
)
comb_MST.es["complm"] = rna_weight * st_complm + (1 - rna_weight) * rna_complm

pseudo_adata.uns["MST"] = comb_MST
pseudo_adata.var["relevance"] = comb_relevance
pseudo_adata.var["relevance_rna"] = adata_rna.var["relevance"]
pseudo_adata.var["relevance_st"] = adata_st.var["relevance"]

tl.cluster.generate_gene_clusters(pseudo_adata)
selected_genes = tl.select_from_clusters(pseudo_adata, 'ps', 'st', rel_pct, post_hoc_filtering, random_state)
selected_genes = tl.select_from_clusters(
pseudo_adata, "ps", "st", rel_pct, post_hoc_filtering, random_state
)
check_all_genes_selected(pseudo_adata, selected_genes)

if return_info:
Expand Down
Loading

0 comments on commit 0c8c000

Please sign in to comment.