Skip to content

Commit

Permalink
Add gene threshold and add input for plotting cond factors
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrew Ramirez committed Aug 15, 2024
1 parent a1743ef commit 772d1b7
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 27 deletions.
14 changes: 7 additions & 7 deletions pf2rnaseq/figures/commonFuncs/plotFactors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,21 @@
def plot_condition_factors(
data: AnnData,
ax: Axes,
condition_name: str,
cond_group_labels: Optional[pd.Series] = None,
ThomsonNorm=False,
groupConditions=False,
):
"""Plots Pf2 condition factors"""
pd.set_option("display.max_rows", None)
yt = pd.Series(np.unique(data.obs["time"]))
yt = pd.Series(np.unique(data.obs[condition_name]))
X = np.array(data.uns["Pf2_A"])

X = np.log10(X)
if ThomsonNorm is True:
controls = yt.str.contains("CTRL")
X = X[controls]
# X = np.log10(X)
XX = X

X -= np.median(X, axis=0)
X /= np.std(X, axis=0)
X -= np.median(XX, axis=0)
X /= np.std(XX, axis=0)

ind = reorder_table(X)
X = X[ind]
Expand Down Expand Up @@ -172,6 +171,7 @@ def reorder_table(projs: np.ndarray) -> np.ndarray:
return sch.leaves_list(Z)



def bot_top_genes(X, cmp, geneAmount=5):
"""Saves most pos/negatively genes"""
df = pd.DataFrame(
Expand Down
14 changes: 6 additions & 8 deletions pf2rnaseq/figures/figureCA1.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
)
from pf2rnaseq.figures.commonFuncs.plotPaCMAP import plot_labels_pacmap
import numpy as np
from pf2rnaseq.imports import import_CA, prepare_dataset
from pf2rnaseq.imports import import_CA, prepare_dataset_bulk

from pf2rnaseq.factorization import pf2

Expand All @@ -30,18 +30,16 @@ def makeFigure():

#Import the CA data as a DataFrame and put into an Anndata object
CA_ad = import_CA()
X = prepare_dataset(CA_ad, "sample_id")
X = prepare_dataset_bulk(CA_ad, "sample_id", geneThreshold=0.001)

pf2(X, rank=10)
X = pf2(X, rank=10)

plot_condition_factors(X, ax[0])
plot_condition_factors(X, ax[0], condition_name="sample_id")
plot_eigenstate_factors(X, ax[1])
plot_gene_factors(X, ax[2])
plot_factor_weight(X, ax[3])

plot_labels_pacmap(X, "Condition", ax[4])
# plot_labels_pacmap(X, "time", ax[4])


return f

makeFigure()
return f
46 changes: 34 additions & 12 deletions pf2rnaseq/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,49 @@ def import_CA():

return CA_ad


def prepare_dataset_bulk(
X: ad.AnnData, condition_name: str,
geneThreshold: float
) -> ad.AnnData:
assert isinstance(X.X, spmatrix)
assert np.amin(X.X.data) >= 0.0 # type: ignore

# Filter out genes with too few reads
readmean, _ = mean_variance_axis(X.X, axis=0) # type: ignore
X = X[:, readmean > geneThreshold]

# # Get the indices for subsetting the data
_, sgIndex = np.unique(X.obs_vector(condition_name), return_inverse=True)
X.obs["condition_unique_idxs"] = sgIndex

# Pre-calculate gene means
means, _ = mean_variance_axis(X.X, axis=0) # type: ignore
X.var["means"] = means

return X

def prepare_dataset(
X: ad.AnnData, condition_name: str,
# geneThreshold: float
geneThreshold: float
) -> ad.AnnData:
assert isinstance(X.X, spmatrix)
assert np.amin(X.X.data) >= 0.0 # type: ignore

# # Filter out genes with too few reads
# readmean, _ = mean_variance_axis(X.X, axis=0) # type: ignore
# X = X[:, readmean > geneThreshold]
# Filter out genes with too few reads
readmean, _ = mean_variance_axis(X.X, axis=0) # type: ignore
X = X[:, readmean > geneThreshold]

# # Normalize read depth
# sc.pp.normalize_total(X, exclude_highly_expressed=False, inplace=True)
# Normalize read depth
sc.pp.normalize_total(X, exclude_highly_expressed=False, inplace=True)

# # Scale genes by sum
# readmean, _ = mean_variance_axis(X.X, axis=0) # type: ignore
# readsum = X.shape[0] * readmean
# inplace_column_scale(X.X, 1.0 / readsum)
# Scale genes by sum
readmean, _ = mean_variance_axis(X.X, axis=0) # type: ignore
readsum = X.shape[0] * readmean
inplace_column_scale(X.X, 1.0 / readsum)

# # Transform values
# X.X.data = np.log10((1000.0 * X.X.data) + 1.0) # type: ignore
# Transform values
X.X.data = np.log10((1000.0 * X.X.data) + 1.0) # type: ignore

# # Get the indices for subsetting the data
_, sgIndex = np.unique(X.obs_vector(condition_name), return_inverse=True)
Expand Down

0 comments on commit 772d1b7

Please sign in to comment.