Skip to content

Commit

Permalink
Merge branch 'gtf_fix'
Browse files (browse the repository at this point in the history)
  • Loading branch information
HAL9032 committed Dec 12, 2023
2 parents 070a946 + a2daf62 commit 0e845ef
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
12 changes: 10 additions & 2 deletions episcanpy/count_matrix/_bld_atac_mtx.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,8 +394,16 @@ def gene_activity_mtx(fragments_file,
features = features[features.source == source]

features["gene_id"] = [attr.replace("gene_id", "").strip().strip("\"") for feature_attr in features.attribute for attr in feature_attr.split(";") if attr.strip().startswith("gene_id")]
features["gene_name"] = [attr.replace("gene_name", "").strip().strip("\"") for feature_attr in features.attribute for attr in feature_attr.split(";") if attr.strip().startswith("gene_name")]
features["gene_type"] = [attr.replace("gene_type", "").strip().strip("\"") for feature_attr in features.attribute for attr in feature_attr.split(";") if attr.strip().startswith("gene_type")]

tmp = [attr.replace("gene_name", "").strip().strip("\"") for feature_attr in features.attribute for attr in feature_attr.split(";") if attr.strip().startswith("gene_name ")]
if not tmp:
tmp = [attr.replace("gene", "").strip().strip("\"") for feature_attr in features.attribute for attr in feature_attr.split(";") if attr.strip().startswith("gene ")]
features["gene_name"] = tmp

tmp = [attr.replace("gene_type", "").strip().strip("\"") for feature_attr in features.attribute for attr in feature_attr.split(";") if attr.strip().startswith("gene_type ")]
if not tmp:
tmp = [attr.replace("gene_biotype", "").strip().strip("\"") for feature_attr in features.attribute for attr in feature_attr.split(";") if attr.strip().startswith("gene_biotype ")]
features["gene_type"] = tmp

if gene_type:
features = features[[feature in gene_type for feature in features.gene_type]]
Expand Down
12 changes: 6 additions & 6 deletions episcanpy/preprocessing/_quality_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -851,24 +851,24 @@ def set_filter(adata,
df = adata.obs if in_obs else adata.var

if "passes_filter" in df:
if min_threshold and max_threshold:
if min_threshold is not None and max_threshold is not None:
tmp = np.logical_and(df[key] >= min_threshold, df[key] <= max_threshold)
df["passes_filter"] = [False if not passed else val for passed, val in zip(tmp, df["passes_filter"])]
elif min_threshold:
elif min_threshold is not None:
tmp = df[key] >= min_threshold
df["passes_filter"] = [False if not passed else val for passed, val in zip(tmp, df["passes_filter"])]
elif max_threshold:
elif max_threshold is not None:
tmp = df[key] <= max_threshold
df["passes_filter"] = [False if not passed else val for passed, val in zip(tmp, df["passes_filter"])]

else:
if min_threshold and max_threshold:
if min_threshold is not None and max_threshold is not None:
tmp = np.logical_and(df[key] >= min_threshold, df[key] <= max_threshold)
df["passes_filter"] = tmp
elif min_threshold:
elif min_threshold is not None:
tmp = df[key] >= min_threshold
df["passes_filter"] = tmp
elif max_threshold:
elif max_threshold is not None:
tmp = df[key] <= max_threshold
df["passes_filter"] = tmp

Expand Down
4 changes: 3 additions & 1 deletion episcanpy/preprocessing/_tss_enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def get_tss(gtf,
def tss_enrichment(adata,
fragments,
gtf,
source="HAVANA",
n=5000,
score="avg_score_of_center_region",
distance_to_tss=1000,
Expand All @@ -59,6 +60,7 @@ def tss_enrichment(adata,
adata: AnnData
fragments: path to fragments file
gtf: path to GTF file
source: source of the feature
n: number of TSS to use for calculation
score: value that is used as TSS enrichment score for individual observations
distance_to_tss: distance to TSS
Expand All @@ -69,7 +71,7 @@ def tss_enrichment(adata,
None
"""

features = get_tss(gtf, source="HAVANA", feature="gene", protein_coding_only=True)
features = get_tss(gtf, source=source, feature="gene", protein_coding_only=True)

features["start"] = features.tss_pos - distance_to_tss
features["stop"] = features.tss_pos + distance_to_tss
Expand Down

0 comments on commit 0e845ef

Please sign in to comment.