-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscanpy.py
executable file
·59 lines (44 loc) · 1.84 KB
/
scanpy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 25 19:57:32 2020
@author: Arsh
"""
import numpy as np
import pandas as pd
import scanpy as sc
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=80)
adata= sc.read_csv('combinedtpm.csv').T
data=pd.read_csv("row_data.csv", index_col=0)
adata.var['gene_ids']= np.asanyarray(data[['Description.x']])
adata.var_names_make_unique() # this is unnecessary if using `var_names='gene_ids'` in `sc.read_10x_mtx`
sc.pl.highest_expr_genes(adata, n_top=20, )
### Filetr the samples
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=3)
mito_genes = adata.var_names.str.startswith('MT-')
# for each cell compute fraction of counts in mito genes vs. all genes
# the `.A1` is only necessary as X is sparse (to transform to a dense array after summing)
adata.obs['percent_mito'] = np.sum(
adata[:, mito_genes].X, axis=1) / np.sum(adata.X, axis=1)
# add the total counts per cell as observations-annotation to adata
adata.obs['n_counts'] = adata.X.sum(axis=1)
###
sc.pl.violin(adata, ['n_genes', 'n_counts', 'percent_mito'],
jitter=0.4, multi_panel=True)
sc.pl.scatter(adata, x='n_counts', y='percent_mito')
sc.pl.scatter(adata, x='n_counts', y='n_genes')
###3 filetring
adata = adata[adata.obs.n_genes < 2500, :]
adata = adata[adata.obs.percent_mito < 0.05, :]
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highly_variable_genes(adata)
adata = adata[:, adata.var.highly_variable]
sc.pp.regress_out(adata, ['n_counts', 'percent_mito'])
sc.pp.scale(adata, max_value=10)
#### PCA ####
sc.tl.pca(adata, svd_solver='arpack')
sc.pl.pca(adata, color=adata.var['gene_ids']=="LMOD1")