-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgene_interaction_pathway.py
52 lines (51 loc) · 1.77 KB
/
gene_interaction_pathway.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# pathway interactions
import pandas as pd
import numpy as np
import os
def group_genes_by_pathway(gene_pathway):
    """Invert a gene -> annotation mapping into pathway -> list of genes.

    Args:
        gene_pathway: dict whose values are '|'-separated pathway names
            (surrounding whitespace is ignored, empty names are dropped).
            Values that are not strings are skipped; pandas yields float
            NaN for empty cells, which has no ``split`` method.

    Returns:
        dict mapping each pathway name to the list of genes annotated with
        it, in the iteration order of ``gene_pathway``.
    """
    pathway_gene = {}
    for gene, annotation in gene_pathway.items():
        if not isinstance(annotation, str):
            continue
        for pathway in annotation.split('|'):
            pathway = pathway.strip()
            if pathway:
                pathway_gene.setdefault(pathway, []).append(gene)
    return pathway_gene


def build_connectivity(feat_idx, pathway_gene):
    """Build the symmetric 0/1 feature-connectivity matrix.

    Two features are connected when at least one pathway lists both of them.
    Every feature is connected to itself (unit diagonal).

    Args:
        feat_idx: dict mapping feature name -> row/column index; indices are
            expected to be 0..len(feat_idx)-1.
        pathway_gene: dict mapping pathway name -> iterable of gene names.
            Genes that are not keys of ``feat_idx`` are ignored.

    Returns:
        float ndarray of shape (len(feat_idx), len(feat_idx)).
    """
    connectivity = np.eye(len(feat_idx))
    for genes in pathway_gene.values():
        members = sorted({feat_idx[gene] for gene in genes if gene in feat_idx})
        if len(members) > 1:
            # One block assignment links every pair of members in both
            # directions; the diagonal entries it touches are already 1.
            connectivity[np.ix_(members, members)] = 1
    return connectivity


def main():
    """Write one connectivity matrix per mapping file found in 'mapping/'.

    Features are the columns of 'GSE18123GPL570.csv' except 'condition'.
    For each 'mapping/<method>.csv' the first column is used as the gene
    name and the second as its '|'-separated pathway annotation; the
    resulting matrix is written, without header or index, to
    'gene_interaction/<method with "hgncTo" removed>.csv'.
    """
    expression = pd.read_csv('GSE18123GPL570.csv')
    feats = list(expression.columns)
    feats.remove('condition')
    feat_idx = {feat: i for i, feat in enumerate(feats)}

    dirin = 'mapping/'
    suffix = '.csv'
    # Strip only the trailing extension; str.replace would also remove any
    # '.csv' occurring in the middle of a file name.
    methods = [name[:-len(suffix)] for name in os.listdir(dirin)
               if name.endswith(suffix)]

    # The output directory may not exist on a fresh checkout.
    os.makedirs('gene_interaction', exist_ok=True)

    for method in methods:
        print(method)
        mapping = pd.read_csv(dirin + method + '.csv')
        cols = list(mapping.columns)
        # If a gene appears on several rows, the last row wins.
        gene_pathway = dict(zip(mapping[cols[0]], mapping[cols[1]]))
        pathway_gene = group_genes_by_pathway(gene_pathway)
        connectivity = build_connectivity(feat_idx, pathway_gene)
        bf = pd.DataFrame(connectivity)
        bf.to_csv('gene_interaction/' + method.replace('hgncTo', '') + '.csv',
                  index=False, header=False)


# NOTE: the pipeline runs only when this file is executed as a script, so
# importing the module (e.g. to reuse the helpers) performs no file I/O.
if __name__ == '__main__':
    main()