-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcreate_uhc_dendrogram_heatmap.py
58 lines (50 loc) · 1.8 KB
/
create_uhc_dendrogram_heatmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from cospec import plot_uhc_heatmap
from cospec import plot_uhc_dendrogram
from figures import spectrum_map
import collections
from scipy.cluster.hierarchy import dendrogram
from fastcluster import linkage
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as hac
import scipy.stats as sc
import glob
import csv
import argparse
#create_uhc_dendrogram_heatmap.py
#Version 1.0
#Author: James Gatter, jggatter [at] mit.edu
#Author of cospec.py: linakim [at] mit.edu
#July 26th, 2018
# Zero-based column positions used to index each row of a contexted sample
# .csv. Index 2 is intentionally absent: no column at that position is read
# by this script.
SUBSTITUTION, CONTEXT = 0, 1
NORMALIZED_PROPORTION = 3
# Command-line interface: one optional flag that selects the directory the
# sample .csv files are read from. The value is later concatenated directly
# with a "*.csv" glob pattern, which is why it has to end with a '/'.
parser = argparse.ArgumentParser(
    description="See the Juptyer Notebook for help",
    epilog="James was here",
)
parser.add_argument(
    "-i",
    "--input",
    default="../../muts_csv/",
    help="The path to the directory containing select contexted sample .csv's. Make sure it ends with a '/'",
)
args = parser.parse_args()
# Load every sample .csv found directly under args.input into two parallel,
# insertion-ordered collections:
#   cluster_names - one label per sample: the bare file name with the
#                   "_contexted" marker and the ".csv" extension removed
#   spec_list     - one entry per sample mapping "(substitution ,context)"
#                   keys to the float read from the normalized-proportion
#                   column of that sample's rows
cluster_names = []
spec_list = collections.OrderedDict()

# glob gives no ordering guarantee (it depends on the file system), so sort
# the paths to keep the sample order - and therefore the order handed to the
# plotting functions - reproducible between runs and machines.
for csv_path in sorted(glob.glob(args.input + "*.csv")):
    filename = csv_path.replace(args.input, "")
    cluster_names.append(filename.replace("_contexted", "").replace('.csv', ''))

    file_dictionary = {}
    with open(csv_path) as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields an empty list for a blank line (for example
            # a stray empty line at the end of the file); indexing it would
            # raise IndexError, so skip such rows.
            if not row:
                continue
            # Skip the header: the row whose first cell is the column title.
            if row[SUBSTITUTION] == "Substitution":
                continue
            subcon = "(" + str(row[SUBSTITUTION]) + " ," + str(row[CONTEXT]) + ")"
            file_dictionary[subcon] = float(row[NORMALIZED_PROPORTION])

    # NOTE(review): this key still contains the directory part of the path,
    # whereas the matching cluster_names entry does not - confirm which form
    # the cospec plotting functions expect before changing either one.
    spec_list[csv_path.replace("_contexted", "").replace(".csv", "")] = file_dictionary
# Produce the two figures in a fixed order (dendrogram first, then heatmap),
# announcing each step on stdout just before it starts.
plot_steps = (
    ("Plotting dendrogram...", plot_uhc_dendrogram),
    (
        "Plotting heatmap. If this takes a while you probably are comparing too many samples.",
        plot_uhc_heatmap,
    ),
)
for announcement, plot_function in plot_steps:
    print(announcement)
    plot_function(spec_list, cluster_names)
print("DONE")