-
Notifications
You must be signed in to change notification settings - Fork 47
/
Copy pathplotting.py
executable file
·109 lines (91 loc) · 2.94 KB
/
plotting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
# coding: utf-8
import sys
import matplotlib
matplotlib.use("Agg")
import pylab as plot
import numpy as np
from matplotlib import font_manager
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import configparser
import config_path
# Load the project configuration from the file named by config_path.CONFIG.
config = configparser.RawConfigParser()
config.read(config_path.CONFIG)
# Prefix that the plotting functions in this module put in front of their
# output image paths ("data/images/...").
root = config.get("Files and directories", "root")
# Location of the font file used for plot titles and word labels.
path = config.get("Files and directories", "font")
font = font_manager.FontProperties(fname=path)
def singularplot(word, modelname, vector, fname):
    """Save a bar chart of one word vector as a PNG image.

    The image is written to
    ``<root>data/images/singleplots/<modelname>_<fname>.png``.

    Args:
        word: Query word. Only the part before the first "_" is shown in the
            title, with "::" rendered as a space.
        modelname: Model identifier; used in the title and the file name.
        vector: Sequence of component values, drawn as one bar per component.
        fname: File-name stem placed after the model name.
    """
    xlocations = np.arange(len(vector))
    plot.clf()
    plot.bar(xlocations, vector)
    plot_title = word.split("_")[0].replace("::", " ") + "\n" + modelname + " model"
    plot.title(plot_title, fontproperties=font)
    plot.xlabel("Vector components")
    plot.ylabel("Components values")
    plot.savefig(
        root + "data/images/singleplots/" + modelname + "_" + fname + ".png",
        dpi=150,
        bbox_inches="tight",
    )
    # Closing releases the figure. No clf() afterwards: with no current
    # figure left, pyplot would create a new empty one just to clear it.
    plot.close()
def embed(words, matrix, classes, usermodel, fname, method="tsne"):
    """Project word vectors to 2-D and save a labelled scatter plot as PNG.

    The image is written to
    ``<root>data/images/tsneplots/<usermodel>_<fname>_<method>.png``
    (the ``tsneplots`` directory is used for both methods).

    Args:
        words: Sequence of words, one per row of ``matrix``. Only the part
            before the first "_" is drawn, with "::" rendered as a space.
        matrix: Vectors passed to the embedding's ``fit_transform``.
        classes: Class label of each word, parallel to ``words``. Words of
            the same class share a colour and one legend entry.
        usermodel: Model identifier used in the output file name.
        fname: File-name stem used in the output file name.
        method: ``"pca"`` selects PCA; any other value selects t-SNE.
    """
    if method == "pca":
        embedding = PCA(n_components=2, random_state=0)
    else:
        # We set perplexity to a square root of the words number
        perplexity = int(len(words) ** 0.5)
        embedding = TSNE(
            n_components=2,
            perplexity=perplexity,
            metric="cosine",
            max_iter=500,
            init="pca",
        )
    coords = embedding.fit_transform(matrix)
    print("2-d embedding finished", file=sys.stderr)

    # Order classes by first appearance so the class -> colour assignment
    # does not depend on set iteration order.
    class_order = list(dict.fromkeys(classes))
    palette = plot.cm.rainbow(np.linspace(0, 1, len(class_order)))
    class2color = dict(zip(class_order, palette))

    xpositions = coords[:, 0]
    ypositions = coords[:, 1]

    # Horizontal label shift per half-character of the lemma.
    # TODO Should really think about how to adapt this variable to the real plot size
    shift_scale = 0.01 if method == "pca" else 4

    seen = set()
    plot.clf()
    for word, class_label, x, y in zip(words, classes, xpositions, ypositions):
        color = class2color[class_label]
        plot.scatter(
            x,
            y,
            20,
            marker=".",
            color=color,
            # Label only the first point of each class to get one legend entry.
            label=class_label if class_label not in seen else "",
        )
        seen.add(class_label)

        lemma = word.split("_")[0].replace("::", " ")
        mid = len(lemma) / 2 * shift_scale
        plot.annotate(
            lemma,
            xy=(x - mid, y),
            size="x-large",
            weight="bold",
            fontproperties=font,
            color=color,
        )

    plot.tick_params(axis="x", which="both", bottom=False, top=False, labelbottom=False)
    plot.tick_params(axis="y", which="both", left=False, right=False, labelleft=False)
    plot.legend(loc="best")
    plot.savefig(
        f"{root}data/images/tsneplots/{usermodel}_{fname}_{method}.png",
        dpi=150,
        bbox_inches="tight",
    )
    # Closing releases the figure. No clf() afterwards: with no current
    # figure left, pyplot would create a new empty one just to clear it.
    plot.close()