-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathrender.py
172 lines (142 loc) · 5.72 KB
/
render.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/local/bin/python
# encoding=utf8
import sys
import json
from jinja2 import Environment, FileSystemLoader
from collections import defaultdict
import re
import markdown
import logging
import subprocess
import base64
import hashlib
# Emit records at INFO level and above as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-wide Markdown converter with the 'meta' and 'footnotes' extensions
# enabled; the same instance is reused for every conversion in this script.
md = markdown.Markdown(
    extensions=[
        'meta',
        'footnotes',
    ],
)
# Maps the package slug found in notebook cell tags to the display name used
# for that package. Insertion order is significant: reorder_meta() sorts the
# plots of each chart by the position of their slug in this mapping.
packages = {
    "pandas": "Pandas",
    "matplotlib": "Matplotlib",
    "seaborn": "Seaborn",
    "plotnine": "plotnine",
    "plotly": "plotly",
    "altair": "Altair",
    "ggplot": "ggplot2 (R)",
}
# Maps the chart slug (the value of a cell's "name:<slug>" tag) to the title
# displayed for that chart. Insertion order is the order in which
# reorder_meta() emits the charts.
names = {
    "bar-counts": "Bar Chart",
    "simple-histogram": "Histogram",
    "scatter-plot": "Scatter Plot",
    "timeseries": "Time Series",
    # Was "Scatter Plot with Faceted with Color" (duplicated "with").
    "scatter-plot-with-colors": "Scatter Plot Faceted with Color",
    "scatter-plot-with-size": "Scatter Plot with Points Sized by Continuous Value",
    "scatter-plot-with-facet": "Scatter Plot Faceted on One Variable",
    "scatter-plot-with-facets": "Scatter Plot Faceted on Two Variables",
    "scatter-with-regression": "Scatter Plot and Regression Line with 95% Confidence Interval Layered",
    "stacked-smooth-line-and-scatter": "Smoothed Line Plot and Scatter Plot Layered",
    "stacked-bar-chart": "Stacked Bar Chart",
    "dodged-bar-chart": "Dodged Bar Chart",
    "stacked-kde": "Stacked KDE Plot",
}
# Read the Markdown introduction that is later rendered and handed to the page
# template as `intro`. The path is relative to the current working directory.
# Decode explicitly as UTF-8 so the text does not depend on the platform's
# default encoding.
with open("INTRO.md", "r", encoding="utf-8") as f:
    intro = f.read()
def image_from_cell(cell):
    """Save the first PNG output of a notebook cell and return its web path.

    The base64 payload of the first output that carries ``image/png`` data is
    decoded and written to ``web/img/plots/<md5-of-payload>.png``, relative to
    the current working directory (the directory is not created here).
    Because the file name is the hash of the payload, identical images map to
    the same file.

    Args:
        cell: A notebook cell as a dict with an ``outputs`` list.

    Returns:
        The site-relative path of the image, ``/img/plots/<hash>.png``.

    Raises:
        KeyError: If the cell has no ``outputs`` key.
        Exception: If no output of the cell contains a PNG image.
    """
    try:
        outputs = cell['outputs']
    except KeyError:
        # Use .get() so that reporting the problem cannot itself raise.
        logging.error("Can't find image in cell: %s", cell.get('source'))
        raise

    for output in outputs:
        if 'data' in output and 'image/png' in output['data']:
            payload = output['data']['image/png']
            if not isinstance(payload, str):
                # Tolerate a payload stored as a list of lines.
                payload = "".join(payload)
            base64_img = payload.replace("\n", "").strip()
            digest = hashlib.md5(base64_img.encode('ascii')).hexdigest()
            web_path = "/img/plots/{}.png".format(digest)
            full_path = "web" + web_path
            with open(full_path, "wb") as fh:
                fh.write(base64.b64decode(base64_img))
            return web_path

    # Exception() does not interpolate %-style arguments, so format the
    # message explicitly.
    raise Exception(
        "Can't find an image in cell {}".format(cell.get('source'))
    )
def source_from_cell(cell):
    """Split a cell's source into a leading comment and the code to display.

    The source is joined and stripped, every ``;`` is removed, and any
    ``Image(...)`` call is deleted. If the text contains ``%%R`` its first
    line is dropped; likewise if it contains ``%%altair``. For cells without
    ``%%altair`` every double quote is replaced by a single quote. If the
    result starts with a triple-quoted string, that string is split off as
    the comment.

    Args:
        cell: A notebook cell as a dict whose ``source`` is a string or a
            list of strings.

    Returns:
        A ``(comment, source)`` tuple; ``comment`` is ``""`` when the cell
        does not start with a complete triple-quoted string.
    """
    source = "".join(cell['source']).strip()
    source = source.replace(";", "")
    source = re.sub(r"\bImage\(.*\)", "", source)  # remove bokeh render
    if "%%R" in source:
        source = '\n'.join(source.split('\n')[1:])
    if "%%altair" in source:
        source = '\n'.join(source.split('\n')[1:])
    else:
        source = source.replace('"', "'")

    if source.startswith(('"""', "'''")):
        # The comment group is non-greedy so it stops at the first closing
        # delimiter instead of swallowing code that contains another
        # triple-quoted string. DOTALL lets "." span newlines.
        m = re.match(r"[\"']{3,}(.*?)[\"']{3,}(.*)", source, re.DOTALL)
        if m is not None:
            return m.groups()
        # Unterminated docstring: fall through and treat everything as code.
    return "", source
def tags_from_cell(cell, type='ex'):
    """Parse the ``key:value`` tags of a cell that carries the marker tag.

    Args:
        cell: A notebook cell as a dict; tags are read from
            ``cell['metadata']['tags']``.
        type: Marker tag the cell must carry to be considered. The name
            shadows the builtin but is kept because callers pass it by
            keyword.

    Returns:
        A dict mapping the text before the first ``:`` of each tag to the
        text after it (tags without ``:`` are ignored), or ``None`` when the
        cell does not carry the marker tag.
    """
    metadata = cell.get('metadata') or {}
    tags = set(metadata.get('tags') or ())
    if type not in tags:
        return None

    parsed = {}
    for tag in tags:
        # Split on the first colon only, so values may contain colons.
        key, separator, value = tag.partition(":")
        if separator:
            parsed[key] = value
    return parsed
def data_from_cell(cell):
    """Return the first HTML output of a cell, restyled as a CSS-classed table.

    In the first output that carries ``text/html`` data, the attributes
    ``border="1" class="dataframe"`` are replaced by the table classes below
    and ``<thead>`` gets the ``thead-inverse`` class.

    Args:
        cell: A notebook cell as a dict with an ``outputs`` list.

    Returns:
        The rewritten HTML string.

    Raises:
        KeyError: If the cell has no ``outputs`` key.
        Exception: If no output of the cell contains HTML.
    """
    classes = "table table-sm table-striped table-responsive table-bordered"
    try:
        outputs = cell['outputs']
    except KeyError:
        # Use .get() so that reporting the problem cannot itself raise.
        logging.error("Can't find data in cell: %s", cell.get('source'))
        raise

    for output in outputs:
        if 'data' in output and 'text/html' in output['data']:
            html = output['data']['text/html']
            if not isinstance(html, str):
                # A list of lines is joined with spaces; joining a plain
                # string would put a space between every character.
                html = ' '.join(html)
            table = html.replace(
                'border="1" class="dataframe"',
                'class="{}"'.format(classes),
            )
            return table.replace('<thead>', '<thead class="thead-inverse">')

    # Exception() does not interpolate %-style arguments, so format the
    # message explicitly.
    raise Exception(
        "Can't find a dataset in cell {}".format(cell.get('source'))
    )
def reorder_meta(meta):
    """Arrange extracted plots by chart and, within a chart, by package.

    Args:
        meta: Mapping from chart slug to a list of plot dicts, each with a
            ``package-slug`` key.

    Returns:
        A dict keyed by ``(title, slug)`` for every entry of ``names``, in
        the order of ``names``. Each value is that chart's plots sorted by
        the position of their package slug in ``packages``; plots whose slug
        is not listed there are placed last in their original relative
        order. Charts without plots map to an empty list.
    """
    # Build the slug -> position lookup once instead of rebuilding a list of
    # keys for every compared element.
    rank = {slug: position for position, slug in enumerate(packages)}
    unknown_rank = len(rank)

    def order_plots(plots):
        if not plots:
            return plots
        # sorted() is stable, so equally ranked plots keep their order.
        return sorted(
            plots,
            key=lambda plot: rank.get(plot['package-slug'], unknown_rank),
        )

    # .get() avoids inserting empty entries into a defaultdict argument and
    # also accepts a plain dict that lacks some chart slugs.
    return {
        (name, slug): order_plots(meta.get(slug, []))
        for slug, name in names.items()
    }
def extract_data(path):
    """Collect the HTML tables of all cells tagged ``data`` in a notebook.

    Args:
        path: Path of the notebook JSON file.

    Returns:
        A dict mapping the value of each data cell's ``name`` tag to the HTML
        returned by ``data_from_cell`` for that cell.

    Raises:
        KeyError: If a data cell has no ``name:<value>`` tag.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        nb = json.load(f)

    full_data = {}
    for cell in nb['cells']:
        cell_tags = tags_from_cell(cell, type='data')
        if cell_tags is None:
            # Not a data cell.
            continue
        full_data[cell_tags['name']] = data_from_cell(cell)
    return full_data
def extract_cells(path):
    """Collect every example plot of a notebook, grouped by chart.

    Each cell for which ``tags_from_cell`` returns tags (marker tag ``ex``)
    contributes one plot dict holding its cell number, package display name,
    package slug, saved image path, source code and rendered comment.

    Args:
        path: Path of the notebook JSON file.

    Returns:
        The result of ``reorder_meta`` applied to the plots grouped by the
        value of their ``name`` tag.

    Raises:
        KeyError: If an example cell lacks a ``name`` or ``package`` tag.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        nb = json.load(f)

    meta = defaultdict(list)
    for cell_num, cell in enumerate(nb['cells']):
        cell_tags = tags_from_cell(cell)
        if cell_tags is None:
            # Not an example cell.
            continue
        comment, source = source_from_cell(cell)
        image = image_from_cell(cell)
        # The converter is shared and its extensions keep state between
        # conversions, so clear it before rendering each comment.
        md.reset()
        package_slug = cell_tags['package']
        meta[cell_tags['name']].append({
            "cell_num": cell_num,
            # Fall back to the raw slug for packages without a display name.
            "package": packages.get(package_slug, package_slug),
            "package-slug": package_slug,
            "image": image,
            "content": source,
            "comment": md.convert(comment) or None,
        })
    return reorder_meta(meta)
def get_git_revision_short_hash():
    """Return the abbreviated hash of the current git ``HEAD`` as text.

    Runs ``git rev-parse --short HEAD`` in the current working directory and
    returns its output with surrounding whitespace removed.
    """
    command = ['git', 'rev-parse', '--short', 'HEAD']
    raw_output = subprocess.check_output(command)
    trimmed = raw_output.strip()
    return trimmed.decode("utf8")
if __name__ == '__main__':
    # Usage: render.py <notebook.ipynb>
    # Renders templates/t_index.html with the notebook's plots and data
    # tables and writes the result to web/index.html.
    notebook_path = sys.argv[1]
    plots = extract_cells(notebook_path)
    data = extract_data(notebook_path)

    env = Environment(
        loader=FileSystemLoader('templates'),
        extensions=['jinja2_highlight.HighlightExtension'],
    )
    template = env.get_template('t_index.html')

    # The converter is shared with the per-plot comments; clear its state so
    # nothing from earlier conversions leaks into the intro.
    md.reset()
    output_from_parsed_template = template.render(
        intro=md.convert(intro),
        plots=plots,
        git=get_git_revision_short_hash(),
        data=data,
    )

    # Save the result, encoded explicitly as UTF-8 so that non-ASCII
    # characters do not depend on the platform's default encoding.
    with open("web/index.html", "w", encoding="utf-8") as fh:
        fh.write(output_from_parsed_template)