-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathautotc.py
83 lines (70 loc) · 2.56 KB
/
autotc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import json
import os
import re
import logging
import argparse
# --- Logging -----------------------------------------------------------------
# One stream handler with a "[LEVEL] [time] name: message" layout; both the
# handler and the logger let DEBUG records through.
fmt = logging.Formatter("[%(levelname)s] [%(asctime)s] %(name)s: %(message)s")
ch = logging.StreamHandler()
ch.setFormatter(fmt)
ch.setLevel(logging.DEBUG)

logger = logging.getLogger(__name__)
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)

# --- Command line ------------------------------------------------------------
parser = argparse.ArgumentParser()
parser.add_argument(
    "--dir",
    default="out",
    help="Directory with notebooks to process",
)
parser.add_argument(
    "--output",
    default="temp_Curriculum.md",
    help="Output filename",
)

# --- Regular expressions -----------------------------------------------------
# Code fence in markdown: three backticks, no backtick inside, three backticks.
cpattern = re.compile(r"```[^`]*?```")
# Header: one or two "#", optional whitespace, then the captured text, which
# stops at the next "#", newline or angle bracket.
hpattern = re.compile(r"#{1,2}\s*([^#\n<>]+)")
# Title: the captured text sits between an opening tag and a closing tag.
tpattern = re.compile(r".*<.*>(.*)<\/.*>.*")
# Notebook file name: "L", digits, optional "_suffix", then ".ipynb".
lpattern = re.compile(r"L\d+(_\w*)?\.ipynb")
def analyze_lecture(path):
    """Extract the title and the section headers from a lecture notebook.

    The notebook is read as JSON. The title is searched for in the second
    cell (index 1) with ``tpattern``; if it is not found there, the base
    name of the file is used instead. Headers are collected with
    ``hpattern`` from the markdown cells starting at index 2, after fenced
    code has been removed with ``cpattern``.

    Args:
        path: Path to the ``.ipynb`` file.

    Returns:
        A ``(title, headers)`` tuple, where ``headers`` is the list of
        header strings in the order they appear, or ``None`` when the
        notebook has no cells.
    """
    fname = os.path.basename(path)
    with open(path, encoding="utf-8") as f:
        lecture = json.load(f)

    cells = lecture.get("cells")
    if not cells:
        logger.warning(f"File '{fname}' incorrect")
        return None

    # A notebook with a single cell has no cell at index 1; an empty string
    # fails the title match and triggers the file-name fallback below.
    title_src = "".join(cells[1]["source"]) if len(cells) > 1 else ""
    title = tpattern.match(title_src)
    if not title:
        title = fname
        logger.warning(f"Lecture's title not found, using name '{title}'")
    else:
        title = title.group(1).strip()

    headers = []
    for v in cells[2:]:
        if v["cell_type"] != "markdown":
            continue
        # get rid of code in markdown so that we don't grab comments
        s = "".join(v["source"])
        s = cpattern.sub("", s)
        for source in s.split("\n"):
            # Second pass on the single line, kept from the original logic.
            source = cpattern.sub("", source)
            header = hpattern.match(source.strip())
            if not header:
                continue
            headers.append(header.group(1))
    return title, headers
def generate_md(lectures, path):
    """Write the course programme to a Markdown file.

    The file starts with a fixed heading line. Each lecture then gets a
    numbered second-level heading with its title, followed by its headers
    joined with ". ". Numbering starts at 1 and follows the iteration order
    of ``lectures``.

    Args:
        lectures: Mapping of lecture title to a list of header strings.
        path: Destination file; it is created or overwritten, UTF-8 encoded.
    """
    # The context manager closes the file even when one of the writes fails.
    with open(path, "w", encoding="utf-8") as f:
        print("Программа курса\n", file=f)
        for i, (title, headers) in enumerate(lectures.items(), start=1):
            print(f"## Лекция {i} “{title}”\n", file=f)
            print(". ".join(headers) + "\n", file=f)
    logger.info(f"File created: {path}")
def _lecture_number(fname):
    """Return the integer that follows the leading "L" in a notebook file name."""
    return int(re.match(r"L(\d+)", fname).group(1))


def main():
    """Build the curriculum file from the lecture notebooks in ``--dir``.

    Every file in the directory whose name fully matches ``lpattern`` is
    analyzed in lecture-number order. The collected titles and headers are
    then written to ``--output`` inside that same directory.
    """
    args = parser.parse_args()
    out_dir = os.path.abspath(args.dir)

    notebooks = [name for name in os.listdir(out_dir) if lpattern.fullmatch(name)]
    # Order by lecture number rather than by string, so "L10" follows "L2";
    # the name itself breaks ties such as "L1" versus "L01".
    notebooks.sort(key=lambda name: (_lecture_number(name), name))

    lectures = {}
    for fname in notebooks:
        result = analyze_lecture(os.path.join(out_dir, fname))
        if result is None:
            # Nothing usable came back for this notebook; leave it out
            # instead of failing while unpacking None.
            continue
        title, headers = result
        if title in lectures:
            logger.warning(
                f"Duplicate lecture title '{title}' in '{fname}', replacing the earlier entry"
            )
        lectures[title] = headers

    logger.info(f"Total analyzed: {len(lectures)}")
    generate_md(lectures, os.path.join(out_dir, args.output))


# Run only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()