-
Notifications
You must be signed in to change notification settings - Fork 2
/
md2dict.py
100 lines (89 loc) · 3.2 KB
/
md2dict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import sys
import re
from pprint import pprint
# Hand-written example data.  `sample` shows a nested layout of
# title -> section -> (sub-section ->) list of {name: url} entries.
# These three names are not referenced by any function visible in this file.
sample = {
    'awesome-cancer-variant-databases': {
        'Cancer': {
            'Clinically-focused': [
                {'CanDL': 'https://candl.osu.edu'},
            ],
            'Catalogs': [
                {'COSMIC': 'http://cancer.sanger.ac.uk/cancergenome/projects/cosmic'},
            ],
        },
        'Germline': [
            {'dnSNP': 'http://www.ncbi.nlm.nih.gov/SNP'},
            {'Exome Aggregation Consortium': 'http://exac.broadinstitute.org'},
        ],
    },
}

# Free-text descriptions keyed by the same names used in `sample`.
sample_meta = {
    'awesome-cancer-variant-databases': 'A community-maintained repository of cancer clinical knowledge bases and databases focused on cancer and normal variants. [Contributions welcome](https://github.com/seandavi/awesome-cancer-variant-databases/blob/master/CONTRIBUTE.md]).',
    'CanDL': 'an expert-curated database of potentially actionable driver mutations for molecular pathologists and laboratory directors to facilitate literature-based annotation of genomic testing of tumors. [web app, Download]',
}

# Bookkeeping fields for the example list.
sample_info = {
    'author': 'Sean Davis',
    'gituser': 'seandavi',
    'date': '10-16-2016',
    'total': 16,
}
# Ugly parsing code
def txt2dict(text):
    """Parse Markdown text into a nested dict keyed by heading text.

    Each heading becomes a key whose value is a dict holding its
    sub-headings.  Link list items (lines classified "L" by find_type) are
    collected under the key 'LIST' of the enclosing heading as
    ``{name: url}`` dicts, in document order.

    The first heading encountered is treated as the document title: every
    later heading, whatever its level, is nested somewhere beneath it.

    Args:
        text: The full Markdown document as one string.

    Returns:
        The nested dict described above (empty if there are no headings or
        link list items).
    """
    doc = {}
    # level -> dict of the most recent heading still "open" at that level.
    # Key 0 aliases the first heading seen, so it acts as the fallback
    # parent for any later heading that has no shallower open heading.
    open_sections = {}
    current = doc
    for line in text.split("\n"):
        if not line:
            continue
        line_type = find_type(line)
        # Everything after the first space, i.e. the line without its
        # leading marker token ("##", "-", ...).
        content = line.partition(" ")[2]
        if line_type.startswith("H"):
            level = int(line_type[1:])
            is_first_heading = not open_sections
            if is_first_heading:
                parent = doc
            else:
                # Nearest open heading that is shallower than this one; this
                # also copes with skipped levels (e.g. H1 followed by H3).
                parent = open_sections[
                    max(lv for lv in open_sections if lv < level)
                ]
            # Headings at this level or deeper are closed by the new heading,
            # so they must not be picked as parents later on.
            for stale in [lv for lv in open_sections if lv >= level]:
                del open_sections[stale]
            # setdefault: a repeated heading re-opens the existing section
            # instead of discarding what was already collected in it.
            current = parent.setdefault(content, {})
            if is_first_heading:
                open_sections[0] = current
            open_sections[level] = current
        elif line_type == "L":
            try:
                item = parsing(content)
            except (AttributeError, ValueError):
                # parsing() raises when the item does not start with a
                # "[name](url)" link; such bullets have nothing to record.
                continue
            current.setdefault('LIST', []).append({item['name']: item['url']})
    return doc
def find_type(text):
    """Classify a single Markdown line.

    Args:
        text: One line of Markdown, without its trailing newline.

    Returns:
        "H<n>"    for a heading, where n is the number of leading '#'.
        "L"       for a top-level list item ("- " or "* " at column 0).
        "SubL<k>" for an indented list item, where k is the number of
                  leading spaces before the bullet.
        "Else"    for anything else, including the empty string.
    """
    if not text:
        return "Else"
    if text[0] == "#":
        # Count the '#' run itself so "#Title" is level 1, not len("#Title").
        hashes = len(text) - len(text.lstrip("#"))
        return "H" + str(hashes)
    # A bullet must be followed by a space; this keeps "**bold**" and "---"
    # from being mistaken for list items.
    if text[:2] in ("- ", "* "):
        return "L"
    if text[0] == " ":
        body = text.lstrip(" ")
        if body[:2] in ("- ", "* "):
            return "SubL" + str(len(text) - len(body))
    return "Else"
def main():
    """Parse the Markdown file named by the first CLI argument and print it.

    Reads the whole file, converts it with txt2dict and pretty-prints the
    resulting dict to stdout.  Exits with a usage message when no file
    argument was given.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: {} <markdown-file>".format(sys.argv[0]))
    # Explicit encoding so the result does not depend on the platform locale.
    with open(sys.argv[1], "r", encoding="utf-8") as f:
        lines = f.read()
    new = txt2dict(lines)
    pprint(new)
# [name](url) description
# DOTALL lets the trailing group take everything after the link, including
# '$' characters and any embedded newlines.
_LINK_ITEM_RE = re.compile(r'\[([^\]]*)\]\s*\(([^\)]*)\)(.*)', re.DOTALL)


def parsing(inputtext):
    """Split a "[name](url) description" list item into its parts.

    Args:
        inputtext: List-item text with the bullet already removed; it must
            begin with a Markdown link.

    Returns:
        A dict with keys "name", "url" and "description".  The description
        is everything after the closing ')' exactly as written (leading
        whitespace is kept; it is "" when nothing follows the link).

    Raises:
        ValueError: If inputtext does not start with "[name](url)".
    """
    found = _LINK_ITEM_RE.match(inputtext)
    if found is None:
        raise ValueError(
            "not a '[name](url)' list item: {!r}".format(inputtext)
        )
    name, url, description = found.groups()
    return {"name": name, "url": url, "description": description}
# Run the CLI only when executed as a script, so importing this module for
# its functions does not try to read a file named on the command line.
if __name__ == "__main__":
    main()