-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_author_index.py
executable file
·99 lines (88 loc) · 2.57 KB
/
build_author_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
import sqlite3
import json
import glob
import os
def get_authors(conn):
    """Yield one ``(sender_id, from, messages)`` tuple per distinct sender.

    The ``messages`` table is grouped by ``sender_id`` and the groups are
    produced in descending order of their message count.

    Args:
        conn: Database connection exposing ``cursor()``; the table it
            queries must provide ``sender_id`` and ``from`` columns.

    Yields:
        Tuples of sender id, ``from`` value and number of messages.
    """
    # NOTE(review): `from` is neither grouped nor aggregated, so which row
    # supplies it within a group is up to the database engine - confirm this
    # is acceptable when a sender has used several addresses.
    query = """
        SELECT
            `sender_id`,
            `from`,
            count(*) AS `messages`
        FROM
            `messages`
        GROUP BY
            `sender_id`
        ORDER BY
            `messages` DESC;
    """
    for sender_id, sender, total in conn.cursor().execute(query):
        yield sender_id, sender, total
def make_thread(conn, thread_root):
    """Build the nested reply tree for the thread rooted at *thread_root*.

    Args:
        conn: Open ``sqlite3`` connection with a ``messages`` table.
        thread_root: ``message_hash`` value identifying the root message.

    Returns:
        A dict describing the root message, with its replies nested
        recursively under ``'children'``.  ``None`` is returned when no row
        has that hash, or when SQLite raises ``IntegrityError`` or
        ``ProgrammingError`` while the tree is being built.
    """
    def make_dict(row):
        # Columns are addressed by their position in ``SELECT *`` order;
        # positions 1 and 6 are not exported.
        # NOTE(review): children are fetched recursively via build_tree, so a
        # cycle in `reply_to` would recurse without bound - confirm the data
        # cannot contain one.
        return {
            'message_hash': row[0],
            'message_id': row[2],
            'file_year': row[3],
            'date': row[4],
            'raw_date': row[5],
            'from': row[7],
            'to': row[8],
            'subject': row[9],
            'reply_to': row[10],
            'no_parent': row[11],
            'children': build_tree(make_dict, conn, row[2]),
        }

    sql = "SELECT * FROM `messages` WHERE `message_hash` = ?;"
    try:
        cursor = conn.cursor()
        cursor.execute(sql, [thread_root])
        row = cursor.fetchone()
        if row is None:
            # fetchone() yields None when nothing matches; indexing it would
            # raise a TypeError that the handlers below do not catch.
            return None
        return make_dict(row)
    except sqlite3.IntegrityError:
        print("sqlite3.IntegrityError", thread_root)
        return None
    except sqlite3.ProgrammingError:
        print("sqlite3.ProgrammingError", thread_root)
        return None
def build_tree(make_dict, conn, message_id):
    """Return the converted direct replies to *message_id*.

    Args:
        make_dict: Callable applied to each matching row; its return values
            become the elements of the result.
        conn: Database connection exposing ``cursor()``.
        message_id: Value matched against the ``reply_to`` column.

    Returns:
        A list with one converted entry per row whose ``reply_to`` equals
        *message_id*, ordered by ascending ``date``; empty when none match.
    """
    replies = conn.cursor().execute(
        "SELECT * FROM `messages` WHERE `reply_to` = ? ORDER BY `date` ASC;",
        [message_id],
    )
    return [make_dict(reply) for reply in replies]
def make_threads(conn, sender_id):
    """Collect the thread trees for every thread *sender_id* has posted in.

    Args:
        conn: Database connection exposing ``cursor()``.
        sender_id: Value matched against the ``sender_id`` column.

    Returns:
        A list holding the result of ``make_thread`` for each distinct
        ``thread_root`` found among the sender's messages; empty when the
        sender has no messages.
    """
    query = """
        SELECT DISTINCT
            `thread_root`
        FROM
            `messages`
        WHERE
            `sender_id` = ?
        ORDER BY
            `date` ASC
    """
    roots = conn.cursor().execute(query, [sender_id])
    return [make_thread(conn, root[0]) for root in roots]
def clean_the_slate():
    """Prepare an empty ``json_authors/`` output directory.

    The directory is created (relative to the current working directory) if
    nothing exists at that path, and every ``*.json`` file directly inside it
    is removed.  Other files are left alone.
    """
    out_dir = "json_authors/"
    already_there = os.path.exists(out_dir)
    if not already_there:
        os.makedirs(out_dir)

    stale_files = glob.glob('json_authors/*.json')
    for stale in stale_files:
        os.remove(stale)
def main():
    """Write one JSON file per author into ``json_authors/``.

    The output directory is reset first.  Then, for every author returned by
    ``get_authors`` for ``database.db``, the sender id and message count are
    printed and ``json_authors/<sender_id>.json`` is written with the
    author's id, ``from`` value, message count and thread trees.
    """
    clean_the_slate()
    conn = sqlite3.connect('database.db')
    try:
        for sender_id, from_email, count in get_authors(conn):
            print(sender_id, count)
            # Build the payload before opening the file so that a failure
            # while walking the threads does not leave an empty file behind.
            payload = {
                'sender_id': sender_id,
                'from': from_email,
                'count': count,
                'threads': make_threads(conn, sender_id),
            }
            with open('json_authors/{}.json'.format(sender_id), 'w') as o:
                o.write(json.dumps(payload))
    finally:
        # Release the database handle even if the export fails part-way.
        conn.close()


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()