-
Notifications
You must be signed in to change notification settings - Fork 0
/
db_fill_contests_names.py
165 lines (140 loc) · 5.24 KB
/
db_fill_contests_names.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import os
import os.path
import re
import sqlite3
import sys
import xml.etree.ElementTree as ETree
from argparse import ArgumentParser
# Lower-cased season keyword found in a contest name -> value used as the
# "order" of the season when looking it up in the stats_season table.
name_to_order = {
    season: position
    for position, season in enumerate(
        ("июль", "август", "кострома", "николаев", "подмосковье", "зима"),
        start=1,
    )
}

# Normalised parallel token (dots and whitespace already removed) -> parallel
# name used for the stats_parallel lookup.  Every token is accepted both
# without and with a trailing '+'; the '+' never reaches the resulting name.
parallel_convert = {
    token + suffix: parallel_name
    for suffix in ('', '+')
    for token, parallel_name in (
        ("A'", "A'"),
        ("A0", "A0"),
        ("AA", "AA"),
        ("AS", "AS"),
        ("AY", "AY"),
        ("B'", "B'"),
        ("C'", "C'"),
        ("Ccpp", "C.cpp"),
        ("Cpy", "C.py"),
    )
}
def parse_args(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings.  When None (the default),
            argparse falls back to the process command line, exactly as
            before this parameter existed.

    Returns:
        A dict with two string entries: 'xmls' (directory with contest xml
        files) and 'database' (path to the sqlite3 database file).
    """
    parser = ArgumentParser(
        description="db_fill_contest_names - read contest names from xmls, "
                    "get parallels and seasons from names and write them to database.")
    parser.add_argument('xmls', help='directory with xmls')
    parser.add_argument('database', help='sqlite3 database file')
    return vars(parser.parse_args(argv))
def ejudge_get_contest_name(xml_filename):
    """Return the text of the top-level <name> element of an xml file.

    The file is read as UTF-8 text and parsed with ElementTree.

    Returns:
        The text of the first direct <name> child of the root element, or
        None when the file is not valid UTF-8, is not well-formed XML, or
        has no <name> child (an empty <name/> also yields None, because its
        text is None).
    """
    try:
        with open(xml_filename, encoding='utf-8') as xml_file:
            raw_xml = xml_file.read()
    except UnicodeError:
        # Not decodable as UTF-8: treat as "no name available".
        return None

    try:
        root = ETree.fromstring(raw_xml)
    except ETree.ParseError:
        return None

    name_node = root.find("name")
    return None if name_node is None else name_node.text
# All patterns are raw strings: the previous non-raw literals contained '\.',
# which is an invalid string escape (a warning today, an error in the future).
# The compiled patterns themselves are unchanged.

# A four-digit year starting with "20", e.g. "2015".
year_regex = re.compile(r'20[0-9]{2}')

# A parallel token: preceded by a dot or whitespace, then a parallel letter
# (the character class also contains the Cyrillic look-alikes of C and A) or
# one of AS/AA/AY, an optional suffix (py, python, prime, c++, ++, cpp, digits
# or an apostrophe), an optional '+', and finally a dot, whitespace or the end
# of the string.
parallel_regex = re.compile(
    r'(?:\.|\s)'
    r'(?:[ABCDPSKWTMEFZСА]|AS|AA|AY)'
    r"(?:\.?py|\.?python|prime|\.?c\+\+|\+\+|\.?cpp|[0-9]+|')?"
    r'\+?'
    r'(?:\.|\s|$)')

# Season keyword, case-insensitive.
# NOTE(review): name_to_order also maps "кострома", but this pattern does not
# match it, so such names are never recognised - confirm whether intended.
season_regex = re.compile('(?:Июль|Август|Зима|Николаев|Подмосковье)', re.I)

# Day marker, case-insensitive: either "день"/"day" followed by optional
# dots/whitespace and 1-2 digits, or 1-2 digits preceded by whitespace, a dot
# or "D" and followed by a non-digit (or the end of the string), provided the
# text right after the match is not "день"/"day".
day_regex = re.compile(
    r'(?:(?:день|day)(?:\s|\.)*[0-9]{1,2}|(?:(?:\s|\.|D)[0-9]{1,2}(?:\s|\.|[^0-9]|$))(?!(?:день|day)))',
    re.I)
def get_contest_info(contest_name):
    """Extract (year, order, parallel, day) from a contest name.

    Args:
        contest_name: Human-readable contest name.

    Returns:
        A tuple ``(year, order, parallel, day)`` or None when the name is
        rejected.  A name is rejected when it lacks 'ЛКШ', mentions a
        template, olympiad, contest/competition or exam, has no recognisable
        parallel or season, or when its day number consists only of zeros.
        ``year`` is 0 when no year is found; ``day`` is 0 when no day marker
        is found.
    """
    if 'ЛКШ' not in contest_name:
        return None
    lowered = contest_name.lower()
    rejected_words = ('template', 'олимпиада', 'contest', 'соревнование',
                      'зачет', 'зачёт', 'зачот', 'exam')
    if any(word in lowered for word in rejected_words):
        return None

    year_match = year_regex.search(contest_name)
    year = int(year_match.group(0)) if year_match else 0

    parallel_match = parallel_regex.search(contest_name)
    if parallel_match is None:
        return None
    # Please think twice before changing these replacements: their order
    # matters ('c++' has to be handled before the bare '++').
    token = re.sub('\\s', '', parallel_match.group(0).replace('.', ''))
    replacements = (
        ('prime', '\''),
        ('python', 'py'),
        ('c++', 'cpp'),
        ('++', 'cpp'),
        ('С', 'C'),  # Cyrillic letter -> Latin look-alike
        ('А', 'A'),  # Cyrillic letter -> Latin look-alike
    )
    for old, new in replacements:
        token = token.replace(old, new)
    # Any D-something collapses to plain D.
    if token.startswith('D') and token != 'D':
        token = 'D'
    if len(token) == 1 or (len(token) == 2 and token[1] == '+'):
        parallel = token[0]
    elif token in parallel_convert:
        parallel = parallel_convert[token]
    else:
        return None

    season_match = season_regex.search(contest_name)
    if season_match is None:
        return None
    order = name_to_order.get(season_match.group(0).lower(), 0)

    day_match = day_regex.search(contest_name)
    if day_match is None:
        day = 0
    else:
        # These replacements are order-sensitive as well: leading zeros are
        # stripped before the remaining non-digits are removed.
        day_text = re.sub('\\s', '', day_match.group(0)).replace('.', '')
        day_text = re.sub('(?:день|day)', '', day_text, flags=re.I)
        day_text = day_text.lstrip('D').lstrip('d').lstrip('0')
        day_text = re.sub('[^0-9]', '', day_text)
        try:
            day = int(day_text)
        except ValueError:
            # Nothing left after stripping (e.g. the day was "0").
            return None

    return (year, order, parallel, day)
# Script body: for every "<contest_id>.*" file in the xml directory, read the
# contest name, derive (year, order, parallel, day) from it and store the name,
# parallel, season and day in the matching stats_contest row.
args = parse_args()
xmls_dir = args['xmls']
database_file = args['database']

conn = sqlite3.connect(database_file)
try:
    cursor = conn.cursor()
    for xml_filename in os.listdir(xmls_dir):
        # The part of the file name before the first dot must be the numeric
        # contest id; anything else is skipped.
        try:
            contest_id = int(xml_filename.split('.')[0])
        except ValueError:
            continue

        # os.path.join works with or without a trailing separator (and does
        # not index into a possibly empty string).
        xml_path = os.path.join(xmls_dir, xml_filename)
        if not os.path.isfile(xml_path):
            # Directories and other non-files cannot be opened as xml.
            continue

        contest_name = ejudge_get_contest_name(xml_path)
        if contest_name is None:
            continue
        contest_info = get_contest_info(contest_name)
        if contest_info is None:
            continue
        year, order, parallel_name, day = contest_info

        cursor.execute("SELECT id FROM stats_parallel WHERE name=?", (parallel_name,))
        parallel_row = cursor.fetchone()
        cursor.execute("SELECT id FROM stats_season WHERE year=? AND \"order\"=?", (year, order))
        season_row = cursor.fetchone()
        # fetchone() returns None when nothing matches (e.g. year 0 for a name
        # without a year); skip such contests instead of crashing the whole
        # run with a TypeError and losing every pending update.
        if parallel_row is None or season_row is None:
            print("skipping contest {}: no parallel {!r} or no season (year={}, order={}) in database"
                  .format(contest_id, parallel_name, year, order), file=sys.stderr)
            continue

        cursor.execute("UPDATE stats_contest SET name=?, parallel_id=?, season_id=?, day=? WHERE contest_id=?",
                       (contest_name, parallel_row[0], season_row[0], day, contest_id))
    conn.commit()
finally:
    # Always release the database, also when an error aborts the loop.
    conn.close()