#!/usr/bin/env python
"""
The clusters file has lines of the following type:
R <4 NLC code> <4 NLC member> <8 end_date> <8 start_date>
The locations file has lines of the following types:
RL <7 UIC code> <8 end_date> <8 start_date> <8 quote_date> <3 admin area code>
<4 NLC code> <16 desc> <3 CRS code> <5 resv code> <2 ERS country> <3 ERS code>
<6 fare group> <2 county> <2 PTE code> <4 zone NLC> <2 zone ind> <1 region>
<1 hierarchy> <41 cc desc out> <16 cc desc rtn> <60 ATB desc out> <30 ATB desc rtn>
<26 special facilities> <29 LUL things>
RA <7 UIC code> <8 end_date> <7 associated UIC code> <3 associated CRS code>
RR <7 UIC code> <3 railcard code> <8 end_date>
RG <7 UIC code> <8 end_date> <8 start_date> <8 quote_date> <16 desc> <2 ERS country> <3 ERS code>
RM <7 UIC code> <8 end_date> <7 UIC group member> <3 CRS group member>
RS <7 UIC code> <8 end_date> <8 start_date> <16 synonym>
"""
from datetime import date
import json
import re
from split.parse import unpack, cnv_full_date, loop, fare_file
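
# Helper semantics (assumed, since split.parse is not shown here): unpack()
# slices a fixed-width record into a dict of named fields, applying an optional
# converter per field; cnv_full_date() parses an 8-character date field;
# loop() yields each record of a fare feed file; fare_file() opens the feed
# file with the given suffix.

# Clusters (FSC): build a map from each member NLC code to the clusters it
# belongs to, written out as data/clusters.json.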
data = {}
for row in loop(fare_file('FSC')):
    row = unpack(row,
        ('cluster_id', 4), ('member_id', 4),
        ('date_to', 8, cnv_full_date), ('date_from', 8, cnv_full_date)
    )
    # Not needed for May 2015 update
    #assert row['date_to'] == date(2999, 12, 31)
    #assert row['date_from'] <= date(2015, 6, 7)
    del row['date_to'], row['date_from']
    data.setdefault(row['member_id'], []).append(row['cluster_id'])

json.dump(data, open('data/clusters.json', 'w'), separators=(',', ': '), indent=2, sort_keys=True)
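
# Routes (RTE): 'R' records carry the route description, 'L' records list the
# CRS codes a route includes or excludes; both are keyed by route_code and
# written out as data/routes.json.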
lookups = {
    'R': (
        ('route_code', 5), ('end_date', 8, cnv_full_date), ('start_date', 8, cnv_full_date),
        ('quote_date', 8, cnv_full_date), ('description', 16), ('atb', 35*4), ('cc_desc', 16),
        ('aaa_desc', 41), ('uts', 1+6+3*4) ),
    'L': ( ('route_code', 5), ('end_date', 8, cnv_full_date), ('admin_area', 3),
        ('nlc_code', 4), ('crs_code', 3), ('incl_excl', 1) ),
}

NICER_DESC = {
    "WARMSTER-SALSBRY": "VIA WARMINSTER-SALISBURY",
}

data = {}
for row in loop(fare_file('RTE')):
    record_type = row[0]
    row = unpack(row[1:], *lookups[record_type])
    if row['end_date'] < date.today(): continue
    if record_type == 'L':
        data.setdefault(row['route_code'], {}).setdefault(row['incl_excl'], []).append(row['crs_code'])
    else:
        data.setdefault(row['route_code'], {})['desc'] = NICER_DESC.get(row['description'], row['description'])

json.dump(data, open('data/routes.json', 'w'), separators=(',', ': '), indent=2, sort_keys=True)
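
# Locations (LOC): field layouts for the record types described in the
# docstring above; only 'L' (location) records in admin area '70' that have a
# CRS code end up in data/stations.json.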
lookups = {
    'L': ( ('uic_code', 7), ('end_date', 8, cnv_full_date),
        ('start_date', 8, cnv_full_date), ('quote_date', 8, cnv_full_date),
        ('admin_area_code', 3), ('nlc_code', 4), ('desc', 16), ('crs_code', 3),
        ('resv_code', 5), ('ers_country', 2), ('ers_code', 3),
        ('fare_group', 6), ('county', 2), ('pte_code', 2), ('zone_nlc', 4),
        ('zone_ind', 2), ('region', 1), ('hierarchy', 1),
        ('cc_desc_out', 41), ('cc_desc_rtn', 16), ('atb_desc_out', 60), ('atb_desc_rtn', 30),
        ('special_facilities', 26), ('lul', 29) ),
    'A': ( ('uic_code', 7), ('end_date', 8, cnv_full_date),
        ('assoc_uic_code', 7), ('assoc_crs_code', 3) ),
    'R': ( ('uic_code', 7), ('railcard', 3), ('end_date', 8, cnv_full_date) ),
    'G': ( ('uic_code', 7), ('end_date', 8, cnv_full_date),
        ('start_date', 8, cnv_full_date), ('quote_date', 8, cnv_full_date),
        ('description', 16), ('ers_country', 2), ('ers_code', 3) ),
    'M': ( ('uic_code', 7), ('end_date', 8, cnv_full_date),
        ('group_member_uic_code', 7), ('group_member_crs_code', 3) ),
    'S': ( ('uic_code', 7), ('end_date', 8, cnv_full_date),
        ('start_date', 8, cnv_full_date), ('synonym', 16) ),
}
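
# For each surviving location, keep a tidied description (title-cased ATB
# description with a trailing abbreviation dot stripped), its NLC code and,
# where it differs from the NLC code, its fare group; a few problem codes are
# removed by hand at the end.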
data = {}
for row in loop(fare_file('LOC')):
    record_type = row[0]
    row = unpack(row[1:], *lookups[record_type])
    if record_type in ('G', 'R', 'S', 'M'): continue
    if row['end_date'] < date.today(): continue
    if record_type == 'L':
        if row['admin_area_code'] != '70': continue
        if not row['crs_code']: continue
    # region 0 non-BR/LUL, 1 ER 2 LMR 3 SCR 4 SR 5 WR 6 LUL
    for i in ('ers_country', 'ers_code', 'quote_date', 'start_date', 'end_date', 'special_facilities', 'lul', 'cc_desc_out', 'cc_desc_rtn', 'atb_desc_rtn', 'zone_ind', 'zone_code', 'admin_area_code', 'pte_code', 'county', 'desc', 'region', 'resv_code', 'hierarchy', 'zone_nlc'):
        if i in row: del row[i]
    if record_type == 'L':
        if row['crs_code'][0] in ('X', 'Z') and row['crs_code'] != 'ZFD': continue
        name = re.sub(r'([a-z])\.$', r'\1', row['atb_desc_out'].title())
        data[row['crs_code']] = {
            'description': name,
            'code': row['nlc_code'],
        }
        if row['fare_group'] != row['nlc_code']:
            data[row['crs_code']]['fare_group'] = row['fare_group']

del data['SPX']
#del data['RMZ']
del data['MCZ']
json.dump(data, open('data/stations.json', 'w'), separators=(',', ': '), indent=2, sort_keys=True)
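
# Presumed usage, once the FSC/RTE/LOC fare feed files are in place:
#   ./parse-stations
# which writes data/clusters.json, data/routes.json and data/stations.json.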