-
Notifications
You must be signed in to change notification settings - Fork 6
/
importer.py
79 lines (70 loc) · 2.67 KB
/
importer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import traceback
import urllib2
import calendar
import pytz
from operator import itemgetter
from datetime import timedelta
import dateutil.parser
import defusedxml.ElementTree as ET
def get_schedule(url, schedule_tz):
    """Download a schedule XML document and convert it to a list of events.

    The document is expected to be laid out as ``day`` elements containing
    ``room`` elements containing ``event`` elements. Each event is read for
    its ``date``, ``duration``, ``title``, ``room``, ``language`` and
    ``persons/person`` children plus its ``id`` attribute.

    Args:
        url: Location of the schedule XML, fetched with ``urllib2.urlopen``.
        schedule_tz: Timezone object used for all event times. It must
            provide ``localize()`` (as pytz timezones do) and be usable
            with ``datetime.astimezone()``.

    Returns:
        A tuple ``(ok, events)``. On success ``ok`` is True and ``events``
        is a list of dicts sorted by ``start_unix``. On any failure the
        traceback is printed and ``(False, None)`` is returned.
    """
    def load_events(xml):
        """Parse the schedule XML string into a sorted list of event dicts."""

        def to_unixtimestamp(dt):
            # Normalise to UTC first so timegm() sees a UTC time tuple.
            dt = dt.astimezone(pytz.utc)
            return int(calendar.timegm(dt.timetuple()))

        def text_or_empty(node, child_name):
            # Missing child and empty child are both reported as u"".
            child = node.find(child_name)
            if child is None or child.text is None:
                return u""
            return unicode(child.text)

        def parse_duration(value):
            # Expects exactly "H:MM"; anything else raises ValueError.
            h, m = map(int, value.split(':'))
            return timedelta(hours=h, minutes=m)

        def all_events():
            schedule = ET.fromstring(xml)
            for day in schedule.findall('day'):
                for room in day.findall('room'):
                    for event in room.findall('event'):
                        yield event

        parsed_events = []
        for event in all_events():
            event_time = dateutil.parser.parse(event.find('date').text)
            if event_time.tzinfo is None:
                # Assume the provided timezone for naive dates.
                # localize() needs the datetime it should attach the zone to.
                start = schedule_tz.localize(event_time)
            else:
                # Otherwise convert to provided timezone
                start = event_time.astimezone(schedule_tz)

            duration = parse_duration(event.find('duration').text)
            end = start + duration

            persons = event.find('persons')
            if persons is not None:
                persons = persons.findall('person')

            # Skip <person> elements without text instead of failing the
            # whole import on None.strip().
            speakers = [
                unicode(person.text.strip())
                for person in persons
                if person.text is not None
            ] if persons else []

            parsed_events.append(dict(
                start_str = start.strftime('%H:%M'),
                end_str = end.strftime('%H:%M'),
                start_unix = to_unixtimestamp(start),
                end_unix = to_unixtimestamp(end),
                duration = int(duration.total_seconds() / 60),
                title = text_or_empty(event, 'title'),
                place = text_or_empty(event, 'room'),
                speakers = speakers,
                lang = text_or_empty(event, 'language') or "unk",
                id = event.attrib["id"],
                type = "talk",
            ))

        parsed_events.sort(key=itemgetter('start_unix'))
        return parsed_events

    try:
        resp = urllib2.urlopen(url)
        try:
            schedule = resp.read()
        finally:
            # Always release the connection, even if read() fails.
            resp.close()
        events = load_events(schedule)
    except Exception:
        # Best-effort import: report the problem and signal failure.
        traceback.print_exc()
        return False, None
    return True, events