Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hackathon 2019 web api #170

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 199 additions & 0 deletions web_app/create_json_for_web_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
# %%
import geojson
import csv
from collections import defaultdict
import json

# %% md
# PARAMETERS
## Define the paths of the data source files:
# - SIRI_EXPORTED_FROM_SPLUNK -> siri data about real time bus locations
# - GTFS_STAT_EXPORTED_FROM_SPLUNK -> static data about the route and the trip
# - SHAPE_FILE_7716 -> data about the shape of the bus route
# %%
# SIRI_EXPORTED_FROM_SPLUNK = '/home/aviv/Downloads/siri_149_two_directions_30_days.csv'
# GTFS_STAT_EXPORTED_FROM_SPLUNK = '/home/aviv/Downloads/gtfs_stat_149_two_directions_30_days.csv'
# SHAPE_FILE_7716 = '/home/aviv/Downloads/route_id_7716_shape_id_93603.csv'
# The Windows paths are raw strings so that every backslash is taken literally
# instead of being parsed as an (invalid) escape sequence such as "\D" or "\w".
SIRI_EXPORTED_FROM_SPLUNK = r'D:\Downloads\web_app\siri_149.csv'
GTFS_STAT_EXPORTED_FROM_SPLUNK = r'D:\Downloads\web_app\gtfs_149.csv'
SHAPE_FILE_7716 = 'D:/Downloads/web_app/route_id_7716_shape_id_93603.csv'


# %% md
# SIRI Objects
# %%
## SIRI

class SiriKey():
    """Hashable identity of a single trip found in the SIRI export.

    A key is the combination of date, planned start time, route id and route
    short name. Equality and hashing use all four fields; ordering (`<`) only
    looks at (date, route_id).
    """

    def __init__(self, date, planned_start_time, route_id, route_short_name):
        self.date = date
        self.planned_start_time = planned_start_time
        self.route_id = route_id
        self.route_short_name = route_short_name

    def _tuple(self):
        """Return the four key fields as a tuple (basis for eq/hash/repr)."""
        return (self.date, self.planned_start_time, self.route_id, self.route_short_name)

    def __eq__(self, other):
        # Returning NotImplemented lets Python fall back to its default
        # comparison instead of crashing on objects that have no _tuple().
        if not isinstance(other, SiriKey):
            return NotImplemented
        return self._tuple() == other._tuple()

    def __lt__(self, other):
        if not isinstance(other, SiriKey):
            return NotImplemented
        # Ordering deliberately ignores the start time and the short name.
        return (self.date, self.route_id) < (other.date, other.route_id)

    def __hash__(self):
        return hash(self._tuple())

    def __repr__(self):
        # str() each part so that non-string field values cannot break join().
        return 'SiriKey: ' + "@".join(str(part) for part in self._tuple())

    @staticmethod
    def of_csv_row(csv_row_dict):
        """Build a key from one CSV row given as a dict.

        Reads the 'timestamp', 'planned_start_time', 'route_id' and
        'route_short_name' columns; the date is the first 10 characters of
        'timestamp' (presumably a YYYY-MM-DD prefix -- verify against the export).
        """
        return SiriKey(csv_row_dict['timestamp'][:10], csv_row_dict['planned_start_time'],
                       csv_row_dict['route_id'], csv_row_dict['route_short_name'])


class SiriTrip():
    """One trip from the SIRI export: its key plus the recorded positions.

    `geojson_feature_collection` holds one point feature per recorded position.
    """

    def __init__(self, key, geojson_feature_collection):
        self.geojson_feature_collection = geojson_feature_collection
        self.key = key

    def __repr__(self):
        return 'SiriTrip: {}'.format(str(self.key))

    @staticmethod
    def sort_by_lat(val):
        """Return the "lat" entry of *val*, for use as a sort key."""
        return val["lat"]

    @staticmethod
    def of_csv_rows(key, rows):
        """Build a SiriTrip from the CSV rows that share *key*.

        *rows* is sorted in place by its 'time_recorded' values. Rows whose
        latitude is not greater than zero are left out of the result.
        """
        # Order the recordings by their 'time_recorded' value (sorts in place).
        rows.sort(key=lambda item: item['time_recorded'])

        features = []
        for row in rows:
            if not float(row['lat']) > 0:
                continue
            # Coordinates are stored as [lat, lon].
            # NOTE(review): GeoJSON (RFC 7946) orders positions as [lon, lat] --
            # confirm that consumers of this output expect [lat, lon].
            position = geojson.Point([float(row['lat']), float(row['lon'])])
            features.append(geojson.Feature(geometry=position,
                                            properties={'time_recorded': row['time_recorded']}))

        collection = geojson.FeatureCollection(features)
        return SiriTrip(key=key, geojson_feature_collection=collection)

# %%


def groupby(iterable, projection):
    """Group the items of *iterable* by the value of ``projection(item)``.

    Returns a ``defaultdict(list)`` mapping each projected value to the list
    of items that produced it, in the order they were encountered. Unlike
    ``itertools.groupby`` the input does not need to be sorted.
    """
    buckets = defaultdict(list)
    for element in iterable:
        bucket_key = projection(element)
        buckets[bucket_key].append(element)
    return buckets


def read_siri_trips_from_exported_file(f):
    """Parse the SIRI CSV export *f* into a list of SiriTrip objects.

    *f* is an open text file (or any iterable of CSV lines with a header row).
    Rows are grouped by their SiriKey and each group becomes one SiriTrip.
    """
    rows_by_key = groupby(csv.DictReader(f), SiriKey.of_csv_row)

    trips = []
    for siri_key, siri_csv_rows in rows_by_key.items():
        trips.append(SiriTrip.of_csv_rows(siri_key, siri_csv_rows))
    return trips


# %% md
# GTFS Objects
# %%
# GTFS_STAT

class Route():
    """Static (GTFS-stat) description of one route on one date.

    A plain value holder; `parse` builds an instance from one row of the
    GTFS-stat CSV export.
    """

    def __init__(self, route_id, date, route_short_name, agency_name, stops,
                 route_long_name, is_loop, route_type, start_zone, end_zone,
                 service_duration, speed, start_times, trip_ids):
        # Which route, on which day, operated by whom.
        self.route_id = route_id
        self.date = date
        self.agency_name = agency_name
        # Names and classification.
        self.route_short_name = route_short_name
        self.route_long_name = route_long_name
        self.route_type = route_type
        self.is_loop = is_loop
        # Geography.
        self.stops = stops
        self.start_zone = start_zone
        self.end_zone = end_zone
        # Service figures.
        self.service_duration = service_duration
        self.speed = speed
        # Lists that `parse` fills from the ';'-separated columns.
        self.start_times = start_times
        self.trip_ids = trip_ids

    def __repr__(self):
        identity = (self.route_id, self.date, self.route_short_name, self.agency_name)
        return "Route: " + "@".join(identity)

    @staticmethod
    def parse(csv_row):
        """Build a Route from one row (a dict) of the GTFS-stat CSV export.

        The 'all_stop_latlon', 'all_stop_code' and 'all_stop_id' columns are
        ';'-separated lists that are combined position by position into one
        point feature per stop; each 'all_stop_latlon' entry is itself a
        ','-separated pair. 'all_start_time' and 'all_trip_id' are split on
        ';' into lists. All other columns are stored as read.
        """
        lat_lon_pairs = [pair.split(',') for pair in csv_row['all_stop_latlon'].split(';')]
        stop_codes = csv_row['all_stop_code'].split(';')
        stop_ids = csv_row['all_stop_id'].split(';')

        stops = []
        for lat_lon, stop_code, stop_id in zip(lat_lon_pairs, stop_codes, stop_ids):
            # The two parts of each pair are used in the order they appear.
            position = geojson.Point((float(lat_lon[0]), float(lat_lon[1])))
            stops.append(geojson.Feature(geometry=position,
                                         properties=dict(stop_code=stop_code,
                                                         stop_id=stop_id)))

        return Route(route_id=csv_row['route_id'],
                     date=csv_row['date'],
                     route_short_name=csv_row['route_short_name'],
                     agency_name=csv_row['agency_name'],
                     stops=stops,
                     route_long_name=csv_row['route_long_name'],
                     is_loop=csv_row['is_loop'],
                     route_type=csv_row['route_type'],
                     start_zone=csv_row['start_zone'],
                     end_zone=csv_row['end_zone'],
                     service_duration=csv_row['service_duration'],
                     speed=csv_row['service_speed'],
                     start_times=csv_row['all_start_time'].split(';'),
                     trip_ids=csv_row['all_trip_id'].split(';'))
# %% md


# Handle SHAPE file
# %%
# SHAPE

def create_line_string_from_shape_file(path):
    """Read the shape CSV at *path* and return it as a geojson LineString.

    Each row contributes one (shape_pt_lat, shape_pt_lon) coordinate, both
    converted to float, in file order.

    Raises KeyError if one of the two columns is missing and ValueError if a
    value is not a number.
    """
    # The context manager closes the file once every row has been consumed;
    # newline='' is what the csv module asks for when it is handed a file.
    with open(path, encoding="utf8", newline='') as shape_file:
        coordinates = [(float(row['shape_pt_lat']), float(row['shape_pt_lon']))
                       for row in csv.DictReader(shape_file)]

    return geojson.LineString(coordinates=coordinates)


# %% md
# Combine the data as one structure
# %%
# Every file below is opened in a `with` block so that it is closed as soon as
# it has been read (or written) instead of staying open until the process exits.
with open(SIRI_EXPORTED_FROM_SPLUNK, encoding="utf8") as siri_file:
    siri_data = read_siri_trips_from_exported_file(siri_file)
# %%
with open(GTFS_STAT_EXPORTED_FROM_SPLUNK, encoding="utf8") as gtfs_file:
    gtfs_stat_data = [Route.parse(row) for row in csv.DictReader(gtfs_file)]
# Index the static route data by (date, route_id) for the lookup below.
gtfs_stat_data_dict = {(i.date, i.route_id): i for i in gtfs_stat_data}
# %%
shape = create_line_string_from_shape_file(SHAPE_FILE_7716)
# %%
results = []

for siri_itm in siri_data:
    # Only route 7716 is exported; the single shape file loaded above is
    # attached to every one of its trips.
    if siri_itm.key.route_id != '7716':
        continue

    # Raises KeyError when the GTFS export has no entry for this date/route.
    gtfs_itm = gtfs_stat_data_dict[(siri_itm.key.date, siri_itm.key.route_id)]
    curr_shape = shape

    # start_times and trip_ids are matched by position; tripId stays None
    # when the planned start time is not listed for that day.
    tripId = None
    if siri_itm.key.planned_start_time in gtfs_itm.start_times:
        tripId = gtfs_itm.trip_ids[gtfs_itm.start_times.index(siri_itm.key.planned_start_time)]

    res = dict(tripId=tripId,
               planned_time=siri_itm.key.planned_start_time,
               date=gtfs_itm.date,
               routeId=gtfs_itm.route_id,
               routeShortName=gtfs_itm.route_short_name,
               routeLongName=gtfs_itm.route_long_name,
               agencyName=gtfs_itm.agency_name,
               routeType=gtfs_itm.route_type,
               stops=gtfs_itm.stops,
               startZone=gtfs_itm.start_zone,
               endZone=gtfs_itm.end_zone,
               isLoop=gtfs_itm.is_loop,
               distance=None,
               duration=gtfs_itm.service_duration,
               speed=gtfs_itm.speed,
               shape=curr_shape,
               siri=siri_itm.geojson_feature_collection)

    results.append(res)
# %%
with open('out', 'w', encoding="utf8") as out_file:
    json.dump(results, out_file, ensure_ascii=False)
64 changes: 64 additions & 0 deletions web_app/web_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import json, urllib.request, time
from collections import defaultdict
from datetime import datetime

# Largest gap, in seconds, between the planned start time and the first
# recorded time for which a ride still counts as departing on time.
lateness_interval = 300

# Counters per ride date: dict_output[date][counter_name] -> int, starting at 0.
dict_output = defaultdict(lambda: defaultdict(int))


def clean_data(json_object):
    """Strip the "type" entries from a trip object, in place.

    Removes "type" from json_object["shape"], and from every element of
    json_object["stops"] both the element's own "type" and the "type" of its
    "geometry". Each stop is printed after it has been stripped.

    Returns None. Raises KeyError if one of the expected keys is missing.
    """
    shape = json_object["shape"]
    del shape["type"]

    for stop in json_object['stops']:
        stop_geometry = stop["geometry"]
        del stop_geometry["type"]
        del stop["type"]
        print(stop)


def open_json(url):
    """Fetch *url* and return its body parsed as JSON.

    The body is decoded with the default codec of ``bytes.decode`` (UTF-8).

    Raises whatever ``urllib.request.urlopen`` raises when the request fails,
    and ``json.JSONDecodeError`` when the body is not valid JSON.
    """
    # The context manager closes the connection as soon as the body is read.
    with urllib.request.urlopen(url) as response:
        body = response.read()
    return json.loads(body.decode())


def check_depart_on_time(json_object, max_delay_seconds=None):
    """Tell whether a trip's first recorded time is close to its planned time.

    Args:
        json_object: trip dict with a "planned_time" string and a
            "siri" -> "features" list whose first element carries
            "properties" -> "time_recorded". Both times use the HH:MM:SS format.
        max_delay_seconds: allowed gap in seconds; when None the module-level
            ``lateness_interval`` is used.

    Returns:
        True when the absolute gap between the two times is strictly smaller
        than the allowed gap, False otherwise, and None when the trip has no
        recorded features at all.

    Raises:
        KeyError if an expected key is missing, ValueError if a time string
        does not match HH:MM:SS.
    """
    if max_delay_seconds is None:
        max_delay_seconds = lateness_interval

    time_format = '%H:%M:%S'

    # Read from the argument itself, not from a module-level global, so the
    # function works for any trip object handed to it.
    planned = datetime.strptime(json_object["planned_time"], time_format)

    try:
        time_recorded = json_object["siri"]["features"][0]["properties"]["time_recorded"]
    except IndexError:
        # An empty feature list: there is nothing to compare against.
        return None
    recorded = datetime.strptime(time_recorded, time_format)

    # Both values are times of day on the same placeholder date, so the gap
    # does not account for trips that cross midnight.
    seconds_elapsed = abs((planned - recorded).total_seconds())

    return max_delay_seconds > seconds_elapsed


if __name__ == '__main__':
    # Walk over every trip exposed by the API and count, per date, how many
    # rides were checked and how many of them departed on time.
    #18/3/2019: http://142.93.111.211:3000/trips/114 <-> http://142.93.111.211:3000/trips/167
    #total entries: http://142.93.111.211:3000/trips/0 <-> http://142.93.111.211:3000/trips/1329
    for i in range(1330):
        api_url = "http://142.93.111.211:3000/trips/"+str(i)
        print("checking: " + api_url)

        data_from_json = open_json(api_url)
        ride_on_time = check_depart_on_time(data_from_json)
        ride_date = data_from_json["date"]
        if ride_on_time is not None:
            # ride_on_time is a bool, so adding it counts the on-time rides.
            dict_output[ride_date]["ride_on_time"] += ride_on_time
            dict_output[ride_date]["number_of_rides"] += 1
        else:
            print("ERROR: problem with data on "+api_url)

    # The context manager flushes and closes the output file once it is written.
    with open('results.json', 'w', encoding="utf8") as results_file:
        json.dump(dict_output, results_file, ensure_ascii=False)