diff --git a/web_app/create_json_for_web_api.py b/web_app/create_json_for_web_api.py
new file mode 100644
index 00000000..50a7cdb3
--- /dev/null
+++ b/web_app/create_json_for_web_api.py
@@ -0,0 +1,247 @@
+# %%
+import csv
+import json
+from collections import defaultdict
+
+import geojson
+
+# %% md
+# PARAMETERS
+## Define the paths of the data source files:
+# - SIRI_EXPORTED_FROM_SPLUNK -> SIRI data about real-time bus locations
+# - GTFS_STAT_EXPORTED_FROM_SPLUNK -> static data about the route and the trip
+# - SHAPE_FILE_7716 -> data about the shape of the bus route
+# %%
+# SIRI_EXPORTED_FROM_SPLUNK = '/home/aviv/Downloads/siri_149_two_directions_30_days.csv'
+# GTFS_STAT_EXPORTED_FROM_SPLUNK = '/home/aviv/Downloads/gtfs_stat_149_two_directions_30_days.csv'
+# SHAPE_FILE_7716 = '/home/aviv/Downloads/route_id_7716_shape_id_93603.csv'
+# Raw strings: "\D", "\w", "\s" and "\g" are not valid escape sequences, so
+# the plain literals only worked by accident and warn on recent Pythons.
+SIRI_EXPORTED_FROM_SPLUNK = r'D:\Downloads\web_app\siri_149.csv'
+GTFS_STAT_EXPORTED_FROM_SPLUNK = r'D:\Downloads\web_app\gtfs_149.csv'
+SHAPE_FILE_7716 = 'D:/Downloads/web_app/route_id_7716_shape_id_93603.csv'
+# Only trips of this route are exported; each one gets the SHAPE_FILE_7716 shape.
+ROUTE_ID = '7716'
+# File the combined JSON is written to.
+OUTPUT_FILE = 'out'
+
+
+# %% md
+# SIRI Objects
+# %%
+## SIRI
+
+class SiriKey:
+    """Identify one SIRI trip by date, planned start time and route."""
+
+    def __init__(self, date, planned_start_time, route_id, route_short_name):
+        self.date = date
+        self.planned_start_time = planned_start_time
+        self.route_id = route_id
+        self.route_short_name = route_short_name
+
+    def _tuple(self):
+        """Return every field as a tuple; equality, hashing and repr use it."""
+        return (self.date, self.planned_start_time, self.route_id, self.route_short_name)
+
+    def __eq__(self, other):
+        # Defer to the other operand instead of raising AttributeError.
+        if not isinstance(other, SiriKey):
+            return NotImplemented
+        return self._tuple() == other._tuple()
+
+    def __lt__(self, other):
+        if not isinstance(other, SiriKey):
+            return NotImplemented
+        # Ordering looks at date and route only, so it is coarser than __eq__.
+        return (self.date, self.route_id) < (other.date, other.route_id)
+
+    def __hash__(self):
+        return hash(self._tuple())
+
+    def __repr__(self):
+        # str() each field so a non-string value cannot break the join.
+        return 'SiriKey: ' + "@".join(str(field) for field in self._tuple())
+
+    @staticmethod
+    def of_csv_row(csv_row_dict):
+        """Build a key from one CSV row (a dict as yielded by csv.DictReader).
+
+        The date is the first 10 characters of the row's 'timestamp' value.
+        """
+        return SiriKey(csv_row_dict['timestamp'][:10], csv_row_dict['planned_start_time'],
+                       csv_row_dict['route_id'], csv_row_dict['route_short_name'])
+
+
+class SiriTrip:
+    """A SiriKey together with the recorded positions of that trip."""
+
+    def __init__(self, key, geojson_feature_collection):
+        self.key = key
+        self.geojson_feature_collection = geojson_feature_collection
+
+    def __repr__(self):
+        return 'SiriTrip: ' + str(self.key)
+
+    @staticmethod
+    def sort_by_lat(val):
+        """Return the 'lat' entry of val, for use as a sort key."""
+        return val["lat"]
+
+    @staticmethod
+    def of_csv_rows(key, rows):
+        """Build a SiriTrip from the CSV rows that share one key.
+
+        Rows are ordered by 'time_recorded'; rows whose latitude is not
+        positive are dropped. The rows argument itself is not modified.
+        """
+        ordered_rows = sorted(rows, key=lambda item: item['time_recorded'])
+
+        # NOTE(review): points are written as [lat, lon] while RFC 7946 puts
+        # longitude first; the order is kept because the consumer of this
+        # output is not visible here - confirm before changing.
+        features = [geojson.Feature(geometry=geojson.Point([float(row['lat']), float(row['lon'])]),
+                                    properties={'time_recorded': row['time_recorded']})
+                    for row in ordered_rows if float(row['lat']) > 0]
+
+        return SiriTrip(key=key, geojson_feature_collection=geojson.FeatureCollection(features))
+
+
+# %%
+
+def groupby(iterable, projection):
+    """Group items into a dict of lists keyed by projection(item)."""
+    result = defaultdict(list)
+    for item in iterable:
+        result[projection(item)].append(item)
+    return result
+
+
+def read_siri_trips_from_exported_file(f):
+    """Read CSV rows from the open file f and return one SiriTrip per SiriKey."""
+    return [SiriTrip.of_csv_rows(siri_key, siri_csv_rows)
+            for siri_key, siri_csv_rows
+            in groupby(csv.DictReader(f), SiriKey.of_csv_row).items()]
+
+
+# %% md
+# GTFS Objects
+# %%
+# GTFS_STAT
+
+class Route:
+    """Static (GTFS) description of one route on one date."""
+
+    def __init__(self, route_id, date, route_short_name, agency_name, stops,
+                 route_long_name, is_loop, route_type, start_zone, end_zone,
+                 service_duration, speed, start_times, trip_ids):
+        self.route_id = route_id
+        self.date = date
+        self.route_short_name = route_short_name
+        self.agency_name = agency_name
+        self.stops = stops
+        self.route_long_name = route_long_name
+        self.is_loop = is_loop
+        self.route_type = route_type
+        self.start_zone = start_zone
+        self.end_zone = end_zone
+        self.service_duration = service_duration
+        self.speed = speed
+        self.start_times = start_times
+        self.trip_ids = trip_ids
+
+    def __repr__(self):
+        return "Route: " + "@".join([self.route_id, self.date, self.route_short_name, self.agency_name])
+
+    @staticmethod
+    def parse(csv_row):
+        """Build a Route from one CSV row (a dict as yielded by csv.DictReader).
+
+        The 'all_*' columns are ';'-separated lists; each entry of
+        'all_stop_latlon' is a ','-separated pair of numbers.
+        """
+        latlons = [pair.split(',') for pair in csv_row['all_stop_latlon'].split(';')]
+        stop_codes = csv_row['all_stop_code'].split(';')
+        stop_ids = csv_row['all_stop_id'].split(';')
+        # zip() stops at the shortest of the three lists.
+        stops = [geojson.Feature(geometry=geojson.Point((float(latlon[0]), float(latlon[1]))),
+                                 properties=dict(stop_code=stop_code, stop_id=stop_id))
+                 for latlon, stop_code, stop_id in zip(latlons, stop_codes, stop_ids)]
+
+        return Route(route_id=csv_row['route_id'],
+                     date=csv_row['date'],
+                     route_short_name=csv_row['route_short_name'],
+                     agency_name=csv_row['agency_name'],
+                     stops=stops,
+                     route_long_name=csv_row['route_long_name'],
+                     is_loop=csv_row['is_loop'],
+                     route_type=csv_row['route_type'],
+                     start_zone=csv_row['start_zone'],
+                     end_zone=csv_row['end_zone'],
+                     service_duration=csv_row['service_duration'],
+                     speed=csv_row['service_speed'],
+                     start_times=csv_row['all_start_time'].split(';'),
+                     trip_ids=csv_row['all_trip_id'].split(';'))
+
+
+# %% md
+# Handle SHAPE file
+# %%
+# SHAPE
+
+def create_line_string_from_shape_file(path):
+    """Return a geojson LineString of the points listed in the shape CSV at path."""
+    # The context manager closes the file once every row has been read.
+    with open(path, encoding="utf8", newline='') as shape_file:
+        coordinates = [(float(row['shape_pt_lat']), float(row['shape_pt_lon']))
+                       for row in csv.DictReader(shape_file)]
+
+    return geojson.LineString(coordinates=coordinates)
+
+
+# %% md
+# Combine the data as one structure
+# %%
+with open(SIRI_EXPORTED_FROM_SPLUNK, encoding="utf8", newline='') as siri_file:
+    siri_data = read_siri_trips_from_exported_file(siri_file)
+# %%
+with open(GTFS_STAT_EXPORTED_FROM_SPLUNK, encoding="utf8", newline='') as gtfs_stat_file:
+    gtfs_stat_data = [Route.parse(row) for row in csv.DictReader(gtfs_stat_file)]
+gtfs_stat_data_dict = {(i.date, i.route_id): i for i in gtfs_stat_data}
+# %%
+shape = create_line_string_from_shape_file(SHAPE_FILE_7716)
+# %%
+results = []
+
+for siri_itm in filter(lambda x: x.key.route_id == ROUTE_ID, siri_data):
+    gtfs_itm = gtfs_stat_data_dict[(siri_itm.key.date, siri_itm.key.route_id)]
+    planned_start_time = siri_itm.key.planned_start_time
+
+    # The trip id sits at the same position as its start time; it stays None
+    # when the planned start time is not listed for this route and date.
+    trip_id = None
+    if planned_start_time in gtfs_itm.start_times:
+        trip_id = gtfs_itm.trip_ids[gtfs_itm.start_times.index(planned_start_time)]
+
+    res = dict(tripId=trip_id,
+               planned_time=planned_start_time,
+               date=gtfs_itm.date,
+               routeId=gtfs_itm.route_id,
+               routeShortName=gtfs_itm.route_short_name,
+               routeLongName=gtfs_itm.route_long_name,
+               agencyName=gtfs_itm.agency_name,
+               routeType=gtfs_itm.route_type,
+               stops=gtfs_itm.stops,
+               startZone=gtfs_itm.start_zone,
+               endZone=gtfs_itm.end_zone,
+               isLoop=gtfs_itm.is_loop,
+               distance=None,
+               duration=gtfs_itm.service_duration,
+               speed=gtfs_itm.speed,
+               shape=shape,
+               siri=siri_itm.geojson_feature_collection)
+
+    results.append(res)
+# %%
+with open(OUTPUT_FILE, 'w', encoding="utf8") as out_file:
+    json.dump(results, out_file, ensure_ascii=False)
diff --git a/web_app/web_app.py b/web_app/web_app.py
new file mode 100644
index 00000000..451ad425
--- /dev/null
+++ b/web_app/web_app.py
@@ -0,0 +1,73 @@
+import json
+import time
+import urllib.request
+from collections import defaultdict
+from datetime import datetime
+
+# Per-date counters: dict_output[date]["ride_on_time"] and ["number_of_rides"].
+dict_output = defaultdict(lambda: defaultdict(int))
+lateness_interval = 300  # seconds
+
+
+def clean_data(json_object):
+    """Delete the 'type' entries of the shape and of every stop, in place."""
+    del json_object["shape"]["type"]
+
+    for element1 in json_object['stops']:
+        del element1["geometry"]["type"]
+        del element1["type"]
+        print(element1)
+
+
+def open_json(url):
+    """Fetch url and return its body parsed as JSON."""
+    # The context manager closes the HTTP response once it has been read.
+    with urllib.request.urlopen(url) as response:
+        return json.loads(response.read().decode())
+
+
+def check_depart_on_time(json_object):
+    """Tell whether a trip was first recorded close to its planned time.
+
+    Compares json_object["planned_time"] with the "time_recorded" of the first
+    SIRI feature, both parsed as %H:%M:%S. Returns True when they are less
+    than lateness_interval seconds apart (in either direction), False
+    otherwise, and None when the trip has no SIRI features.
+    """
+    time_format = '%H:%M:%S'
+
+    # Read from the argument; the module-level data_from_json only exists
+    # when this file runs as a script.
+    planned_time = datetime.strptime(json_object["planned_time"], time_format)
+
+    try:
+        first_feature = json_object["siri"]["features"][0]
+    except IndexError:
+        return None
+    time_recorded = datetime.strptime(first_feature["properties"]["time_recorded"], time_format)
+
+    # Both values carry the same default date, so subtracting them directly
+    # gives the gap; times on opposite sides of midnight are not handled.
+    seconds_elapsed = abs((planned_time - time_recorded).total_seconds())
+
+    return lateness_interval > seconds_elapsed
+
+
+if __name__ == '__main__':
+    # 18/3/2019: http://142.93.111.211:3000/trips/114 <-> http://142.93.111.211:3000/trips/167
+    # total entries: http://142.93.111.211:3000/trips/0 <-> http://142.93.111.211:3000/trips/1329
+    for i in range(1330):
+        api_url = "http://142.93.111.211:3000/trips/" + str(i)
+        print("checking: " + api_url)
+
+        data_from_json = open_json(api_url)
+        ride_on_time = check_depart_on_time(data_from_json)
+        ride_date = data_from_json["date"]
+        if ride_on_time is not None:
+            dict_output[ride_date]["ride_on_time"] += ride_on_time  # True adds 1
+            dict_output[ride_date]["number_of_rides"] += 1
+        else:
+            print("ERROR: problem with data on " + api_url)
+
+    with open('results.json', 'w', encoding="utf8") as results_file:
+        json.dump(dict_output, results_file, ensure_ascii=False)