Skip to content

Commit

Permalink
Merge pull request e-mission#917 from JGreenlee/composite-trip-sections
Browse files Browse the repository at this point in the history
✨ Include 'sections' in composite trips
  • Loading branch information
shankari authored Jun 21, 2023
2 parents d5a602c + fad4a62 commit dc2394f
Show file tree
Hide file tree
Showing 21 changed files with 56,921 additions and 27,866 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ Note that loading the data retains the object IDs. This means that if you load t
$ ./e-mission-py.bash bin/debug/load_timeline_for_day_and_user.py -n /tmp/data-collection-eval/results_dec_2015/ucb.sdb.android.1/timeseries/active_day_2.2015-11-27 [email protected]
```

Note that this will typically only generate cleaned sections which only distinguish between 4 modes - `walk`, `bike`, `motorized` and `air_or_hsr`. This should be sufficient in general, since the most of the server code falls back to cleaned sections if inferred sections are not present.

However, if you do want to work with inferred sections, perhaps to test the UI with a more complex set of outputs, you will need to copy a saved random forest model. If you do not care about model accuracy, you can use the included model from test data. **DO NOT USE THIS MODEL IN PRODUCTION**

```
$ cp emission/tests/data/seed_model_from_test_data.json seed_model.json
```

#### Other data sources ####
1. Get your own data. You can export your timeline for a particular day via email (Profile -> Download json dump) and then load and view it as above.

Expand Down
37 changes: 36 additions & 1 deletion emission/analysis/plotting/composite_trip_creation.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
import emission.analysis.config as eac
import emission.core.wrapper.entry as ecwe
import emission.core.wrapper.motionactivity as ecwm
import emission.core.wrapper.modeprediction as ecwmp

import emission.analysis.userinput.matcher as eaum
import emission.storage.decorations.analysis_timeseries_queries as esda
import emission.storage.decorations.trip_queries as esdt
import emission.storage.pipeline_queries as epq
import emission.storage.timeseries.abstract_timeseries as esta
import emission.storage.timeseries.timequery as estt

import emission.analysis.config as eac

import copy
import logging

Expand Down Expand Up @@ -74,7 +80,7 @@ def create_composite_trip(ts, ct):
composite_trip_data["confirmed_trip"] = ct["_id"]
composite_trip_data["start_confirmed_place"] = eaum.get_confirmed_place_for_confirmed_trip(ct, "start_place")
composite_trip_data["end_confirmed_place"] = eaum.get_confirmed_place_for_confirmed_trip(ct, "end_place")
# later we will want to put section & modes in composite_trip as well
composite_trip_data["sections"] = get_sections_for_confirmed_trip(ct)
composite_trip_entry = ecwe.Entry.create_entry(ct["user_id"], "analysis/composite_trip", composite_trip_data)
composite_trip_entry["metadata"]["origin_key"] = origin_key
ts.insert(composite_trip_entry)
Expand Down Expand Up @@ -152,3 +158,32 @@ def get_locations_for_confirmed_trip(ct, max_entries=100):
sample_rate = len(locations)//max_entries + 1
locations = locations[::sample_rate]
return locations

def get_sections_for_confirmed_trip(ct):
if "cleaned_trip" not in ct["data"]:
return [] # untracked time has no sections
section_key = eac.get_section_key_for_analysis_results()
logging.debug("Retrieving sections with key %s" % section_key)
sections = esdt.get_sections_for_trip(key = section_key,
user_id = ct["user_id"], trip_id = ct["data"]["cleaned_trip"])

# Fallback to cleaned sections if inferred sections do not exist
# TODO: Decide whether to keep or remove the fallback
if len(sections) == 0 and section_key == esda.INFERRED_SECTION_KEY:
logging.debug("len(inferred_sections) == 0, falling back to cleaned_sections")
section_key = "analysis/cleaned_section"
sections = esdt.get_sections_for_trip(key = section_key,
user_id = ct["user_id"], trip_id = ct["data"]["cleaned_trip"])

cleaned_section_mapper = lambda sm: ecwm.MotionTypes(sm).name
inferred_section_mapper = lambda sm: ecwmp.PredictedModeTypes(sm).name
sel_section_mapper = cleaned_section_mapper \
if section_key == "analysis/cleaned_section" else inferred_section_mapper

# on the phone, we don't need lists of 'speeds' and 'distances'
# for every section, and they can get big - so let's save some bandwidth
for section in sections:
section["sensed_mode_str"] = sel_section_mapper(section["data"]["sensed_mode"])
del section["data"]["speeds"]
del section["data"]["distances"]
return sections
33 changes: 33 additions & 0 deletions emission/analysis/userinput/matcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import emission.storage.pipeline_queries as epq
import emission.core.wrapper.entry as ecwe
import emission.core.wrapper.userinput as ecwui
import emission.core.wrapper.motionactivity as ecwm
import emission.core.wrapper.modeprediction as ecwmp
import emission.storage.decorations.place_queries as esdp

obj_to_dict_key = lambda key: key.split("/")[1]
Expand Down Expand Up @@ -191,6 +193,32 @@ def create_and_link_timeline(ts, timeline, last_confirmed_place):
confirmed_places, confirmed_trips)
return confirmed_tl

def get_section_summary(ts, cleaned_trip, section_key):
"""
Returns the proportions of the distance, duration and count for each mode
in this trip. Note that sections are unimodal by definition.
cleaned_trip: the cleaned trip object associated with the sections
section_key: 'inferred_section' or 'cleaned_section'
"""
import emission.core.get_database as edb

sections = esdt.get_sections_for_trip(key = section_key,
user_id = cleaned_trip["user_id"], trip_id = cleaned_trip["_id"])
if len(sections) == 0:
logging.warning("While getting section summary, section length = 0. This should never happen, but let's not crash if it does")
return {"distance": {}, "duration": {}, "count": {}}
sections_df = ts.to_data_df(section_key, sections)
cleaned_section_mapper = lambda sm: ecwm.MotionTypes(sm).name
inferred_section_mapper = lambda sm: ecwmp.PredictedModeTypes(sm).name
sel_section_mapper = cleaned_section_mapper \
if section_key == "analysis/cleaned_section" else inferred_section_mapper
sections_df["sensed_mode_str"] = sections_df["sensed_mode"].apply(sel_section_mapper)
grouped_section_df = sections_df.groupby("sensed_mode_str")
return {
"distance": grouped_section_df.distance.sum().to_dict(),
"duration": grouped_section_df.duration.sum().to_dict(),
"count": grouped_section_df.trip_id.count().to_dict()
}

def create_confirmed_entry(ts, tce, confirmed_key, input_key_list):
# Copy the entry and fill in the new values
Expand All @@ -199,6 +227,11 @@ def create_confirmed_entry(ts, tce, confirmed_key, input_key_list):
# confirmed_object_dict["metadata"]["key"] = confirmed_key
if (confirmed_key == esda.CONFIRMED_TRIP_KEY):
confirmed_object_data["expected_trip"] = tce.get_id()
logging.debug("creating confimed entry from %s" % tce)
cleaned_trip = ts.get_entry_from_id(esda.CLEANED_TRIP_KEY,
tce["data"]["cleaned_trip"])
confirmed_object_data['inferred_section_summary'] = get_section_summary(ts, cleaned_trip, "analysis/inferred_section")
confirmed_object_data['cleaned_section_summary'] = get_section_summary(ts, cleaned_trip, "analysis/cleaned_section")
elif (confirmed_key == esda.CONFIRMED_PLACE_KEY):
confirmed_object_data["cleaned_place"] = tce.get_id()
confirmed_object_data["user_input"] = \
Expand Down
17 changes: 3 additions & 14 deletions emission/core/wrapper/compositetrip.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,13 @@

class Compositetrip(ecwc.Confirmedtrip):
props = ecwc.Confirmedtrip.props
props.update({# # confirmedplace stuff
props.update({
"start_confirmed_place": ecwb.WrapperBase.Access.WORM, # object contains all properties for the source confirmed_place object
"end_confirmed_place": ecwb.WrapperBase.Access.WORM, # object contains all properties for the destination confirmed_place object
"confirmed_trip": ecwb.WrapperBase.Access.WORM, # the id of the corresponding confirmed trip
"locations": ecwb.WrapperBase.Access.WORM, # object containing cleaned location entries (max 100)
# # sections stuff
"cleaned_section": ecwb.WrapperBase.Access.WORM,
"inferred_section": ecwb.WrapperBase.Access.WORM,
"inferred_mode": ecwb.WrapperBase.Access.WORM, # inferred by mode inference algo
"confirmed_mode": ecwb.WrapperBase.Access.WORM, # confirmed by user
# mode to be used for analysis; confirmed mode if we know factors for it, inferred mode otherwise
"analysis_mode": ecwb.WrapperBase.Access.WORM,
# mode for user display; inferred mode if not confirmed; confirmed mode otherwise
"display_mode": ecwb.WrapperBase.Access.WORM
"locations": ecwb.WrapperBase.Access.WORM, # list containing cleaned location entries (max 100)
"sections": ecwb.WrapperBase.Access.WORM, # list containing cleaned sections during the trip
})
# # sections stuff
enums = {"inferred_mode": ecwm.PredictedModeTypes,
"analysis_mode": ecwm.PredictedModeTypes}

def _populateDependencies(self):
super(Compositetrip, self)._populateDependencies()
6 changes: 2 additions & 4 deletions emission/core/wrapper/confirmedtrip.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@ class Confirmedtrip(ecwt.Trip):
"expectation": ecwb.WrapperBase.Access.WORM,
"confidence_threshold": ecwb.WrapperBase.Access.WORM,
"expected_trip": ecwb.WrapperBase.Access.WORM,
# the confirmed section that is the "primary"
# https://github.com/e-mission/e-mission-docs/issues/476#issuecomment-738120752
"primary_section": ecwb.WrapperBase.Access.WORM,
"inferred_primary_mode": ecwb.WrapperBase.Access.WORM,
"inferred_section_summary": ecwb.WrapperBase.Access.WORM,
"cleaned_section_summary": ecwb.WrapperBase.Access.WORM,
# the user input will have all `manual/*` entries
# let's make that be somewhat flexible instead of hardcoding into the data model
"user_input": ecwb.WrapperBase.Access.WORM,
Expand Down
31 changes: 31 additions & 0 deletions emission/tests/analysisTests/intakeTests/TestPipelineRealData.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ def tearDown(self):
self.clearRelatedDb()
if hasattr(self, "analysis_conf_path"):
os.remove(self.analysis_conf_path)
if hasattr(self, "seed_mode_path"):
os.remove(self.seed_mode_path)
logging.info("tearDown complete")

def clearRelatedDb(self):
Expand Down Expand Up @@ -713,7 +715,18 @@ def compare_composite_objects(self, ct, et):
if 'exit_ts' in et['data']['end_confirmed_place']:
self.assertEqual(ct['data']['end_confirmed_place']['exit_ts'],
et['data']['end_confirmed_place']['exit_ts'])
# check locations
self.assertEqual(len(ct['data']['locations']), len(et['data']['locations']))
self.assertEqual([l['data']['ts'] for l in ct['data']['locations']],
[l['data']['ts'] for l in et['data']['locations']])

# check sections; if this gets more complex, we might want to move it to a separate
# compare_sections method
self.assertEqual(len(ct['data']['sections']), len(et['data']['sections']))
self.assertEqual([s['data']['start_ts'] for s in ct['data']['sections']],
[s['data']['start_ts'] for s in et['data']['sections']])
self.assertEqual([s['data']['sensed_mode'] for s in ct['data']['sections']],
[l['data']['sensed_mode'] for l in et['data']['sections']])

def testJackUntrackedTimeMar12(self):
dataFile = "emission/tests/data/real_examples/jack_untracked_time_2023-03-12"
Expand All @@ -727,6 +740,24 @@ def testJackUntrackedTimeMar12(self):
for i in range(len(composite_trips)):
self.compare_composite_objects(composite_trips[i], expected_trips[i])

def testJackUntrackedTimeMar12InferredSections(self):
# Setup to use the inferred sections
self.analysis_conf_path = \
etc.set_analysis_config("analysis.result.section.key", "analysis/inferred_section")
# along with the proper random seed
self.seed_mode_path = etc.copy_dummy_seed_for_inference()
dataFile = "emission/tests/data/real_examples/jack_untracked_time_2023-03-12"
etc.setupRealExample(self, dataFile)
etc.runIntakePipeline(self.testUUID)
ts = esta.TimeSeries.get_time_series(self.testUUID)
composite_trips = list(ts.find_entries(["analysis/composite_trip"], None))
with open(dataFile+".inferred_section.expected_composite_trips") as expectation:
expected_trips = json.load(expectation, object_hook = esj.wrapped_object_hook)
self.assertEqual(len(composite_trips), len(expected_trips))
for i in range(len(composite_trips)):
self.compare_composite_objects(composite_trips[i], expected_trips[i])


def testShankariNotUntrackedTimeMar21(self):
# https://github.com/e-mission/e-mission-docs/issues/870
# This data *used to* process with untracked time.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import unittest
import logging
import json
import bson.json_util as bju
import argparse
import numpy as np
import uuid

import emission.core.wrapper.entry as ecwe
import emission.analysis.userinput.matcher as eaum
import emission.storage.decorations.trip_queries as esdt
import emission.storage.timeseries.abstract_timeseries as esta

import emission.core.wrapper.motionactivity as ecwma
import emission.core.wrapper.modeprediction as ecwmp

import emission.analysis.userinput.matcher as eaum

# Test imports
import emission.tests.common as etc

class TestConfirmedObjectFakeData(unittest.TestCase):

def setUp(self):
self.testUUID = uuid.uuid4()
self.test_ts = esta.TimeSeries.get_time_series(self.testUUID)
self.BLANK_RESULT={"distance": {}, "duration": {}, "count": {}}

def tearDown(self):
logging.debug("Clearing related databases for %s" % self.testUUID)
self.clearRelatedDb()
logging.info("tearDown complete")


def clearRelatedDb(self):
import emission.core.get_database as edb
logging.info("Timeseries delete result %s" % edb.get_timeseries_db().delete_many({"user_id": self.testUUID}).raw_result)
logging.info("Analysis delete result %s" % edb.get_analysis_timeseries_db().delete_many({"user_id": self.testUUID}).raw_result)
logging.info("Usercache delete result %s" % edb.get_usercache_db().delete_many({"user_id": self.testUUID}).raw_result)

def testGetSectionSummaryCleaned(self):
fake_ct = ecwe.Entry({"metadata": {"key": "analysis/cleaned_trip"}, "data": {}})
fake_ct_id = self.test_ts.insert(fake_ct)
fake_cs_list = [
ecwe.Entry({"metadata": {"key": "analysis/cleaned_section", "write_ts": 1},
"data": {"distance": 500, "duration": 50, "sensed_mode": ecwma.MotionTypes['BICYCLING'].value,
"trip_id": fake_ct["_id"]}}),
ecwe.Entry({"metadata": {"key": "analysis/cleaned_section", "write_ts": 2},
"data": {"distance": 200, "duration": 20, "sensed_mode": ecwma.MotionTypes['BICYCLING'].value,
"trip_id": fake_ct["_id"]}}),
ecwe.Entry({"metadata": {"key": "analysis/cleaned_section", "write_ts": 3},
"data": {"distance": 200, "duration": 20, "sensed_mode": ecwma.MotionTypes['UNKNOWN'].value,
"trip_id": fake_ct["_id"]}}),
ecwe.Entry({"metadata": {"key": "analysis/cleaned_section", "write_ts": 4},
"data": {"distance": 400, "duration": 40, "sensed_mode": ecwma.MotionTypes['IN_VEHICLE'].value,
"trip_id": fake_ct["_id"]}}),
ecwe.Entry({"metadata": {"key": "analysis/cleaned_section", "write_ts": 5},
"data": {"distance": 300, "duration": 30, "sensed_mode": ecwma.MotionTypes['WALKING'].value,
"trip_id": fake_ct["_id"]}})]

fake_cs_ids = [self.test_ts.insert(fake_cs) for fake_cs in fake_cs_list]

cleaned_section_summary = eaum.get_section_summary(self.test_ts, fake_ct, "analysis/cleaned_section")
logging.debug(cleaned_section_summary)
self.assertEqual(cleaned_section_summary['distance'],
{'BICYCLING': 700, 'UNKNOWN': 200, 'IN_VEHICLE': 400, 'WALKING': 300})
self.assertEqual(cleaned_section_summary['duration'],
{'BICYCLING': 70, 'UNKNOWN': 20, 'IN_VEHICLE': 40, 'WALKING': 30})
self.assertEqual(cleaned_section_summary['count'],
{'BICYCLING': 2, 'UNKNOWN': 1, 'IN_VEHICLE': 1, 'WALKING': 1})

def testGetSectionSummaryInferred(self):
fake_ct = ecwe.Entry({"metadata": {"key": "analysis/cleaned_trip"}, "data": {}})
fake_ct_id = self.test_ts.insert(fake_ct)
fake_cs_list = [
ecwe.Entry({"metadata": {"key": "analysis/inferred_section", "write_ts": 1},
"data": {"distance": 500, "duration": 50, "sensed_mode": ecwmp.PredictedModeTypes['BUS'].value,
"trip_id": fake_ct["_id"]}}),
ecwe.Entry({"metadata": {"key": "analysis/inferred_section", "write_ts": 2},
"data": {"distance": 200, "duration": 20, "sensed_mode": ecwmp.PredictedModeTypes['BUS'].value,
"trip_id": fake_ct["_id"]}}),
ecwe.Entry({"metadata": {"key": "analysis/inferred_section", "write_ts": 2},
"data": {"distance": 200, "duration": 20, "sensed_mode": ecwmp.PredictedModeTypes['TRAIN'].value,
"trip_id": fake_ct["_id"]}}),
ecwe.Entry({"metadata": {"key": "analysis/inferred_section", "write_ts": 3},
"data": {"distance": 300, "duration": 30, "sensed_mode": ecwmp.PredictedModeTypes['CAR'].value,
"trip_id": fake_ct["_id"]}})]

fake_cs_ids = [self.test_ts.insert(fake_cs) for fake_cs in fake_cs_list]

inferred_section_summary = eaum.get_section_summary(self.test_ts, fake_ct, "analysis/inferred_section")
logging.debug(inferred_section_summary)
self.assertEqual(inferred_section_summary['distance'],
{'BUS': 700, 'TRAIN': 200, 'CAR': 300})
self.assertEqual(inferred_section_summary['duration'],
{'BUS': 70, 'TRAIN': 20, 'CAR': 30})
self.assertEqual(inferred_section_summary['count'],
{'BUS': 2, 'TRAIN': 1, 'CAR': 1})

def testNoSections(self):
fake_ct = ecwe.Entry({"metadata": {"key": "analysis/cleaned_trip"}, "data": {}})
fake_ct_id = self.test_ts.insert(fake_ct)
self.assertEqual(eaum.get_section_summary(self.test_ts, fake_ct, "analysis/inferred_section"),
self.BLANK_RESULT)
self.assertEqual(eaum.get_section_summary(self.test_ts, fake_ct, "analysis/cleaned_section"),
self.BLANK_RESULT)

def testInvalidInput(self):
with self.assertRaises(TypeError) as te:
eaum.get_section_summary(self.test_ts, None, "foobar")
self.assertEqual(str(te.exception), "'NoneType' object is not subscriptable")

with self.assertRaises(KeyError) as ke:
fake_ct = ecwe.Entry({"metadata": {"key": "analysis/cleaned_trip"}, "data": {}})
eaum.get_section_summary(self.test_ts, fake_ct, "foobar")
self.assertEqual(str(ke.exception), "'user_id'")

fake_ct = ecwe.Entry({"metadata": {"key": "analysis/cleaned_trip"}, "data": {}})
fake_ct_id = self.test_ts.insert(fake_ct)
self.assertEqual(eaum.get_section_summary(self.test_ts, fake_ct, "foobar"), self.BLANK_RESULT)

if __name__ == '__main__':
etc.configLogging()
unittest.main()
Loading

0 comments on commit dc2394f

Please sign in to comment.