Merge pull request #2 from shankari/trb_2017

Percom analysis + adapt the notebook script to read config
e-mission · Mar 7, 2018 · 3565a46 · 3565a46
2 parents 20d9069 + 3fe973c
commit 3565a46
Show file tree

Hide file tree

Showing 13 changed files with 17,961 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,11 @@
 This repository contains ipython notebooks for the evaluation of the e-mission platform
+- Check out the e-mission server repository
+    https://github.com/e-mission/e-mission-server
+- Set the `EMISSION_SERVER_HOME` environment variable
+    ```
+    $ export EMISSION_SERVER_HOME=<path to e-mission-server repo>
+    ```
 - To start the notebooks, use `bin/em-jupyter-notebook.sh` instead of `jupyter notebook`
-  You may need to edit the path to the e-mission server directory to match your system
 - To get the data for the notebooks to run on, look at the dataset listed at
   the top of the notebook, and request the data for research purposes using 
     https://github.com/e-mission/e-mission-server/wiki/Requesting-data-as-a-collaborator
diff --git a/bin/em-jupyter-notebook.sh b/bin/em-jupyter-notebook.sh
@@ -5,4 +5,4 @@
 
 # Make sure that the python here is the anaconda python if that is not the one in the path
 
-PYTHONPATH=$HOME/e-mission/e-mission-server jupyter notebook $*
+# Quote the server path and forward arguments with "$@" so that paths and
+# notebook arguments containing spaces are passed through unchanged
+PYTHONPATH="$EMISSION_SERVER_HOME" jupyter notebook "$@"
diff --git a/percom_moves_collect_2014/Project_scratch_mode_inference.ipynb b/percom_moves_collect_2014/Project_scratch_mode_inference.ipynb
diff --git a/percom_moves_collect_2014/README.md b/percom_moves_collect_2014/README.md
@@ -0,0 +1,38 @@
+### Overview
+
+This directory contains the analysis included in the PerCom 2014 paper. Unfortunately, the underlying dataset cannot be published because it contains privacy-sensitive location data. However, the corresponding author can run analyses based on the notebooks here against the raw data and return the results.
+
+The analysis is represented in three notebooks:
+- `mode_inference_percom_july_2014_data.ipynb`: base notebook. Since results are not included, changes to the analysis scripts are easy to track. Needs access to the raw dataset to re-generate results.
+- `mode_inference_percom_july_2014_data_with_result.ipynb`: copy of previous notebook with results included. Since results are included, diffs are going to be very hard to parse, but it shows a lot more detailed results than the paper without needing access to the data.
+- `Project_scratch_mode_inference.ipynb`: original notebook used for the paper. It has bitrotted and can no longer be run, but it is useful as the source of the original results.
+
+### Running
+
+The notebook *must* be run from the current directory because it uses some files that are relative to this directory.
+
+1. Install the e-mission server, including setting it up
+
+1. Set the home environment variable
+
+```
+$ export EMISSION_SERVER_HOME=<path_to_emission_server_repo>
+```
+
+1. Set up this analysis
+
+```
+$ source setup.sh
+```
+
+1. Start the notebook server
+
+```
+$ ../bin/em-jupyter-notebook.sh
+```
+
+After completing analysis, tear down
+
+```
+$ source teardown.sh
+```
diff --git a/percom_moves_collect_2014/TestFeatureCalc.py b/percom_moves_collect_2014/TestFeatureCalc.py
@@ -0,0 +1,176 @@
+import unittest
+import json
+from featurecalc import calDistance, calSpeed, calHeading, calAvgSpeed, calSpeeds, calAccels, getIthMaxSpeed, getIthMaxAccel
+import pygeocoder
+
+
+class TestFeatureCalc(unittest.TestCase):
+  # All the test data is obtained by using 
+  # Sections.find_one({"$and": [{'type': 'move'}, {'confirmed_mode': <modeId>}]})
+  # and then picking the first two points from the track points for that section
+  def setUp(self):
+    self.walktp1 = {"track_location": {"type": "Point",
+        "coordinates": [37.8724267522, -122.2593326013]}, "time": "20140415T182218Z"}
+    self.walktp2 = {"track_location": {"type": "Point",
+        "coordinates": [37.8722939116, -122.2594439528]}, "time": "20140415T182251Z"}
+
+    self.biketp1 = {"track_location": {"type": "Point", "coordinates": [37.8385561216, -122.2495945853]}, "time": "20140418T181035-0700"}
+    self.biketp2 = {"track_location": {"type": "Point", "coordinates": [37.838834329, -122.249646471]}, "time": "20140418T181100-0700"}
+
+    self.traintp1 = {"track_location": {"type": "Point", "coordinates": [37.8419243845, -122.251608766]}, "time": "20140418T083731-0700"}
+    self.traintp2 = {"track_location": {"type": "Point", "coordinates": [37.841983358, -122.2516275124]}, "time": "20140418T083748-0700"}
+
+    self.cartp1 = {"track_location": {"type": "Point", "coordinates": [37.8796206126, -122.272393763]}, "time": "20140418T013558Z"}
+    self.cartp2 = {"track_location": {"type": "Point", "coordinates": [37.8796948352, -122.2724807525]}, "time": "20140418T013618Z"}
+
+    from datetime import datetime
+
+    segments = json.load(open("testFeatureCalcData.json"))
+    self.walkSegment = segments[0]
+    self.walkSegment['section_start_datetime'] = datetime(2014, 4, 15, 18, 22, 18)
+    self.walkSegment['section_end_datetime'] = datetime(2014, 4, 15, 18, 31, 27)
+
+    self.bikeSegment = segments[1]
+    self.bikeSegment['section_start_datetime'] = datetime(2014, 4, 19, 1, 10, 35)
+    self.bikeSegment['section_end_datetime'] = datetime(2014, 4, 19, 1, 23, 16)
+
+    self.trainSegment = segments[2]
+    self.trainSegment['section_start_datetime'] = datetime(2014, 4, 18, 15, 37, 31)
+    self.trainSegment['section_end_datetime'] = datetime(2014, 4, 18, 15, 48, 3)
+
+    self.carSegment = segments[3]
+    self.carSegment['section_start_datetime'] = datetime(2014, 4, 18, 1, 35, 58)
+    self.carSegment['section_end_datetime'] = datetime(2014, 4, 18, 1, 42, 46)
+
+  # We spot check by using real values from the test data and comparing them to
+  # the calculations at http://www.movable-type.co.uk/scripts/latlong.html
+  def testCalDistance(self):
+    self.assertAlmostEqual(
+        calDistance([37.8724267522, -122.2593326013], [37.8678553385, -122.2597410423]),
+          509.6, places=1)
+
+    self.assertAlmostEqual(
+      calDistance(self.walktp1['track_location']['coordinates'], self.walktp2['track_location']['coordinates']),
+      17.71, places=1)
+
+    # points obtained from a bike trip
+    self.assertAlmostEqual(
+      calDistance(self.biketp1['track_location']['coordinates'], self.biketp2['track_location']['coordinates']),
+      31.27, places=1)
+
+    self.assertAlmostEqual(
+      calDistance(self.traintp1['track_location']['coordinates'], self.traintp2['track_location']['coordinates']),
+      6.761, places=1)
+
+    self.assertAlmostEqual(
+      calDistance(self.cartp1['track_location']['coordinates'], self.cartp2['track_location']['coordinates']),
+      11.24, places=1)
+
+  def testCalcSpeed(self):
+    # points obtained from a walk trip
+    self.assertAlmostEqual(
+      calSpeed(self.walktp1, self.walktp2), 0.53666, places=2)
+
+    self.assertAlmostEqual(
+      calSpeed(self.biketp1, self.biketp2), 1.25, places=2)
+
+    self.assertAlmostEqual(
+      calSpeed(self.traintp1, self.traintp2), 0.3977, places=2)
+
+    self.assertAlmostEqual(
+      calSpeed(self.cartp1, self.cartp2), 0.562, places=2)
+
+  def testCalcHeading(self):
+    # points from a walking trip
+    self.assertAlmostEqual(
+      calHeading(self.walktp1['track_location']['coordinates'], self.walktp2['track_location']['coordinates']),
+      -147, places=0)
+
+    self.assertAlmostEqual(
+      calHeading(self.biketp1['track_location']['coordinates'], self.biketp2['track_location']['coordinates']),
+      -8.37, places=0)
+
+    self.assertAlmostEqual(
+      calHeading(self.traintp1['track_location']['coordinates'], self.traintp2['track_location']['coordinates']),
+      -14.09, places=0)
+
+    self.assertAlmostEqual(
+      calHeading(self.cartp1['track_location']['coordinates'], self.cartp2['track_location']['coordinates']),
+      -43, places=0)
+
+    # The website returns only a positive heading - it converts a negative heading to positive
+    # by subtracting from 360. I think we can deal with negative headings, so we don't subtract
+    # but then we need to fix the value that we compare against
+    # 184deg 02'04''
+    self.assertAlmostEqual(
+        calHeading([37.8724267522, -122.2593326013], [37.8678553385, -122.2597410423]),
+          - (360 - 184.035), places=1)
+
+  def testAvgSpeeds(self):
+    self.assertAlmostEqual(
+      calAvgSpeed(self.walkSegment), 1.14025, places = 2)
+
+    self.assertAlmostEqual(
+      calAvgSpeed(self.bikeSegment), 4.92509, places = 2)
+
+    self.assertAlmostEqual(
+      calAvgSpeed(self.trainSegment), 4.36708, places = 2)
+
+    self.assertAlmostEqual(
+      calAvgSpeed(self.carSegment), 4.52696, places = 2)
+
+  def testSegSpeeds(self):
+    self.assertAlmostEqual(
+      calSpeeds(self.walkSegment)[0], 0.53666, places=2)
+
+    self.assertAlmostEqual(
+      calSpeeds(self.bikeSegment)[0], 1.25, places=2)
+
+    self.assertAlmostEqual(
+      calSpeeds(self.trainSegment)[0], 0.3977, places=2)
+
+    self.assertAlmostEqual(
+      calSpeeds(self.carSegment)[0], 0.562, places=2)
+
+  def testSegSecondSpeeds(self):
+    self.assertAlmostEqual(
+      calSpeeds(self.walkSegment)[1], 0.47711, places=2) # 52 secs
+
+    self.assertAlmostEqual(
+      calSpeeds(self.bikeSegment)[1], 2.05027, places=2) # 181 secs
+
+    self.assertAlmostEqual(
+      calSpeeds(self.trainSegment)[1], 5.61904, places=2) # 21 secs
+
+    self.assertAlmostEqual(
+      calSpeeds(self.carSegment)[1], 0.26278, places=2) # 19 secs
+
+  def testFirstAccel(self):
+    self.assertAlmostEqual(
+      calAccels(self.walkSegment)[0], 0.01626, places=3) # 33 secs
+
+    self.assertAlmostEqual(
+      calAccels(self.bikeSegment)[0], 0.05, places=4) # 25 secs
+
+    self.assertAlmostEqual(
+      calAccels(self.trainSegment)[0], 0.02339, places=4) # 17 secs
+
+    self.assertAlmostEqual(
+      calAccels(self.carSegment)[0], 0.02810, places=4) # 20 secs
+
+  def testSecondAccel(self):
+    self.assertAlmostEqual(
+      calAccels(self.walkSegment)[1], -0.00114, places=4) # 52 secs
+
+    self.assertAlmostEqual(
+      calAccels(self.bikeSegment)[1], 0.00442, places=4) # 181 secs
+
+    self.assertAlmostEqual(
+      calAccels(self.trainSegment)[1], 0.24863, places=3) # 21 secs
+
+    self.assertAlmostEqual(
+      calAccels(self.carSegment)[1], -0.01574, places=4) # 19 secs
+
+
+# Run the test cases when this file is executed directly as a script
+if __name__ == '__main__':
+    unittest.main()
diff --git a/percom_moves_collect_2014/displayHelpers.py b/percom_moves_collect_2014/displayHelpers.py
@@ -0,0 +1,115 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+def showCategoryChart(nameList, countListList, countLabelList, countColorList, ylabel, title, cleanNameDict = None, figsize = None, width=0.35, barLabelSize=None):
+    N = len(countListList[0])
+    print(N)
+
+    ind = np.arange(N)  # the x locations for the groups
+
+    if figsize:
+      fig, ax = plt.subplots(figsize=figsize)
+    else: 
+      fig, ax = plt.subplots()
+
+    rectList = []
+    for i, countList in enumerate(countListList):
+      # print "Added bar for %s" % countList
+      currInd = np.arange(len(countList))
+      if countColorList != None:
+        currColor = countColorList[i]
+      else:
+        currColor = np.random.rand(3,1)
+      rectList.append(ax.bar(currInd + i * width, countList, width, color = currColor))
+      # rectList.append(ax.bar(ind, countList, width, color = 'r'))
+
+    cleanedNameList = []
+    if cleanNameDict:
+      for name in nameList:
+          if name in cleanNameDict:
+              cleanedNameList.append(cleanNameDict[name])
+          else:
+              cleanedNameList.append(name)
+    else:
+      cleanedNameList = nameList
+
+    # add some
+    ax.set_ylabel(ylabel)
+    ax.set_title(title)
+    ax.set_xticks(ind+width/2)
+    # ax.set_xticklabels(cleanedNameList, rotation = 45, rotation_mode = "anchor")
+    ax.set_xticklabels(cleanedNameList)
+
+    print("len(rectList) = %d, len(countLabelList) = %d" % (len(rectList), len(countLabelList)))
+    if len(countLabelList) > 1:
+      if len(countLabelList) > 3:
+        nCols = len(countLabelList) // 3
+      else:
+        nCols = len(countLabelList)
+      plt.legend(rectList, countLabelList, loc="best", framealpha=0.3, ncol = nCols + 1)
+      # ax.legend(rectList, countLabelList)
+
+    def autolabel(rects):
+        # attach some text labels
+        for rect in rects:
+            height = rect.get_height()
+            ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%d'%int(height),
+                    ha='center', va='bottom', size=barLabelSize)
+
+    if not barLabelSize == None:
+      for rect in rectList:
+        autolabel(rect)
+    return (fig, ax)
+
+
+def showHorizCategoryChart(nameList, countListList, countLabelList, countColorList, ylabel, title, cleanNameDict = None, figsize = None, width=0.35, barLabelSize=None):
+    N = len(countListList[0])
+    print(N)
+
+    ind = np.arange(N)  # the x locations for the groups
+
+    if figsize:
+      fig, ax = plt.subplots(figsize=figsize)
+    else: 
+      fig, ax = plt.subplots()
+
+    rectList = []
+    for i, countList in enumerate(countListList):
+      currInd = np.arange(len(countList))
+      rectList.append(ax.barh(currInd + i * width, countList, width, color = countColorList[i]))
+      # rectList.append(ax.bar(ind, countList, width, color = 'r'))
+
+    cleanedNameList = []
+    if cleanNameDict:
+      for name in nameList:
+          if name in cleanNameDict:
+              cleanedNameList.append(cleanNameDict[name])
+          else:
+              cleanedNameList.append(name)
+    else:
+      cleanedNameList = nameList
+
+    # add some
+    ax.set_xlabel(ylabel)
+    ax.set_title(title)
+    ax.set_yticks(ind+width/2)
+    # ax.set_xticklabels(cleanedNameList, rotation = 45, rotation_mode = "anchor")
+    ax.set_yticklabels(cleanedNameList)
+
+    if len(countLabelList) > 1:
+      # ax.legend(rectList, countLabelList)
+      plt.legend(rectList, countLabelList, loc="best", framealpha=0.5)
+
+    def autolabel(rects):
+        # attach some text labels
+        for rect in rects:
+            height = rect.get_height()
+            bw = rect.get_width()
+            print(rect.get_y(), rect.get_height(), bw)
+            ax.text(bw + 0.02 * (ax.get_xlim()[1] - ax.get_xlim()[0]),
+                    rect.get_y()+rect.get_height()/2.,'%d'%int(bw),
+                    ha='left', va='center', size=barLabelSize)
+
+    for rect in rectList:
+      autolabel(rect)
+    return (fig, ax)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -5,4 +5,4 @@

		# Make sure that the python here is the anaconda python if that is not the one in the path

		PYTHONPATH=$HOME/e-mission/e-mission-server jupyter notebook $*
		PYTHONPATH=$EMISSION_SERVER_HOME jupyter notebook $*