Skip to content

Commit

Permalink
Merge pull request #2 from shankari/trb_2017
Browse files Browse the repository at this point in the history
Percom analysis + adapt the notebook script to read config
  • Loading branch information
shankari authored Mar 7, 2018
2 parents 20d9069 + 3fe973c commit 3565a46
Show file tree
Hide file tree
Showing 13 changed files with 17,961 additions and 2 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
This repository contains ipython notebooks for the evaluation of the e-mission platform
- Check out the e-mission server repository
https://github.com/e-mission/e-mission-server
- Set the `EMISSION_SERVER_HOME` environment variable
```
$ export EMISSION_SERVER_HOME=<path to e-mission-server repo>
```
- To start the notebooks, use `bin/em-jupyter-notebook.sh` instead of `jupyter notebook`
You may need to edit the path to the e-mission server directory to match your system
- To get the data for the notebooks to run on, look at the dataset listed at
the top of the notebook, and request the data for research purposes using
https://github.com/e-mission/e-mission-server/wiki/Requesting-data-as-a-collaborator
2 changes: 1 addition & 1 deletion bin/em-jupyter-notebook.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@

# Make sure that the python here is the anaconda python if that is not the one in the path

PYTHONPATH=$HOME/e-mission/e-mission-server jupyter notebook $*
PYTHONPATH=$EMISSION_SERVER_HOME jupyter notebook $*
8,613 changes: 8,613 additions & 0 deletions percom_moves_collect_2014/Project_scratch_mode_inference.ipynb

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions percom_moves_collect_2014/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
### Overview

This directory contains the analysis included in the PerCom 2014 paper. Unfortunately, the underlying dataset cannot be published because it contains privacy-sensitive location data. However, the corresponding author can run analyses based on the notebooks here against the raw data and return the results.

The analysis is represented in three notebooks:
- `mode_inference_percom_july_2014_data.ipynb`: base notebook. Since results are not included, changes to the analysis scripts are easy to track. Needs access to the raw dataset to re-generate results.
- `mode_inference_percom_july_2014_data_with_result.ipynb`: copy of previous notebook with results included. Since results are included, diffs are going to be very hard to parse, but it shows a lot more detailed results than the paper without needing access to the data.
- `Project_scratch_mode_inference.ipynb`: original notebook used for the paper. It has bitrotted and can no longer be run, but it is useful as the source of the original results.

### Running

The notebook *must* be run from the current directory because it uses some files that are relative to this directory.

1. Install the e-mission server, including setting it up

1. Set the home environment variable

```
$ export EMISSION_SERVER_HOME=<path_to_emission_server_repo>
```

1. Set up this analysis

```
$ source setup.sh
```

1. Start the notebook server

```
$ ../bin/em-jupyter-notebook.sh
```

After completing the analysis, tear down the setup:

```
$ source teardown.sh
```
176 changes: 176 additions & 0 deletions percom_moves_collect_2014/TestFeatureCalc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import unittest
import json
from featurecalc import calDistance, calSpeed, calHeading, calAvgSpeed, calSpeeds, calAccels, getIthMaxSpeed, getIthMaxAccel
import pygeocoder


class TestFeatureCalc(unittest.TestCase):
    """Spot checks for the distance/speed/heading/acceleration helpers in `featurecalc`.

    Each test compares a helper's output against a hard-coded expected value
    using `assertAlmostEqual` with a per-assertion number of decimal places.
    """

    # All the test data is obtained by using
    # Sections.find_one({"$and": [{'type': 'move'}, {'confirmed_mode': <modeId>}]})
    # and then picking the first two points from the track points for that section
    def setUp(self):
        """Build the track-point pairs and load the four sample segments.

        The segments are read from `testFeatureCalcData.json`, resolved
        relative to the current working directory, so the tests must be run
        from the directory that contains that file.
        """
        from datetime import datetime

        self.walktp1 = {"track_location": {"type": "Point",
                                           "coordinates": [37.8724267522, -122.2593326013]},
                        "time": "20140415T182218Z"}
        self.walktp2 = {"track_location": {"type": "Point",
                                           "coordinates": [37.8722939116, -122.2594439528]},
                        "time": "20140415T182251Z"}

        self.biketp1 = {"track_location": {"type": "Point", "coordinates": [37.8385561216, -122.2495945853]}, "time": "20140418T181035-0700"}
        self.biketp2 = {"track_location": {"type": "Point", "coordinates": [37.838834329, -122.249646471]}, "time": "20140418T181100-0700"}

        self.traintp1 = {"track_location": {"type": "Point", "coordinates": [37.8419243845, -122.251608766]}, "time": "20140418T083731-0700"}
        self.traintp2 = {"track_location": {"type": "Point", "coordinates": [37.841983358, -122.2516275124]}, "time": "20140418T083748-0700"}

        self.cartp1 = {"track_location": {"type": "Point", "coordinates": [37.8796206126, -122.272393763]}, "time": "20140418T013558Z"}
        self.cartp2 = {"track_location": {"type": "Point", "coordinates": [37.8796948352, -122.2724807525]}, "time": "20140418T013618Z"}

        # Use a context manager so the data file is closed as soon as it has
        # been parsed instead of leaking the handle once per test.
        with open("testFeatureCalcData.json") as segmentFile:
            segments = json.load(segmentFile)

        # The JSON file is loaded as a list and the first four entries are
        # used in the order walk, bike, train, car. The start/end datetimes
        # are attached here rather than read from the file.
        # NOTE(review): the values look like the UTC equivalents of the track
        # point times above -- confirm against featurecalc's expectations.
        self.walkSegment = segments[0]
        self.walkSegment['section_start_datetime'] = datetime(2014, 4, 15, 18, 22, 18)
        self.walkSegment['section_end_datetime'] = datetime(2014, 4, 15, 18, 31, 27)

        self.bikeSegment = segments[1]
        self.bikeSegment['section_start_datetime'] = datetime(2014, 4, 19, 1, 10, 35)
        self.bikeSegment['section_end_datetime'] = datetime(2014, 4, 19, 1, 23, 16)

        self.trainSegment = segments[2]
        self.trainSegment['section_start_datetime'] = datetime(2014, 4, 18, 15, 37, 31)
        self.trainSegment['section_end_datetime'] = datetime(2014, 4, 18, 15, 48, 3)

        self.carSegment = segments[3]
        self.carSegment['section_start_datetime'] = datetime(2014, 4, 18, 1, 35, 58)
        self.carSegment['section_end_datetime'] = datetime(2014, 4, 18, 1, 42, 46)

    # We spot check by using real values from the test data and comparing them to
    # the calculations at http://www.movable-type.co.uk/scripts/latlong.html
    def testCalDistance(self):
        """Check calDistance on a fixed coordinate pair and on each mode's first two points."""
        self.assertAlmostEqual(
            calDistance([37.8724267522, -122.2593326013], [37.8678553385, -122.2597410423]),
            509.6, places=1)

        self.assertAlmostEqual(
            calDistance(self.walktp1['track_location']['coordinates'], self.walktp2['track_location']['coordinates']),
            17.71, places=1)

        # points obtained from a bike trip
        self.assertAlmostEqual(
            calDistance(self.biketp1['track_location']['coordinates'], self.biketp2['track_location']['coordinates']),
            31.27, places=1)

        self.assertAlmostEqual(
            calDistance(self.traintp1['track_location']['coordinates'], self.traintp2['track_location']['coordinates']),
            6.761, places=1)

        self.assertAlmostEqual(
            calDistance(self.cartp1['track_location']['coordinates'], self.cartp2['track_location']['coordinates']),
            11.24, places=1)

    def testCalcSpeed(self):
        """Check calSpeed between the first two track points of each mode."""
        # points obtained from a walk trip
        self.assertAlmostEqual(
            calSpeed(self.walktp1, self.walktp2), 0.53666, places=2)

        self.assertAlmostEqual(
            calSpeed(self.biketp1, self.biketp2), 1.25, places=2)

        self.assertAlmostEqual(
            calSpeed(self.traintp1, self.traintp2), 0.3977, places=2)

        self.assertAlmostEqual(
            calSpeed(self.cartp1, self.cartp2), 0.562, places=2)

    def testCalcHeading(self):
        """Check calHeading, which is expected to return negative headings unmodified."""
        # points from a walking trip
        self.assertAlmostEqual(
            calHeading(self.walktp1['track_location']['coordinates'], self.walktp2['track_location']['coordinates']),
            -147, places=0)

        self.assertAlmostEqual(
            calHeading(self.biketp1['track_location']['coordinates'], self.biketp2['track_location']['coordinates']),
            -8.37, places=0)

        self.assertAlmostEqual(
            calHeading(self.traintp1['track_location']['coordinates'], self.traintp2['track_location']['coordinates']),
            -14.09, places=0)

        self.assertAlmostEqual(
            calHeading(self.cartp1['track_location']['coordinates'], self.cartp2['track_location']['coordinates']),
            -43, places=0)

        # The website returns only a positive heading - it converts a negative heading to positive
        # by subtracting from 360. I think we can deal with negative headings, so we don't subtract
        # but then we need to fix the value that we compare against
        # 184deg 02'04''
        self.assertAlmostEqual(
            calHeading([37.8724267522, -122.2593326013], [37.8678553385, -122.2597410423]),
            - (360 - 184.035), places=1)

    def testAvgSpeeds(self):
        """Check calAvgSpeed over each whole sample segment."""
        self.assertAlmostEqual(
            calAvgSpeed(self.walkSegment), 1.14025, places=2)

        self.assertAlmostEqual(
            calAvgSpeed(self.bikeSegment), 4.92509, places=2)

        self.assertAlmostEqual(
            calAvgSpeed(self.trainSegment), 4.36708, places=2)

        self.assertAlmostEqual(
            calAvgSpeed(self.carSegment), 4.52696, places=2)

    def testSegSpeeds(self):
        """Check the first entry of calSpeeds; expected values match testCalcSpeed."""
        self.assertAlmostEqual(
            calSpeeds(self.walkSegment)[0], 0.53666, places=2)

        self.assertAlmostEqual(
            calSpeeds(self.bikeSegment)[0], 1.25, places=2)

        self.assertAlmostEqual(
            calSpeeds(self.trainSegment)[0], 0.3977, places=2)

        self.assertAlmostEqual(
            calSpeeds(self.carSegment)[0], 0.562, places=2)

    def testSegSecondSpeeds(self):
        """Check the second entry of calSpeeds for each sample segment."""
        self.assertAlmostEqual(
            calSpeeds(self.walkSegment)[1], 0.47711, places=2)  # 52 secs

        self.assertAlmostEqual(
            calSpeeds(self.bikeSegment)[1], 2.05027, places=2)  # 181 secs

        self.assertAlmostEqual(
            calSpeeds(self.trainSegment)[1], 5.61904, places=2)  # 21 secs

        self.assertAlmostEqual(
            calSpeeds(self.carSegment)[1], 0.26278, places=2)  # 19 secs

    def testFirstAccel(self):
        """Check the first entry of calAccels for each sample segment."""
        self.assertAlmostEqual(
            calAccels(self.walkSegment)[0], 0.01626, places=3)  # 33 secs

        self.assertAlmostEqual(
            calAccels(self.bikeSegment)[0], 0.05, places=4)  # 25 secs

        self.assertAlmostEqual(
            calAccels(self.trainSegment)[0], 0.02339, places=4)  # 17 secs

        self.assertAlmostEqual(
            calAccels(self.carSegment)[0], 0.02810, places=4)  # 20 secs

    def testSecondAccel(self):
        """Check the second entry of calAccels for each sample segment."""
        self.assertAlmostEqual(
            calAccels(self.walkSegment)[1], -0.00114, places=4)  # 52 secs

        self.assertAlmostEqual(
            calAccels(self.bikeSegment)[1], 0.00442, places=4)  # 181 secs

        self.assertAlmostEqual(
            calAccels(self.trainSegment)[1], 0.24863, places=3)  # 21 secs

        self.assertAlmostEqual(
            calAccels(self.carSegment)[1], -0.01574, places=4)  # 19 secs

# Run the test cases in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
115 changes: 115 additions & 0 deletions percom_moves_collect_2014/displayHelpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import matplotlib.pyplot as plt
import numpy as np

def showCategoryChart(nameList, countListList, countLabelList, countColorList, ylabel, title, cleanNameDict = None, figsize = None, width=0.35, barLabelSize=None):
    """Draw a grouped vertical bar chart, one bar series per entry of countListList.

    Parameters:
        nameList: tick labels for the x axis.
        countListList: sequence of series; each series is a sequence of bar
            heights. Series ``i`` is drawn shifted right by ``i * width``.
        countLabelList: legend labels. A legend is drawn only when this has
            more than one entry.
        countColorList: one bar color per series, or None to pick a random
            RGB color for each series.
        ylabel: label for the y axis.
        title: chart title.
        cleanNameDict: optional mapping from raw names to display names;
            names that are not keys of the mapping are shown unchanged.
        figsize: optional figure size forwarded to ``plt.subplots``.
        width: width of a single bar.
        barLabelSize: text size for the numeric label above each bar. When
            None, no bar labels are drawn.

    Returns:
        The ``(fig, ax)`` pair created by ``plt.subplots``.

    Prints the number of entries in the first series and the sizes of the bar
    and label lists to stdout.
    """
    N = len(countListList[0])
    print(N)

    ind = np.arange(N)  # the x locations for the groups

    if figsize:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        fig, ax = plt.subplots()

    rectList = []
    for i, countList in enumerate(countListList):
        currInd = np.arange(len(countList))
        # Identity check: `!= None` would be evaluated elementwise if the
        # caller passes the colors as a numpy array.
        if countColorList is not None:
            currColor = countColorList[i]
        else:
            # A flat RGB triple. The previous (3, 1)-shaped array is not a
            # valid single color for matplotlib's color conversion.
            currColor = np.random.rand(3)
        rectList.append(ax.bar(currInd + i * width, countList, width, color = currColor))

    if cleanNameDict:
        cleanedNameList = [cleanNameDict[name] if name in cleanNameDict else name
                           for name in nameList]
    else:
        cleanedNameList = nameList

    # add the axis labels, title and tick labels
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(ind+width/2)
    ax.set_xticklabels(cleanedNameList)

    print("len(rectList) = %d, len(countLabelList) = %d" % (len(rectList), len(countLabelList)))
    if len(countLabelList) > 1:
        if len(countLabelList) > 3:
            nCols = len(countLabelList) // 3
        else:
            nCols = len(countLabelList)
        plt.legend(rectList, countLabelList, loc="best", framealpha=0.3, ncol = nCols + 1)

    def autolabel(rects):
        # attach a text label slightly above each bar, showing its height
        # truncated to an integer
        for rect in rects:
            height = rect.get_height()
            ax.text(rect.get_x()+rect.get_width()/2., 1.05*height, '%d'%int(height),
                    ha='center', va='bottom', size=barLabelSize)

    if barLabelSize is not None:
        for rect in rectList:
            autolabel(rect)
    return (fig, ax)


def showHorizCategoryChart(nameList, countListList, countLabelList, countColorList, ylabel, title, cleanNameDict = None, figsize = None, width=0.35, barLabelSize=None):
    """Draw a grouped horizontal bar chart, one bar series per entry of countListList.

    Parameters:
        nameList: tick labels for the y axis.
        countListList: sequence of series; each series is a sequence of bar
            lengths. Series ``i`` is drawn shifted by ``i * width`` along y.
        countLabelList: legend labels. A legend is drawn only when this has
            more than one entry.
        countColorList: one bar color per series, or None to pick a random
            RGB color for each series.
        ylabel: label for the value axis. Because the bars are horizontal,
            it is applied to the x axis.
        title: chart title.
        cleanNameDict: optional mapping from raw names to display names;
            names that are not keys of the mapping are shown unchanged.
        figsize: optional figure size forwarded to ``plt.subplots``.
        width: thickness of a single bar.
        barLabelSize: text size for the numeric label next to each bar. The
            labels are always drawn; None is passed through to ``ax.text``.

    Returns:
        The ``(fig, ax)`` pair created by ``plt.subplots``.

    Prints the number of entries in the first series and the geometry of
    every labelled bar to stdout.
    """
    N = len(countListList[0])
    print(N)

    ind = np.arange(N)  # the y locations for the groups

    if figsize:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        fig, ax = plt.subplots()

    rectList = []
    for i, countList in enumerate(countListList):
        currInd = np.arange(len(countList))
        if countColorList is not None:
            currColor = countColorList[i]
        else:
            # Same fallback as showCategoryChart: a random flat RGB triple
            # instead of failing when no colors are supplied.
            currColor = np.random.rand(3)
        rectList.append(ax.barh(currInd + i * width, countList, width, color = currColor))

    if cleanNameDict:
        cleanedNameList = [cleanNameDict[name] if name in cleanNameDict else name
                           for name in nameList]
    else:
        cleanedNameList = nameList

    # add the axis labels, title and tick labels
    ax.set_xlabel(ylabel)
    ax.set_title(title)
    ax.set_yticks(ind+width/2)
    ax.set_yticklabels(cleanedNameList)

    if len(countLabelList) > 1:
        plt.legend(rectList, countLabelList, loc="best", framealpha=0.5)

    def autolabel(rects):
        # attach a text label just past the end of each bar, showing its
        # length truncated to an integer
        for rect in rects:
            bw = rect.get_width()
            print(rect.get_y(), rect.get_height(), bw)
            # offset the label by 2% of the current x-axis range
            ax.text(bw + 0.02 * (ax.get_xlim()[1] - ax.get_xlim()[0]),
                    rect.get_y()+rect.get_height()/2.,'%d'%int(bw),
                    ha='left', va='center', size=barLabelSize)

    for rect in rectList:
        autolabel(rect)
    return (fig, ax)
Loading

0 comments on commit 3565a46

Please sign in to comment.