Merge pull request #441 from shankari/deploy_fixes
Minor post-deploy fixes
shankari authored Oct 27, 2016
2 parents 0b9b2fa + 24ccf27 commit f009ded
Showing 8 changed files with 473,192 additions and 3 deletions.
4 changes: 3 additions & 1 deletion emission/analysis/intake/cleaning/filter_accuracy.py
@@ -63,7 +63,9 @@ def convert_to_filtered(entry):
     return entry

 def continuous_collection_in_range(timeseries):
-    return timeseries.user_id in estag.TEST_PHONE_IDS
+    import emission.pipeline.scheduler as eps
+    return timeseries.user_id in estag.TEST_PHONE_IDS and \
+           timeseries.user_id not in eps.TEMP_HANDLED_PUBLIC_PHONES

 def filter_accuracy(user_id):
     time_query = epq.get_time_range_for_accuracy_filtering(user_id)
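
For readers unfamiliar with the pipeline, here is a minimal sketch of what the new guard does. The lists below are made-up stand-ins for estag.TEST_PHONE_IDS and the scheduler's TEMP_HANDLED_PUBLIC_PHONES, and the sketch takes a bare UUID rather than the real timeseries object:

# Illustrative sketch only -- stand-in lists, not the real estag / eps modules.
from uuid import UUID

TEST_PHONE_IDS = [UUID(int=1), UUID(int=2)]
TEMP_HANDLED_PUBLIC_PHONES = [UUID(int=2)]

def continuous_collection_in_range(user_id):
    # Mirrors the updated check: test phones that are temporarily handled
    # by the regular pipeline no longer bypass accuracy filtering.
    return user_id in TEST_PHONE_IDS and \
           user_id not in TEMP_HANDLED_PUBLIC_PHONES

print(continuous_collection_in_range(UUID(int=1)))  # True
print(continuous_collection_in_range(UUID(int=2)))  # False (temporarily handled)
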
1 change: 1 addition & 0 deletions emission/core/wrapper/transition.py
@@ -16,6 +16,7 @@ class TransitionType(enum.Enum):
     STOP_TRACKING = 3
     # android specific transitions
     BOOTED = 4
+    TRACKING_ERROR = 18
     # iOS specific transitions
     INIT_COMPLETE = 5
     TRIP_STARTED = 6
3 changes: 2 additions & 1 deletion emission/net/api/cfc_webapp.py
@@ -418,7 +418,8 @@ def summarize_metrics(time_type):
     if old_style:
         logging.debug("old_style metrics found, returning array of entries instead of array of arrays")
         assert(len(metric_list) == 1)
-        ret_val['user_metrics'] = ret_val['user_metrics'][0]
+        if 'user_metrics' in ret_val:
+            ret_val['user_metrics'] = ret_val['user_metrics'][0]
         ret_val['aggregate_metrics'] = ret_val['aggregate_metrics'][0]
     return ret_val

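
A rough illustration of why the new guard matters; the ret_val dict below is fabricated rather than taken from the real summarize_metrics response. When a caller asks only for aggregate metrics, 'user_metrics' is absent and the old unconditional indexing raised a KeyError on old-style responses:

# Fabricated response dict for illustration; the real one is assembled
# earlier in summarize_metrics.
ret_val = {'aggregate_metrics': [[{'mode': 'WALKING', 'count': 3}]]}
old_style = True

if old_style:
    # Guarding on the key avoids a KeyError when no per-user metrics were requested.
    if 'user_metrics' in ret_val:
        ret_val['user_metrics'] = ret_val['user_metrics'][0]
    ret_val['aggregate_metrics'] = ret_val['aggregate_metrics'][0]

print(ret_val)  # {'aggregate_metrics': [{'mode': 'WALKING', 'count': 3}]}
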
3 changes: 2 additions & 1 deletion emission/net/usercache/formatters/android/transition.py
@@ -18,7 +18,8 @@
"local.transition.exited_geofence": et.TransitionType.EXITED_GEOFENCE,
"local.transition.stopped_moving": et.TransitionType.STOPPED_MOVING,
"local.transition.stop_tracking": et.TransitionType.STOP_TRACKING,
"local.transition.start_tracking": et.TransitionType.START_TRACKING
"local.transition.start_tracking": et.TransitionType.START_TRACKING,
"local.transition.tracking_error": et.TransitionType.TRACKING_ERROR
}

def format(entry):
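
A trimmed sketch of what the two TRACKING_ERROR additions accomplish together; only the enum values visible in the transition.py hunk above are reproduced, and the map is a cut-down stand-in for the formatter's full table:

import enum

# Values copied from the transition.py hunk above; other members omitted.
class TransitionType(enum.Enum):
    STOP_TRACKING = 3
    BOOTED = 4
    TRACKING_ERROR = 18

# Cut-down stand-in for the android formatter's transition map.
transition_map = {
    "local.transition.stop_tracking": TransitionType.STOP_TRACKING,
    "local.transition.tracking_error": TransitionType.TRACKING_ERROR,
}

# Before this commit the tracking_error key was missing, so formatting such
# an entry failed with a KeyError; now it resolves cleanly.
print(transition_map["local.transition.tracking_error"])  # TransitionType.TRACKING_ERROR
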
8 changes: 8 additions & 0 deletions emission/pipeline/scheduler.py
@@ -1,12 +1,17 @@
 import logging
 import multiprocessing as mp
 import pandas as pd
+from uuid import UUID

 import emission.core.get_database as edb
 import emission.net.usercache.abstract_usercache as enua
 import emission.storage.timeseries.aggregate_timeseries as estag
 import emission.pipeline.intake_stage as epi

+TEMP_HANDLED_PUBLIC_PHONES = [UUID("95e70727-a04e-3e33-b7fe-34ab19194f8b"),
+                              UUID("fd7b4c2e-2c8b-3bfa-94f0-d1e3ecbd5fb7"),
+                              UUID("3bc0f91f-7660-34a2-b005-5c399598a369")]
+
 def get_split_uuid_lists(n_splits, is_public_pipeline):
     get_count = lambda u: enua.UserCache.getUserCache(u).getMessageCount()

@@ -32,6 +37,9 @@ def get_split_uuid_lists(n_splits, is_public_pipeline):
         sel_uuids = [u for u in all_uuids if u in estag.TEST_PHONE_IDS]
     else:
         sel_uuids = [u for u in all_uuids if u not in estag.TEST_PHONE_IDS]
+        # Add back the test phones for now so that we can test the data
+        # collection changes before deploying them in the wild
+        sel_uuids.extend(TEMP_HANDLED_PUBLIC_PHONES)

     sel_jobs = [(u, get_count(u)) for u in sel_uuids]
     # non_zero_jobs = [j for j in sel_jobs if j[1] !=0 ]
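
A simplified sketch of the selection branch shown above; the UUIDs are invented and the user-cache message counting is omitted. Regular (non-test-phone) users are selected, and the temporarily handled public phones are appended so their data still flows through this pipeline while the data-collection changes are tested:

from uuid import UUID

# Made-up stand-ins for estag.TEST_PHONE_IDS and TEMP_HANDLED_PUBLIC_PHONES.
TEST_PHONE_IDS = [UUID(int=1), UUID(int=2)]
TEMP_HANDLED_PUBLIC_PHONES = [UUID(int=2)]
all_uuids = [UUID(int=1), UUID(int=2), UUID(int=3)]

# Branch from the hunk: select regular users, then add back the temporarily
# handled test phones so they are processed alongside everyone else.
sel_uuids = [u for u in all_uuids if u not in TEST_PHONE_IDS]
sel_uuids.extend(TEMP_HANDLED_PUBLIC_PHONES)

print(sel_uuids)  # the regular user plus the temporarily handled phone
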
38 changes: 38 additions & 0 deletions emission/tests/analysisTests/intakeTests/TestPipelineRealData.py
@@ -644,6 +644,44 @@ def testFeb22MultiSyncEndNotDetected(self):
         self.compare_approx_result(ad.AttrDict({'result': api_result}).result,
                                    ad.AttrDict(ground_truth).data, time_fuzz=60, distance_fuzz=100)

+    def testOct07MultiSyncSpuriousEndDetected(self):
+        # Re-run, but with multiple calls to sync data
+        # This tests the effect of online versus offline analysis and segmentation with potentially partial data
+
+        dataFile = "emission/tests/data/real_examples/issue_436_assertion_error"
+        start_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 7})
+        end_ld = ecwl.LocalDate({'year': 2016, 'month': 10, 'day': 7})
+        cacheKey = "diary/trips-2016-10-07"
+        ground_truth = json.load(open(dataFile+".ground_truth"), object_hook=bju.object_hook)
+
+        logging.info("Before loading, timeseries db size = %s" % edb.get_timeseries_db().count())
+        all_entries = json.load(open(dataFile), object_hook = bju.object_hook)
+        # Split the entries at 18:33, partway through the day and just before
+        # the end of the trip to the optometrist
+        ts_1800 = arrow.get("2016-10-07T18:33:11-07:00").timestamp
+        logging.debug("ts_1800 = %s, converted back = %s" % (ts_1800, arrow.get(ts_1800).to("America/Los_Angeles")))
+        before_1800_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts <= ts_1800]
+        after_1800_entries = [e for e in all_entries if ad.AttrDict(e).metadata.write_ts > ts_1800]
+
+        # Sync at 18:33 to capture all the points on the trip *to* the optometrist
+        # Skip the last few points to ensure that the trip end is skipped
+        import uuid
+        self.testUUID = uuid.uuid4()
+        self.entries = before_1800_entries
+        etc.setupRealExampleWithEntries(self)
+        etc.runIntakePipeline(self.testUUID)
+        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)
+
+        # Then sync after 18:33
+        self.entries = after_1800_entries
+        etc.setupRealExampleWithEntries(self)
+        etc.runIntakePipeline(self.testUUID)
+        api_result = gfc.get_geojson_for_dt(self.testUUID, start_ld, end_ld)
+
+        # Although we process the day's data in two batches, we should get the same result
+        self.compare_approx_result(ad.AttrDict({'result': api_result}).result,
+                                   ad.AttrDict(ground_truth).data, time_fuzz=60, distance_fuzz=100)
+
     def testZeroDurationPlaceInterpolationSingleSync(self):
         # Test for 545114feb5ac15caac4110d39935612525954b71
         dataFile_1 = "emission/tests/data/real_examples/shankari_2016-01-12"
(The remaining changed files are not rendered in this view.)
