Skip to content

Commit

Permalink
Merge pull request #847 from shankari/improve_has_label
Browse files Browse the repository at this point in the history
Create a dataframe version of has_final_labels
  • Loading branch information
shankari authored Jan 15, 2022
2 parents 6bfecc9 + e9fe2ed commit 1f6d6c8
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
9 changes: 9 additions & 0 deletions emission/storage/decorations/trip_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,15 @@ def has_final_labels(confirmed_trip_data):
return (confirmed_trip_data["user_input"] != {}
or confirmed_trip_data["expectation"]["to_label"] == False)

# Create an alternate method to work on the dataframe column-wise
# instead of iterating over each individual row for improved performance
def has_final_labels_df(df):
    """Return the rows of ``df`` that have a final label.

    Column-wise counterpart of ``has_final_labels``: a row is kept when its
    ``user_input`` is not an empty dict, or when the ``to_label`` entry of
    its ``expectation`` dict is False.

    :param df: dataframe with a ``user_input`` column and an ``expectation``
        column whose values are dicts containing a ``to_label`` key
    :return: a new dataframe holding only the matching rows; a dataframe
        with no rows yields an empty copy
    """
    if len(df) == 0:
        # Expanding zero expectation dicts produces a frame without a
        # ``to_label`` column, so the attribute lookup below would raise
        # AttributeError. With no rows there is nothing to select anyway.
        return df.copy()

    # Expand the per-row expectation dicts into columns, reusing the original
    # index so the boolean masks below align with ``df``.
    expectation_df = pd.DataFrame(df.expectation.to_list(), index=df.index)
    to_label_series = expectation_df.to_label

    has_user_input = df.user_input != {}
    # ``== False`` is deliberate: it is an elementwise comparison on the
    # series, mirroring the check in the row-wise ``has_final_labels``.
    no_label_expected = to_label_series == False
    return df[has_user_input | no_label_expected]

def get_max_prob_label(inferred_label_list):
# Two columns: "labels" and "p"
label_prob_df = pd.DataFrame(inferred_label_list)
Expand Down
33 changes: 33 additions & 0 deletions emission/tests/storageTests/TestTripQueries.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,39 @@ def testHasFinalLabels(self):
"expectation": {"to_label": False}
})))

def testHasFinalLabelsDataFrame(self):
    """Compare the dataframe-level filter with the row-wise check.

    The fixture has four groups of three trips each: user-labeled trips,
    unlabeled trips that are not expected to be labeled, and two groups of
    unlabeled trips that are still expected to be labeled (with and
    without inferred labels).
    """
    bike_shopping = {"mode_confirm": "bike", "purpose_confirm": "shopping"}
    walk_exercise = {"mode_confirm": "walk", "purpose_confirm": "exercise"}
    drove_work = {"mode_confirm": "drove_alone", "purpose_confirm": "work"}

    user_labeled_trip = {
        "user_input": bike_shopping,
        "expectation": {"to_label": True},
    }
    no_label_expected_trip = {
        "user_input": {},
        "expectation": {"to_label": False},
        "inferred_labels": [
            {"labels": bike_shopping, "p": 0.1},
            {"labels": walk_exercise, "p": 0.9},
        ],
    }
    inferred_only_trip = {
        "user_input": {},
        "expectation": {"to_label": True},
        "inferred_labels": [
            {"labels": bike_shopping, "p": 0.2},
            {"labels": walk_exercise, "p": 0.4},
            {"labels": drove_work, "p": 0.4},
        ],
    }
    bare_trip = {"user_input": {}, "expectation": {"to_label": True}}

    all_trips = []
    for trip in (user_labeled_trip, no_label_expected_trip,
                 inferred_only_trip, bare_trip):
        all_trips.extend([trip] * 3)
    test_mixed_df = pd.DataFrame(all_trips)

    def row_wise_mask():
        # Evaluate the per-row predicate over every row of the fixture.
        return test_mixed_df.apply(esdt.has_final_labels, axis=1)

    # Old way: only rows with actual user input are counted.
    user_input_mask = test_mixed_df.user_input != {}
    self.assertEqual(test_mixed_df[user_input_mask].shape[0], 3)

    # Row-wise predicate: user inputs plus the to_label == False rows.
    self.assertEqual(np.count_nonzero(row_wise_mask()), 6)

    # Filtering with the row-wise predicate keeps the same six rows.
    row_wise_selected = test_mixed_df[row_wise_mask()]
    self.assertEqual(row_wise_selected.shape[0], 6)

    # The column-wise implementation must agree with the row-wise one.
    column_wise_selected = esdt.has_final_labels_df(test_mixed_df)
    self.assertEqual(column_wise_selected.shape[0], 6)


def testGetMaxProbLabel(self):
self.assertEqual(esdt.get_max_prob_label([
{'labels': {'mc': 30, 'pc': 40}, 'p': 0.9}]), {'mc': 30, 'pc': 40})
Expand Down

0 comments on commit 1f6d6c8

Please sign in to comment.