diff --git a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
index 68f402c2f..d750a451e 100644
--- a/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
+++ b/emission/analysis/modelling/trip_model/greedy_similarity_binning.py
@@ -23,30 +23,57 @@ def __init__(self, config=None):
         [https://github.com/e-mission/e-mission-server/blob/5b9e608154de15e32df4f70a07a5b95477e7dbf5/emission/analysis/modelling/tour_model/similarity.py#L67]
 
         this technique employs a greedy similarity heuristic to associate
-        trips with collections of probabilistic class labels. in pseudocode:
+        trips with collections of probabilistic class labels. new bins are
+        created when the next feature vector is not similar to any existing bin.
+        for a new feature vector to be similar to an existing bin, it must be
+        similar to all of the previous feature vectors found in that bin, by way
+        of a provided similarity metric and threshold value.
+
+        in pseudocode:
 
         # fit
         for each bin_id, bin in bins:
-            for each bin_trip in bin.trips:
-                if similar(trip, bin_trip):
-                    append trip to bin.trips
+            for each bin_feature_row in bin.feature_rows:
+                if not similar(trip.feature_row, bin_feature_row):
+                    continue to the next bin
+            append trip to bin and stop
+
+        the prediction of labels for some input trip takes a similar form,
+        where the first bin that is found to be similar provides the class
+        labels to apply:
 
         # prediction
         for each bin_id, bin in bins:
-            for each bin_trip in bin.trips:
-                if similar(trip, bin_trip):
-                    return bin.predictions: List[Prediction]
-
-        the number of predictions is not assumed to be the number of features.
-
-        the original similarity class (link above) used a nested List data
+            for each bin_feature_row in bin.feature_rows:
+                if not similar(trip.feature_row, bin_feature_row):
+                    continue to the next bin
+            return bin_id
+
+        to train the predictions, label sets are aggregated within a bin so that
+        the occurrences of each unique label combination are counted. the
+        probability of a specific unique label combination is the proportion of
+        its count to the total number of trips stored at this bin. the set of
+        unique label sets and their prediction values are then returned during
+        prediction.
+
+        in terms of the data structure of the model, each bin is a Dictionary with
+        three fields, "feature_rows", "labels", and "predictions", each a list.
+        whereas the number and order of "feature_rows" and "labels" are assumed to
+        match and to remain consistent across multiple training calls, the
+        "predictions" are over-written at each call of "fit" and are not assumed
+        to match the number of "feature_rows" or "labels" stored in a bin.
+
+        historical note: the original similarity class (link above) used a nested list data
         structure to capture the notion of binning. this was then copied into
-        a Dict when the model needed to be saved. the same technique can be
-        written to work directly on nested Dicts with no loss in performance.
+        a Dict when the model needed to be saved. the same technique can be re-written to
+        work directly on Dictionaries with no loss in the algorithm's time complexity. this
+        also helps when running in incremental mode to persist relevant training data and to
+        minimize codec + serialization errors.
 
+        the data takes the form:
         {
             bin_id: {
-                "features": [
+                "feature_rows": [
                     [f1, f2, .., fn],
                     ...
                 ],
@@ -124,7 +151,7 @@ def predict(self, trip: ecwc.Confirmedtrip) -> Tuple[List[Dict], int]:
             return [], 0
         else:
             predictions = predicted_bin_record['predictions']
-            n_features = len(predicted_bin_record['features'])
+            n_features = len(predicted_bin_record['feature_rows'])
             logging.debug(f"found cluster {predicted_bin_id} with predictions {predictions}")
         return predictions, n_features
 
@@ -155,15 +182,15 @@ def _assign_bins(self, trips: List[ecwc.Confirmedtrip]):
             if bin_id is not None:
                 # add to existing bin
                 logging.debug(f"adding trip to bin {bin_id} with features {trip_features}")
-                self.bins[bin_id]['features'].append(trip_features)
+                self.bins[bin_id]['feature_rows'].append(trip_features)
                 self.bins[bin_id]['labels'].append(trip_labels)
             else:
                 # create new bin
                 new_bin_id = str(len(self.bins))
                 new_bin_record = {
-                    "features": [trip_features],
-                    "labels": [trip_labels],
-                    "predictions": []
+                    'feature_rows': [trip_features],
+                    'labels': [trip_labels],
+                    'predictions': []
                 }
                 logging.debug(f"creating new bin {new_bin_id} at location {trip_features}")
                 self.bins[new_bin_id] = new_bin_record
@@ -178,7 +205,7 @@ def _find_matching_bin_id(self, trip_features: List[float]) -> Optional[str]:
         """
         for bin_id, bin_record in self.bins.items():
             matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh)
-                for bin_sample in bin_record['features']])
+                for bin_sample in bin_record['feature_rows']])
             if matches_bin:
                 return bin_id
         return None
@@ -199,7 +226,7 @@ def _nearest_bin(self, trip: ecwc.Confirmedtrip) -> Tuple[Optional[int], Optiona
         trip_features = self.extract_features(trip)
 
         for bin_id, bin_record in self.bins.items():
-            for bin_features in bin_record['features']:
+            for bin_features in bin_record['feature_rows']:
                 if self.metric.similar(trip_features, bin_features, self.sim_thresh):
                     logging.debug(f"found nearest bin id {bin_id}")
                     logging.debug(f"similar: {trip_features}, {bin_features}")
@@ -216,7 +243,7 @@ def _apply_cutoff(self):
         # the cutoff point is an index along the sorted bins. any bin with a gte
         # index value is removed, as that bin has been found to be smaller than the cutoff.
         # This was the last line of calc_cutoff_bins in the old code, and is moved to the equivalent of delete_bins in the new code
-        bins_sorted = self.bins.sort(key=lambda bin: len(bin['features']), reverse=True)
+        bins_sorted = dict(sorted(self.bins.items(), key=lambda kv: len(kv[1]['feature_rows']), reverse=True))
@@ -225,7 +252,7 @@ def _apply_cutoff(self):
         # for i in range(len(self.bins)):
         #     y[i] = len(self.bins[i])
         num_bins = len(bins_sorted)
-        bin_sizes = [len(bin_rec['features']) for bin_rec in bins_sorted.values()]
+        bin_sizes = [len(bin_rec['feature_rows']) for bin_rec in bins_sorted.values()]
         _, cutoff_bin_size = util.find_knee_point(bin_sizes)
         logging.debug(
             "bins = %s, elbow distance = %s" % (num_bins, cutoff_bin_size)
@@ -233,7 +260,7 @@ def _apply_cutoff(self):
         updated_bins = {bin_id: bin_rec
                         for bin_id, bin_rec in bins_sorted.items()
-                        if len(bin_rec['features']) >= cutoff_bin_size}
+                        if len(bin_rec['feature_rows']) >= cutoff_bin_size}
 
         removed = len(bins_sorted) - len(updated_bins)
         logging.debug(
diff --git a/emission/tests/modellingTests/TestGreedySimilarityBinning.py b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
index 0c0f1a26b..32bed47aa 100644
--- a/emission/tests/modellingTests/TestGreedySimilarityBinning.py
+++ b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
@@ -46,7 +46,7 @@ def testBinning(self):
         model.fit(trips)
 
         # $m trip features should appear together in one bin
-        at_least_one_large_bin = any(map(lambda b: len(b['features']) == m, model.bins.values()))
+        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
         self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
 
     def testPrediction(self):
diff --git a/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py b/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py
index b9e51495f..e03a4046b 100644
--- a/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py
+++ b/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py
@@ -186,13 +186,13 @@ def testIncrementalRun(self):
         self.assertEqual(len(updated_model.bins), 3,
             'there should be three bins, one with 2 similar trips, and two singleton bins')
 
-        trips_in_bin = len(updated_model.bins['0']['features'])
-        print(f'trips in bins: {[len(x["features"]) for x in updated_model.bins.values()]}')
+        trips_in_bin = len(updated_model.bins['0']['feature_rows'])
+        print(f'trips in bins: {[len(x["feature_rows"]) for x in updated_model.bins.values()]}')
        self.assertEqual(trips_in_bin, self.expected_trips,
             'expected number of trips stored in bin')
 
-        self.assertEqual(len(updated_model.bins['1']['features']), 1,
+        self.assertEqual(len(updated_model.bins['1']['feature_rows']), 1,
             'the second bin should have exactly one entry (an outlier)')
-        self.assertEqual(len(updated_model.bins['2']['features']), 1,
+        self.assertEqual(len(updated_model.bins['2']['feature_rows']), 1,
             'the third bin should have exactly one entry (an outlier)')
\ No newline at end of file
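
For reviewers, a minimal standalone sketch of the greedy binning behavior the docstring above describes: a trip joins the first bin whose stored feature rows are all similar to its own feature row, and otherwise opens a new bin. The similarity metric here (per-feature absolute difference under a threshold) and the helper names are illustrative stand-ins for the model's configured self.metric.similar, not the module's API.

from typing import Dict, List

def similar(a: List[float], b: List[float], sim_thresh: float) -> bool:
    # stand-in metric: every pair of features must fall within the threshold
    return all(abs(x - y) <= sim_thresh for x, y in zip(a, b))

def assign_bin(bins: Dict[str, Dict], feature_row: List[float],
               labels: Dict, sim_thresh: float) -> str:
    # append the trip to the first bin whose rows are all similar to it,
    # mirroring _find_matching_bin_id + _assign_bins above
    for bin_id, bin_rec in bins.items():
        if all(similar(feature_row, row, sim_thresh)
               for row in bin_rec['feature_rows']):
            bin_rec['feature_rows'].append(feature_row)
            bin_rec['labels'].append(labels)
            return bin_id
    # no similar bin found: open a new one keyed by the next integer id
    new_bin_id = str(len(bins))
    bins[new_bin_id] = {'feature_rows': [feature_row],
                        'labels': [labels],
                        'predictions': []}
    return new_bin_id

bins: Dict[str, Dict] = {}
assign_bin(bins, [1.0, 2.0], {'mode': 'bike'}, sim_thresh=0.5)
assign_bin(bins, [1.1, 2.1], {'mode': 'bike'}, sim_thresh=0.5)  # joins bin "0"
assign_bin(bins, [9.0, 9.0], {'mode': 'car'}, sim_thresh=0.5)   # opens bin "1"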
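
A sketch of the label aggregation the docstring describes: each unique label combination stored in a bin is counted, and its probability is its share of the trips in that bin. The {'labels': ..., 'p': ...} record shape is an assumption made for illustration; the module's actual prediction records may differ.

from collections import Counter
from typing import Dict, List

def build_predictions(bin_rec: Dict) -> List[Dict]:
    # count each unique label combination; dicts are not hashable, so key
    # them by their sorted items
    counts = Counter(tuple(sorted(labels.items()))
                     for labels in bin_rec['labels'])
    total = sum(counts.values())
    # overwrite, rather than extend, the predictions on each fit
    bin_rec['predictions'] = [{'labels': dict(items), 'p': n / total}
                              for items, n in counts.items()]
    return bin_rec['predictions']

bin_rec = {'feature_rows': [[1.0], [1.1], [0.9]],
           'labels': [{'mode': 'bike'}, {'mode': 'bike'}, {'mode': 'walk'}],
           'predictions': []}
print(build_predictions(bin_rec))
# [{'labels': {'mode': 'bike'}, 'p': 0.667}, {'labels': {'mode': 'walk'}, 'p': 0.333}] (approx.)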
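
A sketch of the _apply_cutoff flow: sort bins by size, descending, locate an elbow in the size curve, and keep only bins at or above that size. The knee heuristic below (size just above the largest drop between adjacent bins) is a stand-in for util.find_knee_point, whose (index, value) return shape is inferred from this diff rather than confirmed.

from typing import Dict, List, Tuple

def knee_point(sizes: List[int]) -> Tuple[int, int]:
    # stand-in for util.find_knee_point: the cutoff size sits just above
    # the largest drop between adjacent bins in the descending curve
    if len(sizes) < 2:
        return 0, (sizes[0] if sizes else 0)
    drops = [sizes[i] - sizes[i + 1] for i in range(len(sizes) - 1)]
    idx = max(range(len(drops)), key=drops.__getitem__)
    return idx, sizes[idx]

def apply_cutoff(bins: Dict[str, Dict]) -> Dict[str, Dict]:
    # sort the bins dict by bin size, descending, as _apply_cutoff does
    bins_sorted = dict(sorted(bins.items(),
                              key=lambda kv: len(kv[1]['feature_rows']),
                              reverse=True))
    sizes = [len(rec['feature_rows']) for rec in bins_sorted.values()]
    _, cutoff_bin_size = knee_point(sizes)
    return {bin_id: rec for bin_id, rec in bins_sorted.items()
            if len(rec['feature_rows']) >= cutoff_bin_size}

bins = {'0': {'feature_rows': [[0.0]] * 5},
        '1': {'feature_rows': [[0.0]] * 4},
        '2': {'feature_rows': [[0.0]]}}
print(list(apply_cutoff(bins)))  # ['0', '1'] - the singleton bin falls below the elbow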