Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Random Forest Step #46

Draft
wants to merge 28 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
4ae8385
Use decorator to register imported steps when initializing
Gitiauxx Sep 24, 2018
7f6f916
add get_input_columns as a step attribute to avoid ugly code to get t…
Gitiauxx Sep 24, 2018
360e3d8
Add random forest as a step
Gitiauxx Sep 24, 2018
fdde25d
Add test for random forest
Gitiauxx Sep 24, 2018
e20edf6
Merge branch 'master' into xavier_testing
Gitiauxx Sep 24, 2018
7f33e8f
Add cross validation and feature importance (for random forest)
Gitiauxx Sep 26, 2018
577e4cc
add cross validation
Gitiauxx Oct 2, 2018
fb70a95
add a parsing capability for random forest to eval numpy functions fr…
Gitiauxx Oct 3, 2018
c495729
Add the possibility to parse output date and possible numpy transform…
Gitiauxx Oct 3, 2018
6f9b8fa
Remove useless inputs in random forest steps
Gitiauxx Oct 3, 2018
102fb60
Fix __init__ for random forest
Gitiauxx Oct 5, 2018
066be5f
add inheritance from OLSRegression in RF
Gitiauxx Oct 5, 2018
5cb5251
Fix bug to import/export model constructed from pickle files
Gitiauxx Oct 10, 2018
e91193d
add demo of different methods and some comparison across them
Gitiauxx Oct 10, 2018
df023d8
Add Doc String for utils
Gitiauxx Oct 16, 2018
4057264
Add Doc String for utils
Gitiauxx Oct 16, 2018
b161cd1
Add examples in doc string
Gitiauxx Oct 17, 2018
5dc5ea5
Add examples in doc string for regression
Gitiauxx Oct 17, 2018
4754ccc
Add doc string for splits and cross_validate_score in shared.py
Gitiauxx Oct 17, 2018
96cffbd
Add new required libraries in setup.py
Gitiauxx Oct 17, 2018
5d151e7
Add new required libraries in environment.yml
Gitiauxx Oct 17, 2018
637c2b2
remove duplicates import of modelmanager
Gitiauxx Oct 17, 2018
fc9a301
Delete neuro_network.py
Gitiauxx Oct 18, 2018
e7553b1
Add a tag role to pickle file
Gitiauxx Oct 19, 2018
ae7d465
branch 'xavier_testing' of https://github.com/udst/urbansim_templates…
Gitiauxx Oct 19, 2018
990e85a
Add tests for utils, random forest and gradient boosting
Gitiauxx Oct 24, 2018
fcbc256
Remove get_inputs_columns
Gitiauxx Oct 25, 2018
c6e40f5
Follow naming convention
Gitiauxx Oct 25, 2018
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
name: template-env

channels:
- udst # for orca and pandana
- conda-forge # for choicemodels, statsmodels, and many dependencies
- timothyb0912 # for pylogit

dependencies:
- python=3.6
- geopandas=0.3
- jupyter=1.0
- line_profiler=2.1
- matplotlib=2.2
- memory_profiler=0.54
- numpy=1.15
- orca=1.5
- pandana=0.4
- pandas=0.23
- pylogit=0.2
- pytest=3.8
- scipy=1.1
- statsmodels=0.9
- scikit-learn=0.19.2 # conda package is named scikit-learn, not sklearn
- dill=0.2.8.2


# This Conda environment includes the direct dependencies for template-based UrbanSim
# models, plus a variety of other packages that are useful for validation and testing.

# One-time setup (several minutes):
# `conda env create -f environment.yml`

# Activate the environment:
# `source activate template-env`

# Install development versions of ChoiceModels and UrbanSim Templates
# (only needs to be done once, but run git-pull from these directories
# periodically to update the codebases)

# Navigate to directory where choicemodels folder should go:
# `git clone https://github.com/udst/choicemodels.git`
# `cd choicemodels`
# `python setup.py develop`

# Navigate to directory where urbansim_templates folder should go:
# `git clone https://github.com/udst/urbansim_templates.git`
# `cd urbansim_templates`
# `python setup.py develop`
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='urbansim_templates',
version='0.1.dev13',
version='0.1.dev16',
description='UrbanSim extension for managing model steps',
author='UrbanSim Inc.',
author_email='[email protected]',
Expand All @@ -21,6 +21,8 @@
'pandana >= 0.3',
'pandas >= 0.22',
'statsmodels >= 0.8',
'urbansim >= 3.1.1'
'urbansim >= 3.1.1',
'sklearn >= 0.19.2',
'dill >= 0.2.8.2'
]
)
15 changes: 9 additions & 6 deletions urbansim_templates/modelmanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import os
import copy
import pickle
import dill as pickle
from collections import OrderedDict

import orca
Expand All @@ -28,7 +28,6 @@ def template(cls):
"""
_templates[cls.__name__] = cls
return cls


def initialize(path='configs'):
"""
Expand Down Expand Up @@ -99,9 +98,9 @@ def build_step(d):
"""
if 'supplemental_objects' in d:
for i, item in enumerate(d['supplemental_objects']):
content = load_supplemental_object(d['name'], **item)
content = load_supplemental_object(d['name'], item['name'], item['content_type'])
d['supplemental_objects'][i]['content'] = content

return _templates[d['template']].from_dict(d)


Expand Down Expand Up @@ -188,8 +187,12 @@ def save_step_to_disk(step):
# Save supplemental objects
if 'supplemental_objects' in d:
for item in filter(None, d['supplemental_objects']):
save_supplemental_object(step.name, **item)
content = item['content']
content.role = item['object_name']
save_supplemental_object(step.name, item['name'], content, item['content_type'])
del item['content']
del item['object_name']


# Save main yaml file
headers = {'modelmanager_version': __version__}
Expand Down Expand Up @@ -219,7 +222,7 @@ def save_supplemental_object(step_name, name, content, content_type, required=Tr

"""
if content_type is 'pickle':
content.to_pickle(os.path.join(_disk_store, step_name+'-'+name+'.pkl'))
pickle.dump(content, open(os.path.join(_disk_store, step_name+'-'+name+'.pkl'), 'wb'))


def get_step(name):
Expand Down
176 changes: 176 additions & 0 deletions urbansim_templates/models/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@
from urbansim.models import RegressionModel
from urbansim.utils import yamlio


from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

from .. import modelmanager
from ..utils import convert_to_model
from .shared import TemplateStep


Expand Down Expand Up @@ -203,4 +207,176 @@ def run(self):

orca.get_table(tabname).update_col_from_series(colname, values, cast=True)

@modelmanager.template
class RandomForestRegressionStep(OLSRegressionStep):
    """
    Template step that fits a random forest regression using sklearn's
    RandomForestRegressor. Data binding, output handling, and step naming
    are inherited from OLSRegressionStep.

    """
    def __init__(self, tables=None, model_expression=None, filters=None, out_tables=None,
                 out_column=None, out_transform=None, out_filters=None, name=None, tags=None):
        # Fix: use None as the default for `tags` instead of a mutable list
        # (a shared list default would leak state across instances).
        tags = [] if tags is None else tags

        super().__init__(tables=tables, model_expression=model_expression, filters=filters,
                         out_tables=out_tables, out_column=out_column,
                         out_transform=out_transform, name=name)

        # NOTE(review): `out_filters` and `tags` are accepted here (and
        # round-tripped by from_dict) but not forwarded to the parent
        # constructor — confirm whether OLSRegressionStep should receive them.
        self.cv_metric = None    # cross-validation score, if computed elsewhere
        self.importance = None   # {variable name: feature importance}, set by fit()

    @classmethod
    def from_dict(cls, d):
        """
        Create an object instance from a saved dictionary representation,
        restoring the pickled random forest model from the supplemental
        objects.

        Parameters
        ----------
        d : dict

        Returns
        -------
        RandomForestRegressionStep

        """
        # Pass values from the dictionary to the __init__() method
        obj = cls(tables=d['tables'], model_expression=d['model_expression'],
                  filters=d['filters'], out_tables=d['out_tables'],
                  out_column=d['out_column'], out_transform=d['out_transform'],
                  out_filters=d['out_filters'], name=d['name'], tags=d['tags'])

        # Re-attach supplemental objects (e.g. the fitted model) under the
        # attribute name stored in each object's `role` tag.
        for item in d['supplemental_objects']:
            content = item['content']
            setattr(obj, content.role, content)
        return obj

    def fit(self):
        """
        Fit a RandomForestRegressor using the sklearn library, save the fitted
        model on the step, and compute feature importances.

        """
        # Wrap the sklearn estimator so it exposes fit/predict with the same
        # structure as the other template steps.
        self.model = convert_to_model(RandomForestRegressor(),
                                      self.model_expression,
                                      ytransform=self.out_transform)

        self.model.fit(self._get_data())
        self.name = self._generate_name()

        # Map each right-hand-side variable to its feature importance score.
        importance = self.model.feature_importances_
        self.importance = {variable: float(score)
                           for variable, score in zip(self.model.rhs, importance)}

    def to_dict(self):
        """
        Create a dictionary representation of the object. The fitted model is
        registered as a supplemental object so it can be pickled separately.

        Returns
        -------
        dict

        """
        d = TemplateStep.to_dict(self)

        # Add parameters not in parent class
        d.update({
            'cross_validation_metric': self.cv_metric,
            'features_importance': self.importance
        })

        # The model itself is saved to disk as a pickled file.
        d['supplemental_objects'] = [{'name': self.name,
                                      'object_name': 'model',
                                      'content': self.model,
                                      'content_type': 'pickle'}]
        return d

    def run(self):
        """
        Run the model step: calculate predicted values and use them to update a
        column.

        The predicted values are written to Orca and also saved to the class
        object for interactive use (`predicted_values`). They are not saved in
        the dictionary representation of the model step.

        """
        # TO DO - figure out what we can infer about requirements for the underlying data
        # and write an 'orca_test' assertion to confirm compliance.
        output_column = self._get_out_column()

        # Fix: fetch the prediction data once instead of calling
        # self._get_data('predict') twice.
        data = self._get_data('predict')
        values = self.model.predict(data)
        self.predicted_values = values

        tabname = self._get_out_table()
        orca.get_table(tabname).update_col_from_series(output_column, values, cast=True)




@modelmanager.template
class GradientBoostingRegressionStep(RandomForestRegressionStep):
    """
    Template step that fits a gradient boosting regression using sklearn's
    GradientBoostingRegressor. All other behavior is shared with
    RandomForestRegressionStep.

    """
    def fit(self):
        """
        Fit a GradientBoostingRegressor using the sklearn library and save the
        fitted model on the step.

        """
        # Wrap the sklearn estimator so it exposes fit/predict with the same
        # structure as the other template steps.
        self.model = convert_to_model(GradientBoostingRegressor(),
                                      self.model_expression,
                                      ytransform=self.out_transform)

        self.model.fit(self._get_data())
        self.name = self._generate_name()

    def to_dict(self):
        """
        Create a dictionary representation of the object. The fitted model is
        registered as a supplemental object so it can be pickled separately.

        Returns
        -------
        dict

        """
        d = TemplateStep.to_dict(self)

        # Add parameters not in parent class. Fix: use the same key naming
        # convention as RandomForestRegressionStep ('cross_validation_metric'
        # rather than 'cross validation metric').
        d.update({
            'model': self.name,
            'cross_validation_metric': self.cv_metric
        })

        # The model itself is saved to disk as a pickled file.
        d['supplemental_objects'] = [{'name': self.name,
                                      'object_name': 'model',
                                      'content': self.model,
                                      'content_type': 'pickle'}]
        return d



Loading