diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..7fa5295 --- /dev/null +++ b/environment.yml @@ -0,0 +1,48 @@ +name: template-env + +channels: + - udst # for orca and pandana + - conda-forge # for choicemodels, statsmodels, and many dependencies + - timothyb0912 # for pylogit + +dependencies: + - python=3.6 + - geopandas=0.3 + - jupyter=1.0 + - line_profiler=2.1 + - matplotlib=2.2 + - memory_profiler=0.54 + - numpy=1.15 + - orca=1.5 + - pandana=0.4 + - pandas=0.23 + - pylogit=0.2 + - pytest=3.8 + - scipy=1.1 + - statsmodels=0.9 + - sklearn=0.19.2 + - dill=0.2.8.2 + + +# This Conda environment includes the direct dependencies for template-based UrbanSim +# models, plus a variety of other packages that are useful for validation and testing. + +# One-time setup (several minutes): +# `conda env create -f environment.yml` + +# Activate the environment: +# `source activate template-env` + +# Install development versions of ChoiceModels and UrbanSim Templates +# (only needs to be done once, but run git-pull from these directories +# periodically to update the codebases) + +# Navigate to directory where choicemodels folder should go: +# `git clone https://github.com/udst/choicemodels.git` +# `cd choicemodels` +# `python setup.py develop` + +# Navigate to directory where urbansim_templates folder should go: +# `git clone https://github.com/udst/urbansim_templates.git` +# `cd urbansim_templates` +# `python setup.py develop` \ No newline at end of file diff --git a/setup.py b/setup.py index 0b1a6a0..ce935fc 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name='urbansim_templates', - version='0.1.dev13', + version='0.1.dev16', description='UrbanSim extension for managing model steps', author='UrbanSim Inc.', author_email='info@urbansim.com', @@ -21,6 +21,8 @@ 'pandana >= 0.3', 'pandas >= 0.22', 'statsmodels >= 0.8', - 'urbansim >= 3.1.1' + 'urbansim >= 3.1.1', + 'sklearn >= 0.19.2', + 'dill >= 0.2.8.2' ] ) diff --git a/urbansim_templates/modelmanager.py b/urbansim_templates/modelmanager.py index 6d5bc5d..92f48db 100644 --- a/urbansim_templates/modelmanager.py +++ b/urbansim_templates/modelmanager.py @@ -2,7 +2,7 @@ import os import copy -import pickle +import dill as pickle from collections import OrderedDict import orca @@ -28,7 +28,6 @@ def template(cls): """ _templates[cls.__name__] = cls return cls - def initialize(path='configs'): """ @@ -99,9 +98,9 @@ def build_step(d): """ if 'supplemental_objects' in d: for i, item in enumerate(d['supplemental_objects']): - content = load_supplemental_object(d['name'], **item) + content = load_supplemental_object(d['name'], item['name'], item['content_type']) d['supplemental_objects'][i]['content'] = content - + return _templates[d['template']].from_dict(d) @@ -188,8 +187,12 @@ def save_step_to_disk(step): # Save supplemental objects if 'supplemental_objects' in d: for item in filter(None, d['supplemental_objects']): - save_supplemental_object(step.name, **item) + content = item['content'] + content.role = item['object_name'] + save_supplemental_object(step.name, item['name'], content, item['content_type']) del item['content'] + del item['object_name'] + # Save main yaml file headers = {'modelmanager_version': __version__} @@ -219,7 +222,7 @@ def save_supplemental_object(step_name, name, content, content_type, required=Tr """ if content_type is 'pickle': - content.to_pickle(os.path.join(_disk_store, step_name+'-'+name+'.pkl')) + pickle.dump(content, open(os.path.join(_disk_store, step_name+'-'+name+'.pkl'), 'wb')) def get_step(name): diff --git a/urbansim_templates/models/regression.py b/urbansim_templates/models/regression.py index 1750c52..25f3359 100644 --- a/urbansim_templates/models/regression.py +++ b/urbansim_templates/models/regression.py @@ -8,7 +8,11 @@ from urbansim.models import RegressionModel from urbansim.utils import yamlio + +from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor + from .. import modelmanager +from ..utils import convert_to_model from .shared import TemplateStep @@ -203,4 +207,176 @@ def run(self): orca.get_table(tabname).update_col_from_series(colname, values, cast=True) +@modelmanager.template +class RandomForestRegressionStep(OLSRegressionStep): + + def __init__(self, tables=None, model_expression=None, filters=None, out_tables=None, + out_column=None, out_transform=None, out_filters=None, name=None, tags=[]): + + super().__init__(tables=tables, model_expression=model_expression, filters=filters, out_tables=out_tables, + out_column=out_column, out_transform=out_transform, name=name) + + self.cv_metric = None + self.importance = None + + + + @classmethod + def from_dict(cls, d): + """ + Create an object instance from a saved dictionary representation. + Use a pickled version of the random forest model + Parameters + ---------- + d : dict + + Returns + ------- + RandomForestRegressionStep + + """ + # Pass values from the dictionary to the __init__() method + obj = cls(tables=d['tables'], model_expression=d['model_expression'], + filters=d['filters'], out_tables=d['out_tables'], + out_column=d['out_column'], out_transform=d['out_transform'], + out_filters=d['out_filters'], name=d['name'], tags=d['tags'], + ) + + # add supplemental objects + for i, item in enumerate(d['supplemental_objects']): + content = d['supplemental_objects'][i]['content'] + setattr(obj, content.role, content) + return obj + + + def fit(self): + """Fit method to estimate randomForest() + + This function fits a RandomForest() model using the sklearn library, + save the results and compute feature importances + + Arguments + ------------ + self: object instance + + """ + + # convert model to a format with similar fit and predict structure as in TemplateStep + self.model = convert_to_model(RandomForestRegressor(), + self.model_expression, + ytransform=self.out_transform) + + results = self.model.fit(self._get_data()) + self.name = self._generate_name() + + # compute feature importance + importance = self.model.feature_importances_ + self.importance = {} + i = 0 + for variable in self.model.rhs: + self.importance[variable] = float(importance[i]) + i += 1 + + + def to_dict(self): + """ + Create a dictionary representation of the object. + + Returns + ------- + dict + + """ + d = TemplateStep.to_dict(self) + # Add parameters not in parent class + d.update({ + 'cross_validation_metric': self.cv_metric, + 'features_importance': self.importance + }) + + # model config is a filepath to a pickled file + d['supplemental_objects'] = [] + d['supplemental_objects'].append({'name': self.name, + 'object_name': 'model', + 'content': self.model, + 'content_type': 'pickle'}) + + return d + + def run(self): + """ + Run the model step: calculate predicted values and use them to update a column. + + The predicted values are written to Orca and also saved to the class object for + interactive use (`predicted_values`, with type pd.Series). But they are not saved + in the dictionary representation of the model step. + + """ + # TO DO - figure out what we can infer about requirements for the underlying data + # and write an 'orca_test' assertion to confirm compliance. + + output_column = self._get_out_column() + data = self._get_data('predict') + + values = self.model.predict(self._get_data('predict')) + #values = pd.Series(values, index=data.index) + self.predicted_values = values + + tabname = self._get_out_table() + + orca.get_table(tabname).update_col_from_series(output_column, values, cast=True) + + + + +@modelmanager.template +class GradientBoostingRegressionStep(RandomForestRegressionStep): + + + def fit(self): + """Fit method to estimate GradientBoosting() + + This function fits a GradientBoosting() model using the sklearn library, + save the results. + + Arguments + ------------ + self: object instance + + """ + + # convert model to a format with similar fit and predict structure as in TemplateStep + self.model = convert_to_model(GradientBoostingRegressor(), + self.model_expression, + ytransform=self.out_transform) + + results = self.model.fit(self._get_data()) + self.name = self._generate_name() + + def to_dict(self): + """ + Create a dictionary representation of the object. + + Returns + ------- + dict + + """ + d = TemplateStep.to_dict(self) + # Add parameters not in parent class + d.update({ + 'model': self.name, + 'cross validation metric': self.cv_metric + }) + + # model config is a filepath to a pickled file + d['supplemental_objects'] = [] + d['supplemental_objects'].append({'name': self.name, + 'object_name': 'model', + 'content': self.model, + 'content_type': 'pickle'}) + + return d + + diff --git a/urbansim_templates/models/shared.py b/urbansim_templates/models/shared.py index 45a15d1..c42c2d8 100644 --- a/urbansim_templates/models/shared.py +++ b/urbansim_templates/models/shared.py @@ -53,6 +53,7 @@ def __init__(self, tables=None, model_expression=None, filters=None, out_tables= self.name = name self.tags = tags + self.template = type(self).__name__ # class name self.template_version = __version__ @@ -103,7 +104,7 @@ def to_dict(self): 'out_tables': self.out_tables, 'out_column': self.out_column, 'out_transform': self.out_transform, - 'out_filters': self.out_filters + 'out_filters': self.out_filters, } return d @@ -285,8 +286,8 @@ def _get_out_column(self): else: # TO DO - there must be a cleaner way to get LHS column name return self.model_expression.split('~')[0].split(' ')[0] - - + + def _get_out_table(self): """ Return name of the table to save data to. This is 'out_tables' or its first @@ -326,4 +327,86 @@ def _generate_name(self): return self.template + '-' + dt.now().strftime('%Y%m%d-%H%M%S') else: return self.name + + def _split(self, n_splits=10): + """Split the sample between test and train + + This function takes a data and turn it into a k-fold splits,where k=n_splits. + One split is reserved for testing, the remaining will be used for training + + Keywords + ------------ + n_splits: int + number of split (defualt = 10) + + Returns + ---------- + generator + a generator made of tuple, the first element being the training data and + the second being the test data. There is one tuple for each possible + split + """ + + data = self._get_data() + data_index = np.array(data.index).astype('int64') + + np.random.shuffle(data_index) + n_samples = data_index.shape[0] + + fold_sizes = (n_samples // n_splits) * np.ones(n_splits, dtype=np.int) + fold_sizes[:n_samples % n_splits] += 1 + current = 0 + + for fold_size in fold_sizes: + start, stop = current, current + fold_size + test_indices = data_index[start:stop] + test_data = data.loc[test_indices] + training_data = data.drop(test_indices) + current = stop + yield training_data, test_data + + def cross_validate_score(self, n_splits=10, **keywords): + """Cross validation iteration + + This function iterates over n_splits-fold splits of the data into + training and test data, fits the self.model on each on the training set + instance and computes mean squared and mean absolute errors on the corresponding + test data. + + This function adds to the object cross-validation metrics the mean of n_splits + -fold mean squared/absolute error. This is useful to compare various models + performance. + keywords + ----------- + n_splits: int + number of split (default = 10) + + **keywords: + any keywords that may used in the model.fit method (e.g. + batch size if stochastic methods are used) + + """ + + data = self._get_data() + + output_column = self._get_out_column() + + + self.cv_metric = {} + i = 0 + + mean_metric = np.zeros(n_splits) + mae_metric = np.zeros(n_splits) + + for train, test in self._split(n_splits): + + self.model.fit(train, **keywords) + predicted = self.model.predict(test) + + mean_metric[i] = np.mean((predicted - test[output_column]) ** 2) + mae_metric[i] = np.mean(abs(predicted - test[output_column])) + i += 1 + + self.cv_metric['mean_cross_validation'] = float(np.mean(mean_metric)) + self.cv_metric['mae_cross_validation'] = float(np.mean(mae_metric.mean())) diff --git a/urbansim_templates/tests/machine_learning_steps_rental_prices.ipynb b/urbansim_templates/tests/machine_learning_steps_rental_prices.ipynb new file mode 100644 index 0000000..3ac9861 --- /dev/null +++ b/urbansim_templates/tests/machine_learning_steps_rental_prices.ipynb @@ -0,0 +1,419 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "import orca\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from urbansim_templates import modelmanager\n", + "from urbansim_templates.models.regression import RandomForestRegressionStep, OLSRegressionStep, GradientBoostingRegressionStep\n", + "from urbansim_templates.models.neuro_network import NeuralNetworkStep" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# load rental data\n", + "rental = pd.read_csv('data\\\\rentals_with_nodes.csv')\n", + "node_small = pd.read_csv('data\\\\nodessmall_vars.csv') \n", + "node_walk = pd.read_csv('data\\\\nodeswalk_vars.csv') " + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.merge(rental, node_small, left_on='node_id_small', right_on='osmid')\n", + "data = pd.merge(data, node_walk, left_on='node_id_walk', right_on='osmid')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# register data in orca\n", + "orca.add_table('rental_prices', data)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No yaml files found in path 'configs'\n" + ] + } + ], + "source": [ + "# create random forest step \n", + "modelmanager.initialize()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# define model expression\n", + "model_expression = 'np.log(rent_sqft) ~ bedrooms + np.log1p(units_500_walk) + np.log1p(rich_500_walk) + '\\\n", + " 'np.log1p(singles_500_walk) + np.log1p(children_500_walk)'" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# random forest\n", + "rf = RandomForestRegressionStep(out_column='rent_sqft', out_transform=np.exp)\n", + "rf.tables = 'rental_prices'\n", + "rf.model_expression = model_expression\n", + "rf.fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# gradient boosting\n", + "gb = GradientBoostingRegressionStep(out_column='rent_sqft', out_transform=np.exp)\n", + "gb.tables = 'rental_prices'\n", + "gb.model_expression = model_expression\n", + "gb.fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " OLS Regression Results \n", + "==============================================================================\n", + "Dep. Variable: np.log(rent_sqft) R-squared: 0.408\n", + "Model: OLS Adj. R-squared: 0.408\n", + "Method: Least Squares F-statistic: 4.301e+04\n", + "Date: Wed, 10 Oct 2018 Prob (F-statistic): 0.00\n", + "Time: 12:23:02 Log-Likelihood: -17618.\n", + "No. Observations: 312325 AIC: 3.525e+04\n", + "Df Residuals: 312319 BIC: 3.531e+04\n", + "Df Model: 5 \n", + "Covariance Type: nonrobust \n", + "===============================================================================================\n", + " coef std err t P>|t| [0.025 0.975]\n", + "-----------------------------------------------------------------------------------------------\n", + "Intercept 1.2682 0.002 552.968 0.000 1.264 1.273\n", + "bedrooms -0.1471 0.000 -298.658 0.000 -0.148 -0.146\n", + "np.log1p(units_500_walk) -0.0306 0.002 -20.384 0.000 -0.034 -0.028\n", + "np.log1p(rich_500_walk) 0.1129 0.001 185.490 0.000 0.112 0.114\n", + "np.log1p(singles_500_walk) 0.0575 0.001 61.841 0.000 0.056 0.059\n", + "np.log1p(children_500_walk) -0.0911 0.001 -106.941 0.000 -0.093 -0.089\n", + "==============================================================================\n", + "Omnibus: 77001.354 Durbin-Watson: 0.820\n", + "Prob(Omnibus): 0.000 Jarque-Bera (JB): 541361.022\n", + "Skew: -1.006 Prob(JB): 0.00\n", + "Kurtosis: 9.128 Cond. No. 59.1\n", + "==============================================================================\n", + "\n", + "Warnings:\n", + "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" + ] + } + ], + "source": [ + "# ols\n", + "ols = OLSRegressionStep(out_transform=np.exp, out_column='rent_sqft')\n", + "ols.tables = 'rental_prices'\n", + "ols.model_expression = model_expression\n", + "ols.fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# two hiiden layers neuro network\n", + "nn = NeuralNetworkStep(out_transform=np.exp, out_column='rent_sqft')\n", + "nn.tables = 'rental_prices'\n", + "nn.model_expression = model_expression\n", + "nn.fit(verbose=0, epochs=4, batch_size=16)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# create cross validation metrics\n", + "rf.cross_validate_score()\n", + "ols.cross_validate_score()\n", + "gb.cross_validate_score()\n", + "nn.cross_validate_score(verbose=0, epochs=4, batch_size=32)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
msemae
ols0.6939130.589260
random forest0.2777790.304354
gradient boosting0.5735580.525374
neuro_net_one_layer0.6732340.574803
\n", + "
" + ], + "text/plain": [ + " mse mae\n", + "ols 0.693913 0.589260\n", + "random forest 0.277779 0.304354\n", + "gradient boosting 0.573558 0.525374\n", + "neuro_net_one_layer 0.673234 0.574803" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# summary of cross validation statistics\n", + "summary = pd.DataFrame()\n", + "summary.loc['ols', 'mse'] = ols.cv_metric['mean_cross_validation']\n", + "summary.loc['random forest', 'mse'] = rf.cv_metric['mean_cross_validation']\n", + "summary.loc['gradient boosting', 'mse'] = gb.cv_metric['mean_cross_validation']\n", + "summary.loc['neuro_net_one_layer', 'mse'] = nn.cv_metric['mean_cross_validation']\n", + "summary.loc['ols', 'mae'] = ols.cv_metric['mae_cross_validation']\n", + "summary.loc['random forest', 'mae'] = rf.cv_metric['mae_cross_validation']\n", + "summary.loc['gradient boosting', 'mae'] = gb.cv_metric['mae_cross_validation']\n", + "summary.loc['neuro_net_one_layer', 'mae'] = nn.cv_metric['mae_cross_validation']\n", + "summary" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Saving 'random_forest_rental_prices.yaml': C:\\Users\\Xavier\\Urbansim\\udst\\urbansim_templates_testing\\urbansim_templates\\urbansim_templates\\tests\\configs\n", + "Registering model step 'random_forest_rental_prices'\n" + ] + } + ], + "source": [ + "# register best model\n", + "rf.name = 'random_forest_rental_prices'\n", + "modelmanager.register(rf)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Registering model step 'random_forest_rental_prices'\n" + ] + } + ], + "source": [ + "# check whether it is saved\n", + "modelmanager.initialize()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "m = modelmanager.get_step('random_forest_rental_prices')" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "m.run()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "results = pd.DataFrame(m.predicted_values)\n", + "results.columns = ['predicted']\n", + "results['observed'] = data[m._get_out_column()]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWAAAADQCAYAAAA53LuNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJztnXuYHGWZ6H/v9GRuuQ4QwoRAJmCEZRYSQhDwBgRBQRJMVEADAdddIHGP4C67B/VZL8TdPVHOOYsuRPGoLC5iQAMqC5IYQMSVSyAJJnI1BAkZQoAAIQQSkvf88VbZNT19qZ6p7urueX/P8z1VXfV9VV9XV7391Xv7RFVxHMdxqk9T2h1wHMcZqrgAdhzHSQkXwI7jOCnhAthxHCclXAA7juOkhAtgx3GclHAB7NQlInKCiGxMux+OMxhcADuJISIbRGSHiLwuIs+LyLUiMiLtfg0WEVER2R58r9dF5JUqn9//bBoUF8BO0sxU1RHAVOBI4PMp9ycppqjqiKCMKbexiDRXolNOfeMC2KkIqvo8cAcmiAEQkQ+LyCoReU1EnhWRr0T2dQcjzfNE5E8i8qKIfDGyvz0YUW8VkT8AR0fPJyJ/ISJ3i8grIrJORGZF9l0rIleLyO3BCPa3IrKfiPxbcLzHROTIgXxPEfkbEXlKRF4WkZ+LyPjIPhWRz4jIk8CTwbZDRWR5UP9xETkzUv80EfmDiGwTkedE5FIRGQ7cDoyPjMDH9+uIU5+oqhcviRRgA/CBYH0C8Hvgysj+E4DDsT/+I4DNwEeCfd2AAt8F2oEpwFvAXwT7/xfwG2Av4ABgLbAx2DcMeAr4AtACzAC2AYcE+68FXgSOAtqAO4GngXlABvgacFeR76XAO/JsnxEcdxrQCnwLuCen3fKgz+3AcOBZ4FNAc9DuRaAnqN8LvC9Y7wSmRa7bxrR/Xy8VeGbS7oCXximBAH49EH4KrADGFKn/b8D/DdZDATwhsv8B4OxgfT3woci+CyIC+H3A80BTZP8NwFeC9WuB70b2/Q/g0cjnw4FXivRTgdeAV4LyzWD794CvR+qNAHYB3ZF2MyL7zwJ+k3Ps7wBfDtb/BFwIjMqp4wK4QYurIJyk+YiqjsSExqHAPuEOETlGRO4SkS0i8ipwUXR/wPOR9TcwoQYwHhs9hjwTWR8PPKuqe3L27x/5vDmyviPP51LGwmmqOiYon42c98/9UNXXgZdyzhvt80TgmEBN8kpgzJsL7Bfs/yhwGvCMiPxaRI4r0SenznEB7FQEVf01NvK8IrL5R8DPgQNUdTTwbUBiHrIXUz2EHBhZ3wQcICJNOfufK7Pb5bIJE6oABPravXPOG003+Czw64ggH6Nm1JsPoKoPquoZwL7ALcCNeY7hNBAugJ1K8m/AySISGuJGAi+r6psi8i7gk2Uc60bg8yLSKSITMDVCyP3AduAfRWSYiJwAzAR+POhvUJwfAZ8Skaki0gr8C3C/qm4oUP9W4J0icm7Qz2EicnRgQGwRkbkiMlpVd2Eqj91Bu83A3iIyusLfx6kyLoCdiqGqW4DrgH8KNi0ALheRbcCXyI7w4vBV7HX/aWAZ8MPIeXYCs4BTMaPW1cA8VX1ssN+hGKq6AvtuP8VG6AcDZxepvw04JaizCVO3LMIMeADnAhtE5DVMPXNO0O4xTKe9PlBduBdEgyCq/nbjOI6TBj4CdhzHSQkXwI7jOCnhAthxHCclXAA7juOkRF0kCNlnn320u7s77W44juPE4qGHHnpRVceWqlcXAri7u5uVK1em3Q3HcZxYiMgzpWu5CsJxHCc1XAA7juOkhAtgxxkoJ54IIvCBD6TdE6dOcQHsOAPl7rttuWJFqt1w6pe6MMI5Tk0heRK4hds8tN8pAx8BO065nHBC/u0nnVTVbjj1jwtgxymXu+7Kv/1Xv6puP5y6xwWwM3RZvRrGjIFHHhn4McbkmSC5txeOPx6ef77/PseJ4ALYGbqccw68+ip8spy88AGqVrZuza6HLFwI994Ll1+eXF+dhqQu8gFPnz5dPRLOSYx8RrSQwTwP7e3w5pv9t7e1wY4dAz+uU3eIyEOqOr1UPR8BO0OPVatg4sS+27q7Yc2awR13/XobTXd02OeODpg7F55+enDHdRoWF8DO0GPqVBg+vO+24cPhiCMGd9yuLhg1ykbBbW22HDUK9tuvdFtnSOIC2GlcenvhuOOsrFnT1zD20ks2Qv3Od6CnB15+ua/xrLcXjj3W2oZt4hjXNm+Giy6C++6z5YYNbpBzCqOqNV+OOuoodZyy2LRJtasrNI+p9vSoNjWpzpun+v73q553nn2ePz/bZv787Lb587NtwzrR/atWqY4erbpmTfF+RNs4QwZgpcaQbW6EcxqPQsawStDTA2vXxu+DG+SGBG6Ec4YmAxG+c+bA7NlZ41lTzmORycCECXbsXNatM6+KXM8KN8g5MaiYABaR74vICyKyNrJtLxFZLiJPBsvOSp1/SDOUAgFy9bb771/+MZYuhZtvhjfesM979vTdv3s3bNxoI9dc4QxwwAEwbVpWz7xmDZx9NuzcacdsbR2cQW4o/Z5DjEqOgK8FPpSz7TJghapOBlYEn52kGUqBANHvunAh/PGPlT1frnAGeP11i6qbO9f6Ei7vuMP2z5plBrmBCtCh9HsOMSqqAxaRbuBWVf3L4PPjwAmq2isiXcDdqnpIqeO4DjgmQ0nvWE09b1KU+zsMpd+zwahVHfA4Ve0FCJb7FqooIheIyEoRWblly5aqdbCuGUp6x9zvmhZdXSYQi9HePrDfYSj9nkOUmjXCqeo1qjpdVaePHVtyclEHhlYgQO53TYvOTtP1ZjL2OdcYl8nAW28N7HcYSr/nEKXaAnhzoHogWL5Q5fM3PrmBAPVsuOnttei0TMaWS5ZAczPceaft37wZzj3XwoirzcEH28j2ySdh3jwzkvX0wN5723KffWx5/PGD+x0a6fd0+hPHWXigBegG1kY+fwO4LFi/DPh6nON4IMYQJRoMAaotLbbs7CxcZ6Bl0iRbDh9euE5bW//+eZCFkwfSDsQQkRuAE4B9gM3Al4FbgBuBA4E/AR9X1ZdLHcuNcEOMWjawqbpxzClJ6kY4Vf2Eqnap6jBVnaCq31PVl1T1JFWdHCxLCl9nCLJ+vc04XIyODjj66OKpJQdCRwecdlp/496IEdlsaW4ccxKiZo1wzhCmqwsOKeGd2NoKRx2VP3+vCIwcCZMmwYwZ8c8bGromTuwfcCGSzZbmxjEnIVwANwKNGCm1eXNWCDbnTN7d0WERZps32/rBB8O++8KwYWb8mj8f3vtei1C7/vr45+zuNoPf44+bAJ840QTvxIk2As7tnxvHnMESR1GcdnEjXAka1RjU05PfGNbUlK1TKCtZ7jUJDXgtLf2PnZv9rKfH6nR29jf6OU4MSNsIlyRuhCtAoxqDytHr9vRYQpwwK1mlDXh18Lw46ZO6Ec6pAo1qDFq1qv+MFYVYty67FDHhW4kIueHDYcWKZI/pDHlcANczjWgMam+HI4+E7dvLbxvO6zZqVDaz2UDJNcK1tJRn0HOcGLgArncazRgUjuqj5BrhCrFhg3kqbN5cuu7IkZbjF2zk3NRkxrxTTrF9e/ZYmPGiRbYcrEB3nDy4DtipPL29lh93yZJ4o/P58+Gaa2zUuXMnXHghXH21HefTn4bbb8/fbuRIuOceG0EXYsIE2LQpf1rJetedOzWD64CdwZGb6Dzfer66+Yjms129GsaMMX1qoeM880z+Uf3atYWFL8ATT8AHP2jrra3562zcmBW+w4bZslF05079EcdVIu3ibmgpkDtBZb71fHWjtLUVzqvQ2dl3ksze3vzH2bQpuz90CxtM6ejo+7mtrTFd+JxUwd3Q6oRyX88rTRJuXOGr/PLl8OEPW3DDQI7Z1gaf+hQsXjy4/mQyNq1QMWbPtqmJHCcBXAVRL9TadDO5rm2ZTDbXbXS9oyM7mWVoJGtu7vsq/7nPwa5dJnzLzdkQupQNVvhCaeH7jW+48HVSwUfAaVHLQRRRI1jYx9DNLVzfudOEZCnhVg/UwTPg1Bc+Aq51ajmIIuraNmmSldz1iy4yl60JE/qOkCdMMNXDxInpfgfHqQNiOlg6iVPLQRTR1/H16/OvX3WVLcPRcjgqnjkTPvCB/JFscXSxadDenv5bhzMk8RFwmlQiiCJ05VqzpjoZ0nK/w4YNdt4XX+yv901D+Pb0wNSp2c/RrGa19NbhDE3iuEokXYDPAeuAtcANQFux+u6GVgahK1dPjy0/9jHLFrZkSf6sYSFRd6+45GsTnj+TKewKNnr04N3JBlJaW20p4u5nTkUhphtaGsJ3f+BpoD34fCNwfrE2Q14AxxGOxXxuoyVMtRg99jHHqO67rwmmfD64y5cXTvkokm2bhlAdaLnxRtUFC1Rnz07ud3KcgFoXwM8Ce2E66FuBU4q1GfICOE6+302bVD/5SdX29vhCSLXwSLWtLXveMAAiFN7FhH1zc/rCNSzDhsW7Bo6TMHEFcNV1wKr6HHAFNilnL/Cqqi7LrSciF4jIShFZuWXLlmp3szZobzc96uLFFj67eLF9bm/vXzc06r31Vv9MXrl0d5uLWTE3stAHd88e2LrVtkVTPhby63377dhfr+Ls2lW6Tu73KKVDb8TZR5zUqLoAFpFO4AxgEjAeGC4i5+TWU9VrVHW6qk4fO3ZstbtZG5TrqhYaxEJPitA9LJfhw81YNnt2/jof/7idt62t/75oysdapqUF3v3u4nWGD89OtBkSBsbMnZs/QKbWAmecuiYNL4gPAE+r6hZV3QUsBUo8KUOUcl3Vbr/dsoZt2mSfo6PblhZLgtPTAy+/bMceN67/CLinx0axo0aZW1kuGzbAlCnw6quJfMWKMHq0JfQJJ9EsxPbt9l3a2/u/baxb1/etIyxx3kYcJyZpCOA/AceKSIeICHAS8GgK/agPynFVKzRi7u011cTWrZZRLBTQmzdbYMXMmTB2rE1ieeCB8NJL2Yxk++xj+XAnT7Y2I0YUzjQWpVqC6dhjs+cKs5vt2mV/UnHyAp92mr1RhNcut9/t7XYNV6+u3cAZp25JQwd8P/AT4GHg90Efrql2P+qGpUst6GHKFFtGgyRy9ZHljpiXLjXBM2GCCd3TTzcVw7332vKqq2DLFhsxv/iitdm504R5KaoR2NDUZH9M++0HCxbAgw/a8vjjTTD39lq56KLCx5g40dp3dZk6ZseOrA49k7HvOmqUXf9aDZxx6hbPBVHPLFgA3/lONmE5WIKcri644AKLUOvtLZxoptITWFabMI/GggXZJD7z59sf1OrVdi1Cw9zu3ZY8aObM7PU56CAb0e6/v6lrxo2DQw/NXsNyrq0zpImbC8IFcD2SVCKf3l649FK45Rabcic0yO3eba/YxxwDd99t+uM4o960aW0t3M/QoFjOH06SiZFqLe2oU1E8GU8axHFRSsKNKa53RJxz3XWXCZm2NhO8u3dnX7F/+1vzlq0H4Ttzpn3/XM+OpibTYR92mKkrZs/uq2IA8/qIbheBvfeG++9Prn/uPeHko5iTMPB3xUocR+MkSt0EYsQJmIhTJw4XXWTHKRZSW+pc8+ZZMMIhh6iuXq06aZKVtAMoBlImTcpel0J15s9XPeyw+MdMIky5UNBKW9vgj+3ULCQxI4aIfDlYPQQ4Gvh58HkmcI+q/nUl/hRyqXkVRByVQNL5f4vpI0udq7U1v4tZa6upHVpa4Fe/Kr9PjcpgVBG5ap6ODhttX3GFqyIamERUEKr6VVX9KrAPME1V/15V/x44CpiQTFcbgDgqgaTz/151lbmUjRvX3zui0Ll+8QszLuUTvmCqhnvuqV/hG7rKTZ5c2g0uVD+EkXD5ogczGfujC6/bI4/0rxNOMppvH9R22lEndeLqgA8Eok/tTqA78d7UE1H9apyHbLAPYq4+t5hOMXqu1lYbeTU3w8kn13YAxWB58snsstSIdfdu0w2DLffs6R8VuHu3/cFdcoldt09+sv9xzjmn8L5Sszw7Thw9BfBFYA3wFeDLwGrgC3HaJlFqUgecq1+dPduya61ebctTT+2fwWz2bNWZM63drFmqM2YUTxGZ73zFkudEOfVU1a4uO1/a+tlqlELXpZzkRCKqI0eqjh+veuaZA+9LoXskLjfcYMe56aby2jk1A0lnQwOmARcH5ci47ZIoNSWA4xpVCj18YWaxzk7LLgb9U0TGOV9YOjpU587tn6qyqSl9oVgvpbk5/zVctUp14sS+dbu77Q+z2L7BGt5aWqx+S0u8+k7NEVcAl+OG1gG8pqpXAhtFZFJSo/C6opQut1AGs7CEmcW2brV8A5DNMpYvw1ju+ZqasrM6hNO9ZzJw1ln2atvWZsfZs6dy16DRePttuP56C8uOMnVq/6mVhg+3HBPF9g1U3x/eA6GOPpz4tNwZpZ26IZYADrwh/ifw+WDTMOA/K9WpmqaULrfQw/fjH2e35SPMMlbsfGCC9fXXbX33bvt83XVZfXCcFIxOf4YPNx1trt/01q2WoGjJkmwio1L7Bqrvv+GG/nroTAZuuimZ7+jUHnGGyZjOV4BVkW2PxGmbRElUBTGQqXdyydX35s6qUMhHN1Q/FFIlFOqTqxOqW5Lw/y11jxQiVD+ExdUQdQlJ6oCBB4Llw8FyeN0K4FKGkXCKnkMPNcPOihXZ7XEFd6GHr7XVhPCiRdkH7OSTs4I533RAvb3Z2S46OtIXTkOppBEs0dRkQvfii23Z1FT9PjiDJmkBfCnwHWA98DfA74DPxmmbRElEAJdjPIvuz2RMCCYVwRZn7ra2tmyU2r772vnDUXXaQmmolKamwb0lOUOaRAWwHY+TgW9g0wmdHLddEiURAZw7Z1p7e1/Ld9xJLcPS2jrwmYSjo9lMJutC1dHhQjapUmxW5jjlvPMGf88lTRLqM6cqxBXAcY1wi1R1uar+g6peqqrLRWRRAiro6hEaRkIH/R07+hvPZs8ubXFubrbl2WcXDoYoFh3V1WXW7TA4IpoAZ8cO915IikJz3YFd66i4DfMFZzL2+/f0wGuvVaef5eAJfRqPOFKaQPebs62+dMBxVBDFErnEKeFra9S/N9+oZeTI/m2nTUt/1NgoRaTv7x19w8j19122zPbNnGk6+/POU91779oaZXpCn7qDJEbAIjJfRH4PHCoij0TK09hsFgNCRMaIyE9E5DEReVREjhvosWITxzcznKKnqan/SHjkSAtZDd2EMpnsekeH5R/Ys8dGuFH/3vHjLb/C5ZdnfTq3bevfv4cfTvb7DmVU+yYjiqbYzHUHO+ssW957L+y7LyxbZu5ktTTKTDqPiFMzNJfY/yPgduBfgcsi27ep6sv5m8TiSuCXqvoxEWnBgjwqSxzfzGhCm0zGHuSQqNAM24e88UY2D0EhwhkaRPoe16kOYcBKODvGkUf23b91q/1ZhixebCXJpOwDxRP6NCylsqG9qqobMIH5sqo+o6rPALtE5JiBnFBERgHvB74XnGOnqr4ykGOVTTkTXG7cmD/BCmSF78iRthw9On82LciOpMNRS5ixy6ksIn3fUObOtd906VJLoBPWKUZTU+2MMsu5d526odQIOGQxlgsiZHuebXE5CNgC/EBEpgAPARer6vZoJRG5ALgA4MADDxzAafIQHeFedVXxuuGoI1QbhMaxjg4TwHv2ZEfFuRnGMhmboffNN220K2Kj5J077YE+5BB44gnbl8kUNxg5A0O1v+qhq6t/nWKce27tjDLLuXeduiFuLggJFMsAqOoe4gvvXJoxwb1YVY/EhPlluZVU9RpVna6q08eOHTvAUw2SzZttUsc5c+xzU5M9zOec01cnB6Y7/tWvTNju3t1XRRFeup/+1PIOzJhhAlwVZs3qH37qDI62NvttWlrg1luzI8Zlyyw9ZxQRSzfZ0mKfB+IJ0dsLxx1nxUemTjnEsdQBS4HPYjkghmEZ0W6J0zbPsfYDNkQ+vw/4r2JtqpINbdWqbGrIq682K/OkSarHHmupHRcsUF2+3AIjOjstUCKaHlJkYBZ71axvsPsAD76IWGlt1T97N4TkBtlANhPdQEOHc4+bRBizU/eQxJREISKyL/BNYAagwArgElV9YSBCX0R+A/y1qj4uIl8BhqvqPxSqX/EpiVavhqOOslFpTw88+mhff9x582DDBkvOnXRilGmBFmf9enilOqpwJ4eBzvpcaOonqA3jnZMacackij1yTbIAU4GVwCPALUBnsfoVGQFv2mSj2yRHX+VG03mpbslNzh6+tbzznaXvlagvd/h5+XLzGY5G3WUyqnPm1JYfsVN1iDkCLqrHFZF/VNWvi8i3AM0jvD87gD8HVHU1UPrfoZIsXGgW5SQpNBpyaoMdO/oaPTW4pZ94IusRof1u874RaFdfnf18ySXw0kt964bTGNWK8c6paUoZ0h4NljU8JXEZ9PbC/vvnf8icxmbCBDjpJHj2WZujbePGvmqH7m742c/6tslVMYS+wSFhwE1IUxNMnOiGOCc+cYbJaZfEVBDz59tr56RJbvAaaqWrq69a4LDD+u7PNy1UbuKk9nabdihX1dTWln9Ko3LxZDsNAwmpIH4B/VUPEeE9K+k/hIqQO5KpFed6p3r09pofsAa3czibxZe+ZKqFl/MEduZGoL31lrm37dzZV5URnRZqyZKBqx9yVR1Ow1PUC0JEjg9W52DuY+E0RJ/AXMm+UNnuGYP2gujthUsvhVtusYAID35oXEaMgO3bs4I2H+V4KMyZY4L4ggvgmmvg5pvhhRcK3z/z55cvPAt5U7gnRd2SqBcEcE+cbZUqiaggcqcJeu97038t9jK40tRkMxpHt40caX68xdqtWdP33ij16p/PC6LUDCXlZCrLPV6hma6duoGEZ0UeKyIHRaT7JCCl8LQBkhtLn1Z0nZMce/aYD290YswRIyxUd9y4bHRbLt/+dt/PpfLs5u6PqibCyLowT/RAMpV5sp0hS9xAjA8B12BTEgF0Axeq6h2V61qWigVixJnuu6UlO024U32amuIlqY/ex6G+dqCEr/7FAi3e8x6YMsVUE3PnmkdEW5vdKxdeWL4aIlfV0dvbN/+DU1ckHogBtAJTgtIat10SpWKhyKtWpf8a7aV4iYb2LluW/7U/Gm6smp1PLyytraojRmQ9Xzo6VCdPNo+Yd7yj8Kt/PtXAsGG23tmZPd9gwpidhoQkvCAi0rwD+Dtgoqr+jYhMFpFDVPXWAf9F1ALjxhXfP2wY7NoF73gHHH64GWCc6rFggb3KH388fPOb8LWv2W+Syy9+Ufxt5q234OCD4bHH7HM0f/NTT2XrRV/9ly+HU0+F007LjoLfeCNbd+vW/MEbnqnMKYO4OuAfADuBcOaKjcDXKtKjatDbaw/15z9fvN6uXbZ86qn+TvdO5TnxRAuQuPdee83/zW8s9efIkbD33tl6od519WrLUtfWlt3X3Aznn28C86KLTLBOntx3ZpPJky2TXTTP7llnmafDL39p2xcvzup5Q4YPhxUrkv3O4b3pwRxDgzjDZILhNLAqsm1NnLZJlMRVEIOdMddL7ZVMxn7bYnVC1UKuR0xUzVGsfe5cflE1RFLMn9+/T07dQcJeEDtFpB0sKENEDgYGYeVIifb2bL7eXEaPrn5/nMHT1mYj2FNOKW1UDb0Yis0usWxZ3zzPkB3pvv66fd53X5sfMKqSGCzhvbl4sRkdFy+2z+3tyZ3DqT3iSGngZODX2EwW1wMbgBPitE2iJDYC3rQp/ZGal+TL/PnlZaJrbe3v13vMMZYdr7fXRrZxj5UU7gvcUJDUCFhEBHgMi4Y7H7gBmK6qd1fiD6GidHXBhz+cdi+cJBCBo4+29cWL42WiC3XFZ5/d16934UK4/34bEV9+uY1sOzth0SJbtrRYkp0o3d2wZk1y3yeOL7DrhxuPOFIaeChOvUqVQY2Ac6OYJk1Kf8TmJdmSm+c3iZIbyRYnec9gKeXO5vrhuoGEZ8S4CrhWVR9MSvCLSAZLc/mcqp5erO6gAjEWLLDIpxjf02lQMhn44ActSu6//9sS77zxhulX997bRpahXSCTgTPOMHey6Ohz/HjYa6++yXs2bapO/z1XRN0RNxAjrhHuROA+EfmjiDwiIr8XkUcG10UuJptvOHmiRg0Xvo1FdMr5OOzeDatWwZVXwumn981uNmpUX6NsoYTqmzbB2rVw5pm2rJbwBZuuKjoJ7EDCnZ2aJO7MxqcmeVIRmQB8GPhnLMAjedavtyTcccJYnfqgpQXe9S444gj47nf7Cs7OTptxetu2/u322stGuZdfnvWAiGY3mzQpq09+8MHa07F6rojGpZh+AmgDLgH+HbgQaI6j1yhVgJ8ARwEnALeWqj8gHXAx/d5ee6Wvt/QSv4Rzt02alP19W1vNW2HRIlu2tprONM7x2trqL/m5hztXn0HcI8TUARffCUuwHMAXYpNnXhnnoCWOeTpwdbBeUAADF2A64pUHHnhg2RdAV62qjHHGy+BLS0t59Y84wtzEQPWf/qn4QxG2mTo1ux4G3kRdu9yg5ZRiEPdIXAFcKiH771X18GC9GXhAVacNZsQtIv8KnAu8HYywRwFLVfWcQm0GbITr6YE//GGAPXVSpVgWtKam4hnHimUxK5SM3w1aTkgCRs+kjHC7whVVfTvWmUugqp9X1Qmq2g2cDdxZTPgOmN7ebMIVp/4oprsvFSmWa7RqbrakOuefbxFzbtByilFFo2cpATxFRF4LyjbgiHBdRF5LvDdJsnChjXTmzbMRk9NYNDcXfihyjVZ79lggxQ9+ALfdVnsGLQ+wqC2qaPQsKplUNaOqo4IyUlWbI+ujBntyVb1bS/gAl01uTP1117knRCOyZ0/xh6JYvodi+9Kg1IwcTvWp0j0SKxAjbcrSAYcTcP7oR5XtlJMumQzMmpX+rBG9vRbaPJDZkD3AomFJOhCjfhg/3oVvLVFoXjYwITpnTv59bW1Z/W4mkw28aGqyNhs3li98K/GqP5jRqwdYDHkaTwDnmzHBSYczz7RQ3wULLFl6T0/f/WHUmSrMnt233ptv2iiwrc3q7d5tQnjPHnj88exosxyhmuSrfhLpI0vpGl033PjE8VVLu5Tb2t1AAAAQl0lEQVQViLFpU3beLi/plUMO6f/bzJ6dTWp+8MEWWBENKCgnpSRY/Ti+moWOW87U8fnusyTSRxYLsHBf5bqFJAIxaqWUHQkXTr7oJb0yblzfSKI4QrCQUFu9uv/2Qr9xPqFaqVy7xWbWGAyV+MNwqkpcAdx4Koj2dvd6SJPhw205Z07fV/44+s5Cr+RTpvTffs458fWnlXIrqpSl3HXDQ4a4yXjqhziJuZ2B0d5e2jq/fbstFy/Oblu82EomY2O5YgnHb7oJzj0XPvc5S5bT22v7cpPo9Paa/jiuUM3XPspAvBmiRsAkZ0P25DtDhzjD5LRLWSqIVauyr5peki2HHZZNjBOn5OZgOPXUZBOOF9OflptIpdb0rZ58p65hSOuA0xZUjVwyGdWDDuq7LVcoR/WzxfSjoZBsbc1/rsHoPOMKVNe3OhUgrgBuPB0w2KupkzwjR5r/7Y4d5ip28sm2PTfU+6MftRy7kyYV14+GOuKzzkpO51mue5jrW50UaTwdMJi+z0mebdtMPwmmM123ztZzs4vddFPfaK5c/WhuBNh112XXB6vzXL/eIiFvucWmHeroMB/jK67IX9/1rU6KNOYIeNmytHtQ34hYyUd3N8ycaevNwf93NFItzggy36hzwgTLVpaER8Fdd2WDOOII1FrLDeEMGRpzBDxrVto9qG82bYKvftW8BXJd+jZssAI2BRBkR8ChwMtkTK1QyKMg36hz5sxsft/BeBQsXGij854euP76/B4PuVTKm8FxStCYAthd0QZHqGZYsABuvNEEbHMzbNliwrW11V7vQzo6TO9+880m8G6/HZ55xvx/CyVNL+UWVi65ao1162Dq1OKJbQaTSMdxkiCOpS7tUrYXRLlT3njpX1assGtZbnhwWh4FA4l2qzXXM6dhYEh7QWzYUFiH6ZRm5EiYMcPWc/W1xZLbt7Sk51FQjjEtiUQ6jpMAVRfAInKAiNwlIo+KyDoRuTjRE7S3W0pK1UQPO6TYti0rjHIFG8Dkyf3bTJ5saodo3R074M47Td1QjaxecY1p7npWHp6VrWKkMQJ+G/h7Vf0L4FjgMyJyWGJHX78eTjwxscMNWaICN1ewvf66jZIhOyJ++20bbUbrHnaYPbxz51ZnxoelS82INmWKLQvlC3bXs/LwGTsqRuozYojIz4B/V9XlheqUPSvy/Pnw7W8n0LshQqEZiM87D669Nn+bOXNMkEWNaKHAKzYrMdTGjA/F+u8YPmPHgIk7I0bFDGdxCtAN/AkYVaxe2Ua42bPTN2LVW+nsLLyvXCNaaBBrb+97nPb2ZNJAOtWhUmk8hwDUuhFOREYAPwUuUdV+MyyLyAUislJEVm7ZsqW8g/tIJj7d3bBmDZxwgo14Tz01G2ARRze6ejWMGQOPPJLdFr7iv/VWNkAjk7HP/qpfP7iqpuKk4gcsIsMw4Xu9quaVlqp6DXANmAqiit1rHEIj065dVvIxfDgccUT2T2v+fFNHxH3gzjkHXn3VjFpr12a3h7rgxx6z9XHj4NBDB+/v61SXpP21nT5UXQcsIgL8B/Cyql4Sp03ZOuBSOshGp6UFPvEJ+OEPCyenX7LEjCrPP28GtHvugbFj4fDD4fTTTbDOm2fBDLfd1r99MTe/Kt9TjlNr1PKsyO8BzgVmiMjqoJyW2NFFhobwLSQAm5rg058249nGjfndrXp7bcLMtWttdBuOYBcuhK1brd7SpTb66e7Of55Vq2DixL7bQnWG4zixqLoKQlXvBSoXJbFqFXzkI+aT2sjkjjI7Oy0JTfQ1sZgOL1eAr1uXzW6WbzaLXMv31KnZ6YdCQnWG4zixaLxIuCOPbHzhG9LcDF//uiWeaWvL7/9aKDgh3wg2JG52s61b7dxLltjy5ZeT/X6O0+Ck7gcch7J0wKtXmxAeCohkdbwDSSzT0wN/+EPfbeFIOVzfuRMuvLBwUh3HcfpRyzrgyjJ1ato9qB7RP8+BRCtFR7AjR9po97774s1m4TjOoGm8ETDUdyKe9nbzly3kvRAyebJ5LkyaVDha6Xe/M//ee+5x3azjVJGhOwIGSyher+zYUVz4trTYMsy9UCyxTNRH13GcmqMxE7IfdFDaPUiWGTOyRrAw6XkxT4frr7cSsm5d9q2gDt54HGeo0JgCeP160wW/8ELaPcnyznfCiBHwyivWv9ZWUzU0N2en9smlqQmee66/US132pzcaKVHH7VzRL1BurvhZz9L9Cs5jjM4GlMAd3XVlvAFeOIJW4a+szt32rKQ8AU499x4Hg355jTr6elbx310HafmaEwdcC0b4bZvt2UpVcDEiaZGWLFiYMmw3UfXcWqexhwBDxtWOPlMvRCqDz7+cTOkFZvgMkroD/zww9nR85lnVq6fjuMMmMYcARebt2ygdHTYcas9b9jWreXNW+azFzhO3dCYAjgpmprgtNMsT+4HP2jTs7/xRv8cCEkSupnl0tZWPDevTzTpOHVHYwrgt95K7lgTJ1pmsaihq5jhTCQ7lxoUFqhROjpgwgQ4/3x44AFLrJN7zJ07i+fm9YkmHafuaEwd8EAYP95Gi889Zzrko48unEA89LkNAyZCgTxihJVNm/rOOfbud1tym0MPhaeesvVx48w/N/TdnTkzq+NtazPDWUdHtv6ZZxZPhu2zFzhO3dF4AricV+7997cR6tSp5U9jVCrncPR4oedDlDlzzLsh30wDA43k89kLHKeuaLxcEL29NuLcsKFwna6u+g5XdhynpqnpXBAi8iEReVxEnhKRyxI9eFcXfOhDhferuvB1HKcmqLoAFpEMcBVwKnAY8AkROSzRk2zenF0fPTq7Xgejfcdxhg5p6IDfBTylqusBROTHwBnAH4q2Kgeflt5xnDogDRXE/sCzkc8bg219EJELRGSliKzcsmVL1TrnOI5TLdIQwPkSNfTTDajqNao6XVWnjx07tgrdchzHqS5pCOCNwAGRzxMAt4o5jjPkqLobmog0A08AJwHPAQ8Cn1TVdUXabAHiTHW8D/BiEv1MAO9LYWqpP7XUF6it/tRSX6C2+lOqLxNVteSre9WNcKr6toj8LXAHkAG+X0z4Bm1i6SBEZGUc37tq4H0pTC31p5b6ArXVn1rqC9RWf5LqSyqRcKp6G3BbGud2HMepFRozGY/jOE4d0GgC+Jq0OxDB+1KYWupPLfUFaqs/tdQXqK3+JNKXusgF4TiO04g02gjYcRynbnAB7DiOkxJ1J4BLZVITkVYRWRLsv19EuivYlwNE5C4ReVRE1onIxXnqnCAir4rI6qB8qYL92SAivw/O0y9/pxjfDK7NIyIyrYJ9OSTynVeLyGsicklOnYpdGxH5voi8ICJrI9v2EpHlIvJksOws0Pa8oM6TInJeBfvzDRF5LPgtbhaRMQXaFv1dE+rLV0TkuchvcVqBtolmMizQlyWRfmwQkdUF2iZ6XYJj5n2mK3bvqGrdFMxv+I/AQUALsAY4LKfOAuDbwfrZwJIK9qcLmBasj8QCTHL7cwJwa5WuzwZgnyL7TwNux8LBjwXur+Lv9jzmnF6VawO8H5gGrI1s+zpwWbB+GbAoT7u9gPXBsjNY76xQf04BmoP1Rfn6E+d3TagvXwEujfE7Fn3+kuhLzv7/DXypGtclOGbeZ7pS9069jYD/nElNVXcCYSa1KGcA/xGs/wQ4SUTy5Z8YNKraq6oPB+vbgEfJk1iohjgDuE6N+4AxItJVhfOeBPxRVeNEMyaCqt4DvJyzOXpv/AfwkTxNPwgsV9WXVXUrsBwokmB64P1R1WWqGk4weB8Wll9xClybOMR5/hLrS/DcngncMJhzlNmfQs90Re6dehPAcTKp/blOcHO/Cuxd6Y4Fqo4jgfvz7D5ORNaIyO0i0lPBbiiwTEQeEpEL8uyPlYmuApxN4YeoWtcGYJyq9oI9aMC+eeqkdY3+Cns7yUep3zUp/jZQh3y/wCt2ta/N+4DNqvpkgf0VvS45z3RF7p16E8BxMqnFyraWJCIyAvgpcImqvpaz+2Hs1XsK8C3glgp25T2qOg1Ldv8ZEXl/blfztKn0tWkBZgE35dldzWsTlzSu0ReBt4HrC1Qp9bsmwWLgYGAq0Iu9+vfrap5tlbw2n6D46Ldi16XEM12wWZ5tRa9PvQngOJnU/lxHLPHPaAb2uhULERmG/VDXq2q/TPCq+pqqvh6s3wYME5F9KtEXVd0ULF8AbsZeGaOkkYnuVOBhVd2cu6Oa1yZgc6hyCZYv5KlT1WsUGGpOB+ZqoEjMJcbvOmhUdbOq7lbVPcB3C5yjatcmeHbnAEsK1anUdSnwTFfk3qk3AfwgMFlEJgUjq7OBn+fU+TkQWh8/BtxZ6MYeLIGO6nvAo6r6fwrU2S/UQYvIu7Br/lIF+jJcREaG65iBZ21OtZ8D88Q4Fng1fK2qIAVHMdW6NhGi98Z5wM/y1LkDOEVEOoPX8FOCbYkjIh8C/icwS1XfKFAnzu+aRF+itoDZBc4R5/lLig8Aj6nqxnw7K3VdijzTlbl3krQgVqNglvwnMGvsF4Ntl2M3MUAb9rr7FPAAcFAF+/Je7BXjEWB1UE4DLgIuCur8LbAOsxjfB7y7Qn05KDjHmuB84bWJ9kWw+fj+CPwemF7h36oDE6ijI9uqcm0wod8L7MJGJp/GbAErgCeD5V5B3enA/4u0/avg/nkK+FQF+/MUpjMM753Qe2c8cFux37UCfflhcE88ggmbrty+BJ/7PX9J9yXYfm14n0TqVvS6BMct9ExX5N7xUGTHcZyUqDcVhOM4TsPgAthxHCclXAA7juOkhAtgx3GclHAB7DiOkxIugJ2aR0QmiMjPggxTfxSRK0WkRUTOF5F/T7t/uYjI62n3wakPXAA7NU3gGL8UuEVVJwPvBEYA/1yh86UyUa0zNHEB7NQ6M4A3VfUHAKq6G/gc5vDeARwgIr8MctR+Gf4cJfVfQZKftSJyVrD9KBH5dZC85Y5IaOndIvIvIvJr4ItBntmmYF+HiDwrIsNE5ODgXA+JyG9E5NCgziQR+Z2IPCgiC6t9gZz6xf/tnVqnB3goukFVXxORP2H377uAvwTeAB4Ukf8CJgKbVPXDACIyOojv/xZwhqpuCYTyP2OCHGCMqh4f1J8GHA/cBcwE7lDVXSJyDRad9aSIHANcjf1BXAksVtXrROQzlbsUTqPhAtipdYT8GaXC7ctV9SUAEVmKhZLeBlwhIouwhO+/EZG/xAT18iD9RAYLgQ1ZkrN+FiaAzwauDrJjvRu4SbLppVuD5XuAjwbrP8SSqztOSVwAO7XOOrLCDQARGYVlndpNf+GsqvqEiByFxfD/q4gsw7JlrVPV4wqcZ3tk/edBu72Ao4A7geHAK6o6tUB7j+l3ysZ1wE6tswLoEJF5ACKSwXLVXoupHU4Wm6+rHZul4LciMh54Q1X/E7gCm/LmcWCsiBwXHGeYFEgAr5Yi8wFMtXCrWprG14CnReTjQXsRkSlBk99iI2WAucl+faeRcQHs1DRq2aJmAx8XkSexTFxvAl8IqtyLvfavBn6qqiuBw4EHxCZz/CLwNbUpdD4GLBKRNUH9dxc59RLgHPqqJuYCnw7aryM7Hc/FWELwB7H8044TC8+G5jiOkxI+AnYcx0kJF8CO4zgp4QLYcRwnJVwAO47jpIQLYMdxnJRwAew4jpMSLoAdx3FS4v8DcIsHcsB57rgAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(figsize=(5, 3))\n", + "ax.plot(results.observed, results.predicted, 'r*')\n", + "ax.set_title('Random Forest')\n", + "ax.set_ylabel('Predicted')\n", + "ax.set_xlabel('Observed')\n", + "fig.tight_layout()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "urbansim_templates", + "language": "python", + "name": "urbansim_templates" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/urbansim_templates/tests/test_cross_validate.py b/urbansim_templates/tests/test_cross_validate.py new file mode 100644 index 0000000..faf2489 --- /dev/null +++ b/urbansim_templates/tests/test_cross_validate.py @@ -0,0 +1,48 @@ +import orca +import numpy as np +import pandas as pd +import pytest + +from urbansim_templates import modelmanager +from urbansim_templates.models.regression import RandomForestRegressionStep, GradientBoostingRegressionStep + +@pytest.fixture +def orca_session(): + + d1 = {'a': np.random.random(100), + 'b': np.random.randint(2, size=100), + 'c': np.random.normal(size=100)} + + obs = pd.DataFrame(d1) + orca.add_table('obs', obs) + +def test_run(orca_session): + """ + For now this just tests that the code runs. + + """ + modelmanager.initialize() + + # random forest + rf = RandomForestRegressionStep(out_transform=np.exp, out_column='a') + rf.tables = 'obs' + rf.n_splits = 10 + rf.model_expression = 'a ~ b + c' + + rf.name = 'cross_validate_rf_-test' + + rf.fit() + rf.cross_validate_score() + + # gradient boosting + gb = GradientBoostingRegressionStep(out_transform=np.exp, out_column='a') + gb.tables = 'obs' + gb.n_splits = 10 + gb.model_expression = 'a ~ b + c' + + gb.name = 'cross_validate_gb_-test' + + + + + \ No newline at end of file diff --git a/urbansim_templates/tests/test_gradient_boosting.py b/urbansim_templates/tests/test_gradient_boosting.py new file mode 100644 index 0000000..4377d9a --- /dev/null +++ b/urbansim_templates/tests/test_gradient_boosting.py @@ -0,0 +1,40 @@ +import orca +import numpy as np +import pandas as pd +import pytest + +from urbansim_templates import modelmanager +from urbansim_templates.models.regression import GradientBoostingRegressionStep + +# load rental data +@pytest.fixture +def orca_session(): + + d1 = {'a': np.random.random(100), + 'b': np.random.randint(2, size=100), + 'c': np.random.normal(size=100)} + + obs = pd.DataFrame(d1) + orca.add_table('obs', obs) + +def test_run(orca_session): + """ + For now this just tests that the code runs. + + """ + modelmanager.initialize() + + # gradient boosting + gb = GradientBoostingRegressionStep(out_transform=np.exp, out_column='a') + gb.tables = 'obs' + gb.n_splits = 10 + gb.model_expression = 'a ~ b + c' + + gb.name = 'gb_-test' + + gb.fit() + gb.cross_validate_score() + modelmanager.register(gb) + + + \ No newline at end of file diff --git a/urbansim_templates/tests/test_large_multinomial_logit.py b/urbansim_templates/tests/test_large_multinomial_logit.py index 7203f56..836efe3 100644 --- a/urbansim_templates/tests/test_large_multinomial_logit.py +++ b/urbansim_templates/tests/test_large_multinomial_logit.py @@ -46,4 +46,6 @@ def test_observation_sampling(orca_session): m = modelmanager.get_step('mnl-test') assert(m.chooser_sample_size == 5) - modelmanager.remove_step('mnl-test') \ No newline at end of file + modelmanager.remove_step('mnl-test') + + diff --git a/urbansim_templates/tests/test_neuro_net.py b/urbansim_templates/tests/test_neuro_net.py new file mode 100644 index 0000000..9cce8c9 --- /dev/null +++ b/urbansim_templates/tests/test_neuro_net.py @@ -0,0 +1,48 @@ +import orca +import numpy as np +import pandas as pd +import pytest + +from urbansim_templates import modelmanager +from urbansim_templates.models.neuro_network import NeuralNetworkStep + +# load rental data +rental = pd.read_csv('data\\rentals_with_nodes.csv') +node_small = pd.read_csv('data\\nodessmall_vars.csv') +node_walk = pd.read_csv('data\\nodeswalk_vars.csv') + +data = pd.merge(rental, node_small, left_on='node_id_small', right_on='osmid') +data = pd.merge(data, node_walk, left_on='node_id_walk', right_on='osmid') + +# add columns -- we need a way to register those transformations in the model +data['log_rent_sqft'] = np.log(data.rent_sqft) + + +# register data in orca +orca.add_table('rental_prices', data) + +def test_nnet(): + """ + For now this just tests that the code runs. + + """ + modelmanager.initialize() + + # gradient boosting + nn = NeuralNetworkStep(out_transform=np.exp, out_column='rent_sqft') + nn.tables = 'rental_prices' + nn.n_splits = 10 + nn.model_expression = 'np.log(rent_sqft) ~ bedrooms + np.log1p(units_500_walk) + np.log1p(rich_500_walk) + np.log1p(singles_500_walk)' + + nn.name = 'mlp-test' + + nn.fit(verbose=0, epochs=4, batch_size=16) + nn.cross_validate_score(verbose=0, epochs=4, batch_size=16) + print(nn.cv_metric) + modelmanager.register(nn) + + + +if __name__ == '__main__': + test_nnet() + diff --git a/urbansim_templates/tests/test_random_forest.py b/urbansim_templates/tests/test_random_forest.py new file mode 100644 index 0000000..30874db --- /dev/null +++ b/urbansim_templates/tests/test_random_forest.py @@ -0,0 +1,74 @@ +import orca +import numpy as np +import pandas as pd +import pytest + +from urbansim_templates import modelmanager +from urbansim_templates.models.regression import RandomForestRegressionStep + + +@pytest.fixture +def orca_session(): + + d1 = {'a': np.random.random(100), + 'b': np.random.randint(2, size=100), + 'c': np.random.normal(size=100)} + + obs = pd.DataFrame(d1) + orca.add_table('obs', obs) + + +def test_run(orca_session): + """ + For now this just tests that the code runs. + + """ + modelmanager.initialize() + + m = RandomForestRegressionStep() + m.tables = 'obs' + m.model_expression = 'b ~ a + c' + + m.fit() + + m.name = 'random_forest-test' + modelmanager.register(m) + + modelmanager.initialize() + m = modelmanager.get_step('random_forest-test') + +def test_cross_validate(orca_session): + """ + Initialize and run cross validation + + Test + --------- + Test whether after cross validations validation metrics are added + """ + + modelmanager.initialize() + m = modelmanager.get_step('random_forest-test') + m.cross_validate_score(n_splits=5) + + assert m.cv_metric + + +def test_simulate(orca_session): + """ + Fit and run + + Test + ------------ + Test whether the output variable has been replaced in model.tables + """ + modelmanager.initialize() + m = modelmanager.get_step('random_forest-test') + + d1 = orca.get_table(m.tables).to_frame(columns='b') + m.run() + d2 = orca.get_table(m.tables).to_frame(columns='b') + assert ~ d2.equals(d1) + + + + diff --git a/urbansim_templates/tests/test_regression.py b/urbansim_templates/tests/test_regression.py index 5a73a76..2afbbfe 100644 --- a/urbansim_templates/tests/test_regression.py +++ b/urbansim_templates/tests/test_regression.py @@ -35,4 +35,8 @@ def test_ols(orca_session): modelmanager.initialize() m = modelmanager.get_step('ols-test') - modelmanager.remove_step('ols-test') \ No newline at end of file + modelmanager.remove_step('ols-test') + +if __name__ == '__main__': + session = orca_session() + test_ols(session) diff --git a/urbansim_templates/tests/test_small_multinomial_logit.py b/urbansim_templates/tests/test_small_multinomial_logit.py index 6475a8d..d7bcbb7 100644 --- a/urbansim_templates/tests/test_small_multinomial_logit.py +++ b/urbansim_templates/tests/test_small_multinomial_logit.py @@ -39,4 +39,8 @@ def test_small_mnl(orca_session): modelmanager.initialize() m = modelmanager.get_step('small-mnl-test') - modelmanager.remove_step('small-mnl-test') \ No newline at end of file + modelmanager.remove_step('small-mnl-test') + +if __name__ == '__main__': + session = orca_session() + test_small_mnl(session) diff --git a/urbansim_templates/tests/test_structure.py b/urbansim_templates/tests/test_structure.py new file mode 100644 index 0000000..a1f7e8f --- /dev/null +++ b/urbansim_templates/tests/test_structure.py @@ -0,0 +1,38 @@ +import orca +import numpy as np +import pandas as pd +import pytest + +from urbansim_templates import modelmanager +from urbansim_templates.models import BinaryLogitStep + +d1 = {'a': np.random.random(100), + 'b': np.random.randint(2, size=100)} + +obs = pd.DataFrame(d1) +orca.add_table('obs', obs) + + +def test_binary_logit(): + """ + For now this just tests that the code runs. + + """ + modelmanager.initialize() + + m = BinaryLogitStep() + m.tables = 'obs' + m.model_expression = 'b ~ a' + + m.fit() + + m.name = 'binary-test' + modelmanager.register(m) + + modelmanager.initialize() + m = modelmanager.get_step('binary-test') + + +if __name__ == '__main__': + test_binary_logit() + \ No newline at end of file diff --git a/urbansim_templates/tests/test_utils.py b/urbansim_templates/tests/test_utils.py index 93bf72d..9718f6f 100644 --- a/urbansim_templates/tests/test_utils.py +++ b/urbansim_templates/tests/test_utils.py @@ -1,7 +1,20 @@ import pytest +import orca +import numpy as np +import pandas as pd from urbansim_templates import utils +from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor +@pytest.fixture +def orca_session(): + + d1 = {'a': np.random.random(100), + 'b': np.random.randint(2, size=100), + 'c': np.random.normal(size=100)} + + obs = pd.DataFrame(d1) + orca.add_table('obs', obs) def test_version_parse(): assert utils.version_parse('0.1.0.dev0') == (0, 1, 0, 0) @@ -18,4 +31,86 @@ def test_version_greater_or_equal(): assert utils.version_greater_or_equal('1.1.2', '1.1.3') == False assert utils.version_greater_or_equal('1.1.3', '1.1.3') == True assert utils.version_greater_or_equal('1.1.3.dev1', '1.1.3.dev0') == True - assert utils.version_greater_or_equal('1.1.3.dev0', '1.1.3') == False \ No newline at end of file + assert utils.version_greater_or_equal('1.1.3.dev0', '1.1.3') == False + +def test_convert_to_model(orca_session): + """ + Create a model from sklearn and transform it into a urbansim_templates models + + Test + ------------------ + Check wether model.fit, model.predict have dataframe as argument after conversion + + """ + d1 = orca.get_table('obs') + + rf = RandomForestRegressor() + rf_new = utils.convert_to_model(rf, ' a ~ b + c') + + rf_new.fit(d1) + rf_new.predict(d1) + +def test_fit_keywords(orca_session): + """ + Create a model from sklearn and transform it into a urbansim_templates models + + Test + ------------------ + Check wether keywords in the original fit method are transfered to model.fit + + """ + d1 = orca.get_table('obs') + + rf = RandomForestRegressor() + rf_new = utils.convert_to_model(rf, 'a ~ b + c') + + weights = np.ones(len(d1)) + rf_new.fit(d1, sample_weight=weights) + + +def test_parsing(orca_session): + """ + From a model expression in patsy format, extract corresponding columns from a data + + Test + ------------------------------------- + test whether from_patsy_to_array returns df['a'] and df[['b', 'c']] from 'a ~ b + c' + + """ + d = orca.get_table('obs').to_frame() + d_x, d_y = utils.from_patsy_to_array('a ~ b + c', d) + + assert (d_x == d[['b', 'c']]).all().all() + assert d_y.equals(d[['a']]) + +def test_parsing_transform(orca_session): + """ + From a model expression in patsy format, extract corresponding columns from a data + + Test + ------------------------------------- + test whether from_patsy_to_array returns df['np.exp(a)'] and df[[np.log1p(np.exp(b)), 'c']] + from 'np.exp(a) ~ np.log1p(np.exp(b))+ c' + + """ + d = orca.get_table('obs').to_frame() + d_x, d_y = utils.from_patsy_to_array('np.exp(a) ~ np.log1p(np.exp(b)) + c', d) + + assert (d_x.columns == ['np.log1p(np.exp(b))', 'c']).all() + assert d_y.columns == ['np.exp(a)'] + + +def test_parsing_space(orca_session): + """ + From a model expression in patsy format, extract corresponding columns from a data + after patsy cleans the expression by removing extra space + Test + ------------------------------------- + test whether from_patsy_to_array returns df['a'] and df[['b', 'c']] from ' a ~ b + c' + + """ + d = orca.get_table('obs').to_frame() + d_x, d_y = utils.from_patsy_to_array(' a ~ b + c', d) + + assert (d_x == d[['b', 'c']]).all().all() + assert d_y.equals(d[['a']]) diff --git a/urbansim_templates/utils.py b/urbansim_templates/utils.py index 5e1a8f4..7dba823 100644 --- a/urbansim_templates/utils.py +++ b/urbansim_templates/utils.py @@ -1,5 +1,7 @@ from __future__ import print_function - +import numpy as np +import pandas as pd +from patsy import dmatrices def version_parse(v): """ @@ -82,5 +84,133 @@ def version_greater_or_equal(a, b): return True return False + +def from_patsy_to_array(model_expression, data): + """From a patsy string to the corresponding column in data + + This function looks at each variable names in a + patsy sting, evaluate potential numpy functions applied to that variable + and select the corresponding columns in data + + This is convenient utility to register transformation + as part of the model + + Example + -------- + >>> from urbansim_templates.utils import from_patsy_to_array + >>> import pandas as pd + >>> model_expression = 'np.log(y) ~ np.log1p(x)' + >>> data = pd.DataFrame([[0, 0, 3], [0,2, 4]]) + >>> data.columns =['x', 'y', 'z'] + >>> df_x, df_y = from_patsy_to_array(model_expression, data) + + df_x: np.log1p(x) + 0 0.0 + 1 0.0 + + df_y: + np.log(y) + 0 0.000000 + 1 0.693147 + + + Arguments + -------- + model_expression: a patsy string + model formula "lhs' ~ 'rhs' with possibly numpy function applied to + rhs and/or lhs variables + + data: pandas dataframe + dataframe with columns including all variable in model_expression + + Returns + -------- + df_x: pandas dataframe + a dataframe with transformed rhs variables + + df_y: pandas series + a series with tranformed lhs variable + + To do + ------- + The issue is arbitrary code execution, so we need to restrain + the functions allowed. Right now use only functions implemented in + numpy via patsy + """ + matrix_variables = dmatrices(model_expression + ' - 1', data, return_type='dataframe') + df_x = pd.DataFrame(matrix_variables[1], index=data.index) + df_y = pd.DataFrame(matrix_variables[0], index=data.index) + + return df_x, df_y + + +def convert_to_model(model, model_expression, ytransform=None): + """Convert model to the right format + + This function returns a model with a fit and predict method compatible with the + TemplateStep structure. + + Example + ---------- + >>> from sklearn.ensemble import RandomForestRegressor + >>> model = RandomForestRegressor() + >>> model_expression = 'np.log(y) ~ np.log1p(x)' + >>> new_model = convert_to_model(model, model_expression) + >>> trainX = np.array(data['X']) + >>> trainY = np.array(data['Y'] + Compare new_model.fit(data) to model.fit(np.log1p(trainX), np.log(trainY)) + + Arguments + --------- + model: object + model with a fit and predict method + + model_expression: a patsy string + model formula "lhs' ~ 'rhs' with possibly numpy function applied to + rhs and/or lhs variables + + Keywords: + --------- + ytransform: numpy function + applied to the output once predicted + + Returns + --------- + object + model with modified fit and predict methods + """ + model.fit_previous = model.fit + model.predict_previous = model.predict + + def fit(data, **keywords): + + # transform data using patsy dmatrix + df_x, df_y = from_patsy_to_array(model_expression, data) + + trainX = df_x + trainY = np.ravel(np.array(df_y)) + + # model right habd side variables + model.rhs = list(df_x.columns) + + return model.fit_previous(trainX, trainY, **keywords) + + def predict(data): + + # transform data using patsy dmatrix + df_x, _ = from_patsy_to_array(model_expression, data) + + testX = np.array(df_x) + values = model.predict_previous(testX).ravel() + + if ytransform: + values = ytransform(values) + + return pd.Series(pd.Series(values, index=data.index)) + + model.fit = fit + model.predict = predict + return model + + - \ No newline at end of file