diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/README.tpl.md b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/README.tpl.md index 60aa98d..882eefe 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/README.tpl.md +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/README.tpl.md @@ -116,9 +116,9 @@ The bash command will access the Dockerfile in the folder, create the image and To test the images in ECR, execute the following notebooks: -- project-name/src/ml/notebooks/Sagemaker_Processor.ipynb -- project-name/src/ml/notebooks/Sagemaker_Train.ipynb -- project-name/src/ml/notebooks/Sagemaker_Inference.ipynb +- project-name/src/ml/notebooks/1_Sagemaker_Processor.ipynb +- project-name/src/ml/notebooks/2_Sagemaker_Train.ipynb +- project-name/src/ml/notebooks/3_Sagemaker_Inference.ipynb ## Stepfunctions @@ -240,5 +240,5 @@ Next, create and attach another new policy to the role you created: To create and test the Step Functions state machines, execute the following notebooks: -- project-name/src/ml/notebooks/Sagemaker_StepFunctions_Train.ipynb -- project-name/src/ml/notebooks/Sagemaker_StepFunctions_Inference.ipynb \ No newline at end of file +- project-name/src/ml/notebooks/4_Sagemaker_StepFunctions_Train.ipynb +- project-name/src/ml/notebooks/5_Sagemaker_StepFunctions_Inference.ipynb \ No newline at end of file diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/inference/handler.py b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/inference/handler.py index b6bdc50..b0a0ce5 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/inference/handler.py +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/inference/handler.py @@ -3,9 +3,9 @@ import os import logging -import pandas as pd from joblib import load from six import StringIO +import pandas as pd from ml.model.wrapper import Wrapper from sagemaker_inference.default_inference_handler import DefaultInferenceHandler @@ -17,49 +17,106 @@ # Path to access the model MODEL_DIR = '/opt/ml/model' -def _csv_to_pandas(string_like): # type: (str) -> pd.DataFrame - """Convert a CSV object to a pandas DataFrame. - Args: - string_like (str): CSV string. - - Returns: - (pd.DataFrame): pandas DataFrame - """ + +def _csv_to_pandas(string_like): + """ + Convert a CSV object to a pandas DataFrame. + + Parameters + ---------- + string_like : String + CSV string. + + Returns + ------- + pd.DataFrame : pandas DataFrame + """ stream = StringIO(string_like) res = pd.read_csv(stream) return res + class HandlerService(DefaultHandlerService, DefaultInferenceHandler): """ - Execute the inference step in the virtual environment - + Execute the inference step in the virtual environment + """ def __init__(self): op = transformer.Transformer(default_inference_handler=self) super(HandlerService, self).__init__(transformer=op) - - # Loads the model from the disk + def default_model_fn(self, model_dir): - logging.info('Loading the model') + """ + Loads the model from the disk + + Parameters + ---------- + model_dir : string + Path of the model + + Returns + ------- + pkl : model + """ + logging.info('Loading the model') return load(os.path.join(MODEL_DIR, "model.pkl")) - - # Parse and check the format of the input data + def default_input_fn(self, input_data, content_type): + """ + Parse and check the format of the input data + + Parameters + ---------- + input_data : string + CSV string + content_type : string + Type of the file + + Returns + ------- + pd.DataFrame : pandas DataFrame + """ global colunas if content_type != "text/csv": raise Exception("Invalid content-type: %s" % content_type) - return _csv_to_pandas(input_data) - - # Run our model and do the prediction + return _csv_to_pandas(input_data) + def default_predict_fn(self, df, model): - logging.info('Predicting...') - resultados = model.predict(df,included_input=True) - logging.info('Prediction Complete') + """ + Run our model and do the prediction + + Parameters + ---------- + df : pd.DataFrame + Data to be predicted + model : pkl + Model to predict the data + + Returns + ------- + pd.DataFrame : pandas DataFrame + """ + logging.info('Predicting...') + resultados = model.predict(df, included_input=True) + logging.info('Prediction Complete') return resultados.reset_index(drop=True).T.reset_index().T - - # Gets the prediction output and format it to be returned to the user + def default_output_fn(self, prediction, accept): - logging.info('Saving') + """ + Gets the prediction output and format it to be returned to the user + + Parameters + ---------- + prediction : pd.DataFrame + Predicted dataset + accept : string + Output type + + Returns + ------- + CSV : CSV file + """ + logging.info('Saving') if accept != "text/csv": raise Exception("Invalid accept: %s" % accept) - return encoder.encode(prediction, accept) \ No newline at end of file + return encoder.encode(prediction, accept) diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/inference/main.py b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/inference/main.py index 9ff9b2a..803a9e9 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/inference/main.py +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/inference/main.py @@ -1,12 +1,10 @@ -import argparse import sys import os +import argparse import logging from sagemaker_inference import model_server logging.getLogger().setLevel(logging.INFO) - if __name__ == "__main__": - - model_server.start_model_server(handler_service="serving.handler") \ No newline at end of file + model_server.start_model_server(handler_service="serving.handler") diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/processor/preprocessor.py b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/processor/preprocessor.py index c78c24b..bb269eb 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/processor/preprocessor.py +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/processor/preprocessor.py @@ -1,71 +1,101 @@ -from ml.preprocessing.preprocessing import Preprocessing -from ml.preprocessing.dataquality import DataQuality -from ml.data_source.spreadsheet import Spreadsheet -import great_expectations as ge -from datetime import date -import pandas as pd import argparse import logging +from datetime import date + +import pandas as pd import glob import json from joblib import dump, load +import great_expectations as ge + +from ml.preprocessing.preprocessing import Preprocessing +from ml.preprocessing.dataquality import DataQuality +from ml.data_source.spreadsheet import Spreadsheet logging.getLogger().setLevel('INFO') -if __name__=='__main__': +path_input = '/opt/ml/processing/input/' +path_output = '/opt/ml/processing/output/' +date = date.today().strftime('%Y%m%d') + +def data_quality(df, step_train): + """ + If True, it creates the DataQuality object, + otherwise it loads an existing one + + Parameters + ---------- + df : pd.Dataframe + Train or test dataset + step_train : boolean + Train or test + """ - Execute the processor step in the virtual environment - + if step_train: + dq = DataQuality(discrete_cat_cols=['Sex', 'Pclass', 'Survived']) + df_ge = dq.perform(df) + df_ge.save_expectation_suite(path_output + + 'expectations/expectations.json') + else: + df_ge = ge.dataset.PandasDataset(df) + ge_val = df_ge.validate(expectation_suite=path_input + + 'expectations/expectations.json', + only_return_failures=False) + with open(f'{path_output}validations/{date}.json', 'w') as f: + json.dump(ge_val.to_json_dict(), f) + + +def preprocessing(df, step_train): + """ + If True, it creates the Preprocessing object, + otherwise it loads an existing one + + Parameters + ---------- + df : pd.Dataframe + Train or test dataset + step_train : boolean + Train or test + + """ + if step_train: + norm_cols = {'min-max': ['Age']} + oneHot_cols = ['Pclass', 'Sex'] + p = Preprocessing(norm_cols, oneHot_cols) + train, test_train = p.execute(df, step_train=True, val_size=0.2) + logging.info("Saving") + dump(p, path_output+'preprocessing/preprocessing.pkl') + train.to_csv(path_output+'processed/train/train.csv', index=False) + test_train.to_csv(path_output+'processed/val/val.csv', index=False) + else: + p = load(path_input+'preprocessing/preprocessing.pkl') + test = p.execute(df, step_train=False) + logging.info("Saving") + test.to_csv(path_output+'processed/inference/inference.csv', + index=False) + + +if __name__ == '__main__': + """ + Execute the processor step in the virtual environment + """ logging.info('Starting the preprocessing') - + # Read the step argument (train or test) parser = argparse.ArgumentParser() parser.add_argument('--step', type=str, default='train') - args = parser.parse_args() + args = parser.parse_args() step_train = True if args.step == "train" else False logging.info(f'step_train: {step_train}') - + logging.info('Reading the inputs') - file = glob.glob("/opt/ml/processing/input/raw_data/*.csv")[0] + file = glob.glob(path_input+"raw_data/*.csv")[0] logging.info(f'Reading file: {file}') df = Spreadsheet().get_data(file) - + logging.info("Data Quality") - # If True, it creates the DataQuality object, otherwise it loads an existing one - if step_train: - dq = DataQuality(discrete_cat_cols=['Sex', 'Pclass']) - df_ge = dq.perform(df, target='Survived') - df_ge.save_expectation_suite('/opt/ml/processing/output/expectations/expectations.json') - else: - date = date.today().strftime('%Y%m%d') - df_without_target = df.copy() - if 'Survived' in df_without_target.columns: - df_without_target.drop(columns=['Survived'], inplace=True) - df_ge = ge.dataset.PandasDataset(df_without_target) - ge_val = df_ge.validate(expectation_suite='/opt/ml/processing/input/expectations/expectations.json', only_return_failures=False) - with open(f'/opt/ml/processing/output/validations/{date}.json', 'w') as f: - json.dump(ge_val.to_json_dict(), f) + data_quality(df, step_train) logging.info("Preprocessing") - # If True, it creates the Preprocessing object, otherwise it loads an existing one - if step_train: - norm_cols = {'min-max': ['Age']} - oneHot_cols = ['Pclass','Sex'] - p = Preprocessing(norm_cols, oneHot_cols) - train, test_train = p.execute(df, step_train = True, val_size = 0.2) - else: - p = load("/opt/ml/processing/input/preprocessing/preprocessing.pkl") - test = p.execute(df, step_train = False) - - logging.info("Saving") - # If True, it saves the Preprocessing to be used later in the inference step - if step_train: - dump(p, '/opt/ml/processing/output/preprocessing/preprocessing.pkl') - - # If True, it saves the train and val files, otherwise it saves only the inference file - if step_train: - train.to_csv('/opt/ml/processing/output/processed/train/train.csv', index=False) - test_train.to_csv('/opt/ml/processing/output/processed/val/val.csv', index=False) - else: - test.to_csv('/opt/ml/processing/output/processed/inference/inference.csv', index=False) \ No newline at end of file + preprocessing(df, step_train) diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/metrics.py b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/metrics.py index 34cd079..f9ed342 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/metrics.py +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/metrics.py @@ -3,210 +3,220 @@ from sklearn.metrics import make_scorer from sklearn.model_selection import cross_validate + class Metrics: - + @classmethod def smape(cls, A, F): """ - Calculates the smape value between the real and the predicted - - Parameters - ---------- + Calculates the smape value between the real and the predicted + + Parameters + ---------- A : array Target values F : array Predicted values - - Returns - ------- - float: smape value - """ + + Returns + ------- + float: smape value + """ return 100/len(A) * np.sum(np.abs(F - A) / (np.abs(A) + np.abs(F))) - + @classmethod def __custom_score(cls, y_true, y_pred): """ - Creates a custom metric - - Parameters - ---------- + Creates a custom metric + + Parameters + ---------- y_true : array Target values y_pred : array Predicted values - - Returns - ------- - sklearn.metrics - """ + + Returns + ------- + sklearn.metrics + """ #return sklearn.metrics.fbeta_score(y_true, y_pred, 2) pass - + @classmethod def customized(cls, y_true, y_pred): """ - Creates a custom metric - - Parameters - ---------- + Creates a custom metric + + Parameters + ---------- y_true : array Target values y_pred : array Predicted values - - Returns - ------- - float - """ + + Returns + ------- + float + """ custom_metric = make_scorer(cls.__custom_score, greater_is_better=True) return custom_metric - + @classmethod def mape(cls, y_true, y_pred): """ - Calculates the map value between the real and the predicted - - Parameters - ---------- + Calculates the map value between the real and the predicted + + Parameters + ---------- y_true : array Target values y_pred : array Predicted values - - Returns - ------- - float : value of mape - """ + + Returns + ------- + float : value of mape + """ y_true, y_pred = np.array(y_true), np.array(y_pred) return np.mean(np.abs(((y_true+1) - (y_pred+1)) / (y_true+1))) * 100 - @classmethod def regression(cls, y_true, y_pred): """ - Calculates some metrics for regression problems - - Parameters - ---------- + Calculates some metrics for regression problems + + Parameters + ---------- y_true : array Target values y_pred : array Predicted values - - Returns - ------- - dict : metrics results - """ - results = {'mean_absolute_error': round(mean_absolute_error(y_true, y_pred), 7), - 'root_mean_squared_error': round(np.sqrt(mean_squared_error(y_true, y_pred)), 7), - 'r2': round(r2_score(y_true, y_pred), 7), - 'smape': round(cls.smape(y_true, y_pred), 7), - 'mape': round(cls.mape(y_true, y_pred), 7) - } + + Returns + ------- + dict : metrics results + """ + results = {'mean_absolute_error': round(mean_absolute_error( + y_true, y_pred), 7), + 'root_mean_squared_error': round(np.sqrt( + mean_squared_error(y_true, y_pred)), 7), + 'r2': round(r2_score(y_true, y_pred), 7), + 'smape': round(cls.smape(y_true, y_pred), 7), + 'mape': round(cls.mape(y_true, y_pred), 7) + } return results - + @classmethod - def crossvalidation(cls, model, X, y, classification: bool, cv=5, agg=np.mean): + def crossvalidation(cls, model, X, y, classification: bool, + cv=5, agg=np.mean): if classification: if len(set(y)) > 2: - metrics = ['accuracy','f1_weighted', 'recall_weighted','precision_weighted'] + metrics = ['accuracy', 'f1_weighted', + 'recall_weighted', 'precision_weighted'] else: - metrics = ['accuracy','f1', 'recall','precision', 'roc_auc'] + metrics = ['accuracy', 'f1', 'recall', 'precision', 'roc_auc'] else: - metrics = ['mean_absolute_error', 'r2', 'root_mean_squared_error', 'smape', 'mape'] - res_metrics = cross_validate(model, X, y, cv=cv, return_train_score=False, scoring=metrics) - results = {metric.replace("test_", ""): round(agg(res_metrics[metric]),7) for metric in res_metrics} + metrics = ['mean_absolute_error', 'r2', 'root_mean_squared_error', + 'smape', 'mape'] + res_metrics = cross_validate(model, X, y, cv=cv, + return_train_score=False, + scoring=metrics) + results = {metric.replace("test_", ""): round(agg( + res_metrics[metric]), 7) + for metric in res_metrics} return results @classmethod def __multiclass_classification(cls, y_true, y_pred): """ - Calculates some metrics for multiclass classification problems - - Parameters - ---------- + Calculates some metrics for multiclass classification problems + + Parameters + ---------- y_true : array Target values y_pred : array Predicted values - - Returns - ------- - dict : metrics results - """ - results = {'accuracy': accuracy_score(y_true, y_pred), - 'f1': f1_score(y_true, y_pred, average='weighted'), - 'precision': precision_score(y_true, y_pred, average='weighted'), - 'recall': recall_score(y_true, y_pred, average='weighted'), - } + + Returns + ------- + dict : metrics results + """ + results = {'accuracy': accuracy_score(y_true, y_pred), + 'f1': f1_score(y_true, y_pred, average='weighted'), + 'precision': precision_score(y_true, y_pred, + average='weighted'), + 'recall': recall_score(y_true, y_pred, + average='weighted')} return results - + @classmethod def __binary_classification(cls, y_true, y_pred, y_probs): """ - Calculates some metrics for binary classification problems - - Parameters - ---------- + Calculates some metrics for binary classification problems + + Parameters + ---------- y_true : array Target values y_pred : array Predicted values - - Returns - ------- - dict : metrics results - """ - results = {'accuracy': accuracy_score(y_true, y_pred), - 'f1': f1_score(y_true, y_pred), - 'precision': precision_score(y_true, y_pred), - 'recall': recall_score(y_true, y_pred), - 'roc_auc': roc_auc_score(y_true, y_probs) - } + + Returns + ------- + dict : metrics results + """ + results = {'accuracy': accuracy_score(y_true, y_pred), + 'f1': f1_score(y_true, y_pred), + 'precision': precision_score(y_true, y_pred), + 'recall': recall_score(y_true, y_pred), + 'roc_auc': roc_auc_score(y_true, y_probs)} return results - + @classmethod def classification(cls, y_true, y_pred, y_probs): """ - Checks which classification method will be applied: binary or multiclass - - Parameters - ---------- + Checks which classification method will be applied: + binary or multiclass + + Parameters + ---------- y_true : array Target values y_pred : array Predicted values y_probs : array Probabilities values - - Returns - ------- - dict: metrics results - """ + + Returns + ------- + dict: metrics results + """ if len(set(y_true)) > 2: results = cls.__multiclass_classification(y_true, y_pred) else: results = cls.__binary_classification(y_true, y_pred, y_probs) return results - - + @classmethod def clusterization(cls, X, labels): """ - Calculates some metrics on clustering quality - - Parameters - ---------- + Calculates some metrics on clustering quality + + Parameters + ---------- X : array[array], shape (n_linha, n_colunas) Matrix with the values that were used in the cluster labels : array, shape (n_linha, 1) - Vector with labels selected by the clustering method (eg KMeans) - - Returns - ------- - dict : metrics results - """ - results = {'silhouette': silhouette_score(X, labels, metric='euclidean'), - 'calinski_harabaz': calinski_harabaz_score(X, labels) - } - return results \ No newline at end of file + Vector with labels selected by the clustering method + (eg KMeans) + + Returns + ------- + dict : metrics results + """ + results = {'silhouette': silhouette_score(X, labels, + metric='euclidean'), + 'calinski_harabaz': calinski_harabaz_score(X, labels)} + return results diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/trainer.py b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/trainer.py index 1266611..e73706c 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/trainer.py +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/trainer.py @@ -1,104 +1,253 @@ -from abc import ABC, abstractmethod -from ml.model.wrapper import Wrapper -from ml.model.metrics import Metrics -import statsmodels.formula.api as smf -from sklearn.model_selection import train_test_split -import numpy as np - -class Trainer(ABC): - def __init__(self): - """ - Constructor - - Parameters - ---------- +from joblib import dump, load +from datetime import date +import mlflow.pyfunc +from mlflow import pyfunc +from interpret.ext.blackbox import TabularExplainer, MimicExplainer +from interpret.ext.glassbox import * +import pandas as pd + +from util import load_yaml, load_json + + +class Wrapper(mlflow.pyfunc.PythonModel): + def __init__(self, model=None, metrics=None, columns=None): + """ + Constructor + + Parameters + ---------- + model : object + If it's just a model: enter all parameters + if it is more than one model: do not enter parameters + and use the add method to add each of the models + metrics : dict + Dictionary with the metrics of the result + of the model + columns : list + list with columns names + Returns + ------- + WrapperModel + """ + self.artifacts = dict() + self.artifacts["model"] = model + self.artifacts["metrics"] = metrics + self.artifacts["columns"] = columns + self.artifacts["creation_date"] = date.today() + + def predict(self, model_input, included_input=False): + """ + Method that returns the result of the prediction on a dataset + + Parameters + ---------- + df : pd.DataFrame + Data to be predicted + + Returns + ------- + list + """ + df_processed = model_input.copy() + model = self.artifacts["model"] + columns = self.artifacts["columns"] + result = model.predict(df_processed[columns]) + if included_input: + model_input['predict'] = result + result = model_input + return result + + def predict_proba(self, model_input, binary=False): + """ + Method that returns the result of the prediction on a dataset + + Parameters + ---------- + df : pd.DataFrame + data to be predicted + + Returns + ------- + list + """ + df_processed = model_input.copy() + model = self.artifacts["model"] + columns = self.artifacts["columns"] + if binary: + return model.predict_proba(df_processed[columns])[:, 1] + else: + return model.predict_proba(df_processed[columns]) + + def save_model(self, path): + """ + Saves the model object to a specific path + + Parameters + ---------- + path : str + path where the model object will be saved + + Returns + ------- None - - Returns - ------- - Trainer - """ - - @abstractmethod - def train(self): - """ - Abstract method that should be implemented in every class that inherits TrainerModel - Parameters - ---------- + """ + dump(self, path) + + @staticmethod + def load_model(path): + """ + Loads the model object in a specific path + + Parameters + ---------- + path : str + path where the model object will be loaded. + + Returns + ------- None - - Returns - ------- - None - """ - pass - -class TrainerSklearn(Trainer): - - def train(self, train, val, y_name, - classification: bool, - algorithm, - columns = None, - **params): - """ - Method that builds the Sklearn model - - Parameters - ---------- - train : pd.Dataframe - data to train the model - val : pd.Dataframe - data to validate the model - y_name : str - target name - algorithm : Sklearn algorithm - algorithm to be trained - classification : bool - if True, classification model training takes place, otherwise Regression - columns : array - columns name to be used in the train - - Returns - ------- - Wrapper - """ - model = algorithm(**params) #model - y_train = train[y_name] - y_val = val[y_name] - X_train = train[columns] - X_val = val[columns] - model.fit(X_train,y_train) - y_pred = model.predict(X_val) - y_probs = model.predict_proba(X_val)[:,1] - if classification: - res_metrics = Metrics.classification(y_val.values, y_pred, y_probs) - else: - res_metrics = Metrics.regression(y_val.values, y_pred) - model = Wrapper(model, res_metrics, X_train.columns) - return model - - -class TrainerSklearnUnsupervised(Trainer): - - def train(self, X, - algorithm, - **params): - """ - Method that builds the Sklearn model - - Parameters - ---------- - model_name : str - model name - - Returns - ------- - Wrapper - """ - model = algorithm(**params) #model - columns = list(X.columns) - model.fit(X) - labels = model.predict(X) - res_metrics = Metrics.clusterization(X, labels) - model = Wrapper(model, res_metrics, columns) + """ + model = load(path) return model + + def save(self, path): + """ + Save model as a Wrapper class + + Parameters + ---------- + path : str + path where the model object will be loaded. + + Returns + ------- + None + """ + path_artifacts = path + "_artifacts.pkl" + dump(self.artifacts, path_artifacts) + content = load_json("config/arquivos.json") + conda_env = load_yaml(content["path_yaml"]) + mlflow.pyfunc.save_model( + path=path, + python_model=self, + artifacts={"model": path_artifacts}, + conda_env=conda_env, + ) + + def get_metrics(self): + """ + Return metrics + + Parameters + ---------- + self : object Wrapper + + Returns + ------- + dict + """ + return self.artifacts["metrics"] + + def get_columns(self): + """ + Return columns + + Parameters + ---------- + self : object Wrapper + + Returns + ------- + list + """ + return self.artifacts["columns"] + + def get_model(self): + """ + Return model + + Parameters + ---------- + self : object Wrapper + + Returns + ------- + dict + """ + return self.artifacts["model"] + + def train_interpret(self, X, model="tabular"): + """ + Train a interpret model + + Parameters + ---------- + self : object Wrapper + X : pd.DataFrame + Data that were used in the train for interpret + model : string, optional + Model to use for the interpret [tabular,mimic_LGBME, + mimic_Linear,mimic_SGDE,mimic_Dec_Tree] + Returns + ------- + None + """ + mimic_models = { + "mimic_LGBME": LGBMExplainableModel, + "mimic_Linear": LinearExplainableModel, + "mimic_SGDE": SGDExplainableModel, + "mimic_Dec_Tree": DecisionTreeExplainableModel, + } + if model == "tabular": + explainer = TabularExplainer( + self.artifacts["model"], X, features=self.artifacts["columns"] + ) + else: + explainer = MimicExplainer( + self.artifacts["model"], + X, + mimic_models[model], + augment_data=True, + max_num_of_augmentations=10, + features=self.artifacts["columns"], + ) + self.artifacts["explainer"] = explainer + + def local_interpret(self, X, n_feat=3, norm=True): + """ + Return a local interpret for each row in data + + Parameters + ---------- + self : object Wrapper + X : array[array], shape (n_linha, n_colunas) + Matrix with the data that were used to return interpret + n_feat : int, optional + Number of features to return + norm : bool, optional + if True, do normalization in the features importances + + Returns + ------- + pd.DataFrame + """ + local_explanation = self.artifacts["explainer"].explain_local(X) + n_obs = X.shape[0] + predictions = self.artifacts["model"].predict(X) + local_values = local_explanation.get_ranked_local_values() + local_values = [local_values[predictions[i]][i] for i in range(n_obs)] + local_names = local_explanation.get_ranked_local_names() + local_names = [local_names[predictions[i]][i] for i in range(n_obs)] + if norm: + local_values = [ + [(i - min(l)) / (max(l) - min(l)) for i in l] for l in local_values + ] + result = [ + (local_names[i][:n_feat] + local_values[i][:n_feat]) for i in range(n_obs) + ] + column_names = [ + f"Importance_{item}_{str(i)}" + for item in ["Name", "Value"] + for i in range(n_feat) + ] + return pd.DataFrame(result, columns=column_names) diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/wrapper.py b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/wrapper.py index 8f812cf..7aeaf19 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/wrapper.py +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/model/wrapper.py @@ -249,4 +249,4 @@ def local_interpret(self, X, n_feat=3, norm=True): for item in ["Name", "Value"] for i in range(n_feat) ] - return pd.DataFrame(result, columns=column_names) \ No newline at end of file + return pd.DataFrame(result, columns=column_names) diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_Processor.ipynb b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/1_Sagemaker_Processor.ipynb similarity index 92% rename from hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_Processor.ipynb rename to hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/1_Sagemaker_Processor.ipynb index ebd6aae..98961ea 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_Processor.ipynb +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/1_Sagemaker_Processor.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "d1dd5820", + "id": "0080c0d0", "metadata": {}, "source": [ "# Sagemaker Processor" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "fbaaa9e6", + "id": "7d7b0036", "metadata": {}, "source": [ "This script generates the train, val and inference files with the processor previous uploaded in ECR." @@ -18,7 +18,7 @@ }, { "cell_type": "markdown", - "id": "864a2e0f", + "id": "3f2a0229", "metadata": {}, "source": [ "## Import modules" @@ -27,7 +27,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "fa0d1522", + "id": "6e679a79", "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ }, { "cell_type": "markdown", - "id": "43887859", + "id": "a9066e74", "metadata": {}, "source": [ "## Setup" @@ -48,7 +48,7 @@ }, { "cell_type": "markdown", - "id": "4422ac46", + "id": "60ec8b7b", "metadata": {}, "source": [ "Modify according to your configurations." @@ -57,7 +57,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "4d423fcf", + "id": "9d9b2d23", "metadata": {}, "outputs": [], "source": [ @@ -68,7 +68,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "b503dba8", + "id": "1cd1aa77", "metadata": {}, "outputs": [], "source": [ @@ -80,7 +80,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "c00d86d1", + "id": "464d9cec", "metadata": {}, "outputs": [], "source": [ @@ -91,7 +91,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "667c8bb6", + "id": "a0649d24", "metadata": {}, "outputs": [], "source": [ @@ -102,7 +102,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "3b02cf9e", + "id": "f71c6f3c", "metadata": {}, "outputs": [], "source": [ @@ -114,7 +114,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "32c8ab3d", + "id": "db98e9a2", "metadata": {}, "outputs": [], "source": [ @@ -133,8 +133,8 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "eac4ac37", + "execution_count": 8, + "id": "52ba34ff", "metadata": {}, "outputs": [], "source": [ @@ -146,8 +146,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "1b175317", + "execution_count": 9, + "id": "b1744737", "metadata": {}, "outputs": [], "source": [ @@ -158,7 +158,7 @@ }, { "cell_type": "markdown", - "id": "a9bcf199", + "id": "281216e9", "metadata": {}, "source": [ "## Processor - Train" @@ -166,8 +166,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "becf4d16", + "execution_count": 10, + "id": "3191cd98", "metadata": {}, "outputs": [], "source": [ @@ -181,8 +181,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "id": "2ccaf4a1", + "execution_count": 11, + "id": "9998dd3a", "metadata": {}, "outputs": [], "source": [ @@ -214,8 +214,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "e0287211", + "execution_count": 12, + "id": "a0d4af1b", "metadata": {}, "outputs": [], "source": [ @@ -228,8 +228,8 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "854dc0d7", + "execution_count": 13, + "id": "065f6fca", "metadata": {}, "outputs": [ { @@ -237,11 +237,10 @@ "output_type": "stream", "text": [ "\n", - "Job Name: hermione-processor-2021-05-25-21-03-59-873\n", + "Job Name: hermione-processor-2021-07-22-19-53-22-425\n", "Inputs: [{'InputName': 'raw_data', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://hermione-sagemaker/TRAIN_RAW', 'LocalPath': '/opt/ml/processing/input/raw_data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]\n", "Outputs: [{'OutputName': 'expectations', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://hermione-sagemaker/PREPROCESSING/EXPECTATIONS', 'LocalPath': '/opt/ml/processing/output/expectations', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'preprocessing', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://hermione-sagemaker/PREPROCESSING/PREPROCESSING', 'LocalPath': '/opt/ml/processing/output/preprocessing', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'train_data', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://hermione-sagemaker/PREPROCESSING/TRAIN_PROCESSED', 'LocalPath': '/opt/ml/processing/output/processed/train', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'val_data', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://hermione-sagemaker/PREPROCESSING/VAL_PROCESSED', 'LocalPath': '/opt/ml/processing/output/processed/val', 'S3UploadMode': 'EndOfJob'}}]\n", - "......................................................\n", - "\u001b[34mINFO:root:Starting the preprocessing\u001b[0m\n", + "......................................................\u001b[34mINFO:root:Starting the preprocessing\u001b[0m\n", "\u001b[34mINFO:root:step_train: True\u001b[0m\n", "\u001b[34mINFO:root:Reading the inputs\u001b[0m\n", "\u001b[34mINFO:root:Reading file: /opt/ml/processing/input/raw_data/raw_train.csv\u001b[0m\n", @@ -255,7 +254,7 @@ "\u001b[0m\n", "\u001b[34mINFO:root:Divide train and test\u001b[0m\n", "\u001b[34mINFO:root:Normalizing\u001b[0m\n", - "\u001b[34mWARNING:py.warnings:/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py:1738: SettingWithCopyWarning: \u001b[0m\n", + "\u001b[34mWARNING:py.warnings:/usr/local/lib/python3.8/dist-packages/pandas/core/indexing.py:1835: SettingWithCopyWarning: \u001b[0m\n", "\u001b[34mA value is trying to be set on a copy of a slice from a DataFrame.\u001b[0m\n", "\u001b[34mTry using .loc[row_indexer,col_indexer] = value instead\n", "\u001b[0m\n", @@ -265,8 +264,9 @@ "\u001b[34mINFO:root:Normalizing\u001b[0m\n", "\u001b[34mINFO:root:shape train (393, 7) val (99, 7)\u001b[0m\n", "\u001b[34mINFO:root:Saving\u001b[0m\n", - "CPU times: user 1.02 s, sys: 104 ms, total: 1.13 s\n", - "Wall time: 9min 14s\n" + "\n", + "CPU times: user 1.09 s, sys: 71.1 ms, total: 1.16 s\n", + "Wall time: 9min 48s\n" ] } ], @@ -281,7 +281,7 @@ }, { "cell_type": "markdown", - "id": "0f54bf21", + "id": "5db80626", "metadata": {}, "source": [ "## Processor - Inference" @@ -289,8 +289,8 @@ }, { "cell_type": "code", - "execution_count": 13, - "id": "bb2a86dc", + "execution_count": 10, + "id": "8d08c6c9", "metadata": {}, "outputs": [], "source": [ @@ -310,8 +310,8 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "c3e8dd48", + "execution_count": 11, + "id": "4273ba95", "metadata": {}, "outputs": [], "source": [ @@ -332,8 +332,8 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "62de176e", + "execution_count": 12, + "id": "b4d816d3", "metadata": {}, "outputs": [], "source": [ @@ -346,8 +346,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "id": "e9255f5a", + "execution_count": 13, + "id": "28aa9b95", "metadata": {}, "outputs": [ { @@ -355,11 +355,10 @@ "output_type": "stream", "text": [ "\n", - "Job Name: hermione-processor-2021-05-25-21-13-13-987\n", + "Job Name: hermione-processor-2021-07-22-19-40-48-848\n", "Inputs: [{'InputName': 'raw_data', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://hermione-sagemaker/TEST_RAW', 'LocalPath': '/opt/ml/processing/input/raw_data', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'preprocessing', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://hermione-sagemaker/PREPROCESSING/PREPROCESSING', 'LocalPath': '/opt/ml/processing/input/preprocessing', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}, {'InputName': 'expectations', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://hermione-sagemaker/PREPROCESSING/EXPECTATIONS', 'LocalPath': '/opt/ml/processing/input/expectations', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]\n", "Outputs: [{'OutputName': 'inference_data', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://hermione-sagemaker/PREPROCESSING/INFERENCE_PROCESSED', 'LocalPath': '/opt/ml/processing/output/processed/inference', 'S3UploadMode': 'EndOfJob'}}, {'OutputName': 'validations', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://hermione-sagemaker/PREPROCESSING/VALIDATIONS', 'LocalPath': '/opt/ml/processing/output/validations', 'S3UploadMode': 'EndOfJob'}}]\n", - "............................................................\n", - "\u001b[34mINFO:root:Starting the preprocessing\u001b[0m\n", + "...........................................................\u001b[34mINFO:root:Starting the preprocessing\u001b[0m\n", "\u001b[34mINFO:root:step_train: False\u001b[0m\n", "\u001b[34mINFO:root:Reading the inputs\u001b[0m\n", "\u001b[34mINFO:root:Reading file: /opt/ml/processing/input/raw_data/raw_test.csv\u001b[0m\n", @@ -370,8 +369,9 @@ "\u001b[34mINFO:root:Normalizing\u001b[0m\n", "\u001b[34mINFO:root:shape (222, 7)\u001b[0m\n", "\u001b[34mINFO:root:Saving\u001b[0m\n", - "CPU times: user 1.19 s, sys: 38.4 ms, total: 1.23 s\n", - "Wall time: 10min 14s\n" + "\n", + "CPU times: user 1.18 s, sys: 39.6 ms, total: 1.22 s\n", + "Wall time: 10min 15s\n" ] } ], diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_Train.ipynb b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/2_Sagemaker_Train.ipynb similarity index 81% rename from hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_Train.ipynb rename to hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/2_Sagemaker_Train.ipynb index b0a796f..5951690 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_Train.ipynb +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/2_Sagemaker_Train.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "577c4f6b", + "id": "0481ea58", "metadata": {}, "source": [ "# Sagemaker Train" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "501ef5b6", + "id": "c14f3a6e", "metadata": {}, "source": [ "This script creates and trains the model with the uploaded image in ECR." @@ -18,7 +18,7 @@ }, { "cell_type": "markdown", - "id": "e66b3975", + "id": "737135a7", "metadata": {}, "source": [ "## Import modules" @@ -26,8 +26,8 @@ }, { "cell_type": "code", - "execution_count": 15, - "id": "d658fb44", + "execution_count": 1, + "id": "010b1646", "metadata": {}, "outputs": [], "source": [ @@ -39,7 +39,7 @@ }, { "cell_type": "markdown", - "id": "64036230", + "id": "ed6ec079", "metadata": {}, "source": [ "## Setup" @@ -47,7 +47,7 @@ }, { "cell_type": "markdown", - "id": "28411012", + "id": "ff8d388c", "metadata": {}, "source": [ "Modify according to your configurations." @@ -55,8 +55,8 @@ }, { "cell_type": "code", - "execution_count": 16, - "id": "7e937373", + "execution_count": 2, + "id": "6278a767", "metadata": {}, "outputs": [], "source": [ @@ -66,8 +66,8 @@ }, { "cell_type": "code", - "execution_count": 17, - "id": "16450249", + "execution_count": 3, + "id": "1fe9ed45", "metadata": {}, "outputs": [], "source": [ @@ -78,8 +78,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "2e144eb8", + "execution_count": 4, + "id": "f6216acf", "metadata": {}, "outputs": [], "source": [ @@ -89,8 +89,8 @@ }, { "cell_type": "code", - "execution_count": 19, - "id": "50b4a590", + "execution_count": 5, + "id": "c9a8d55b", "metadata": {}, "outputs": [], "source": [ @@ -100,8 +100,8 @@ }, { "cell_type": "code", - "execution_count": 20, - "id": "8d56e6ca", + "execution_count": 6, + "id": "f281ac39", "metadata": {}, "outputs": [], "source": [ @@ -112,8 +112,8 @@ }, { "cell_type": "code", - "execution_count": 21, - "id": "e710ea0a", + "execution_count": 7, + "id": "4eee7169", "metadata": {}, "outputs": [], "source": [ @@ -127,8 +127,8 @@ }, { "cell_type": "code", - "execution_count": 22, - "id": "f8a27026", + "execution_count": 8, + "id": "44002452", "metadata": {}, "outputs": [], "source": [ @@ -138,7 +138,7 @@ }, { "cell_type": "markdown", - "id": "b6efb8ce", + "id": "6aa3f5a8", "metadata": {}, "source": [ "## Train" @@ -146,8 +146,8 @@ }, { "cell_type": "code", - "execution_count": 23, - "id": "ed9cb39b", + "execution_count": 9, + "id": "77e64d0c", "metadata": {}, "outputs": [], "source": [ @@ -160,8 +160,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "id": "34f144e0", + "execution_count": 10, + "id": "33726510", "metadata": {}, "outputs": [], "source": [ @@ -174,8 +174,8 @@ }, { "cell_type": "code", - "execution_count": 25, - "id": "a0bbbf7d", + "execution_count": 11, + "id": "1f0350b8", "metadata": {}, "outputs": [], "source": [ @@ -185,8 +185,8 @@ }, { "cell_type": "code", - "execution_count": 26, - "id": "299813d5", + "execution_count": 12, + "id": "0832ebb9", "metadata": {}, "outputs": [], "source": [ @@ -213,8 +213,8 @@ }, { "cell_type": "code", - "execution_count": 27, - "id": "4ad41d36", + "execution_count": 13, + "id": "7a2931e1", "metadata": {}, "outputs": [], "source": [ @@ -236,26 +236,23 @@ }, { "cell_type": "code", - "execution_count": 28, - "id": "62c1894f", + "execution_count": 14, + "id": "d12aa777", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "2021-05-26 12:41:29 Starting - Starting the training job...\n", - "2021-05-26 12:41:52 Starting - Launching requested ML instancesProfilerReport-1622032889: InProgress\n", - "......\n", - "2021-05-26 12:42:52 Starting - Preparing the instances for training......\n", - "2021-05-26 12:43:52 Downloading - Downloading input data\n", - "2021-05-26 12:43:52 Training - Downloading the training image.....\u001b[34m2021-05-26 09:44:41,407 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", - "\n", - "2021-05-26 12:45:00 Uploading - Uploading generated training model\n", - "2021-05-26 12:45:00 Completed - Training job completed\n", - "\u001b[34m2021-05-26 09:44:47,642 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", - "\u001b[34m2021-05-26 09:44:47,653 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", - "\u001b[34m2021-05-26 09:44:47,663 sagemaker-training-toolkit INFO Invoking user script\n", + "2021-07-22 20:15:35 Starting - Starting the training job...\n", + "2021-07-22 20:15:59 Starting - Launching requested ML instancesProfilerReport-1626984935: InProgress\n", + "...\n", + "2021-07-22 20:16:35 Starting - Preparing the instances for training.........\n", + "2021-07-22 20:18:00 Downloading - Downloading input data...\n", + "2021-07-22 20:18:20 Training - Downloading the training image.....\u001b[34m2021-07-22 17:19:11,614 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", + "\u001b[34m2021-07-22 17:19:11,630 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", + "\u001b[34m2021-07-22 17:19:11,640 sagemaker-training-toolkit INFO No GPUs detected (normal if no gpus installed)\u001b[0m\n", + "\u001b[34m2021-07-22 17:19:11,648 sagemaker-training-toolkit INFO Invoking user script\n", "\u001b[0m\n", "\u001b[34mTraining Env:\n", "\u001b[0m\n", @@ -288,7 +285,7 @@ " },\n", " \"input_dir\": \"/opt/ml/input\",\n", " \"is_master\": true,\n", - " \"job_name\": \"Hermione-train-2021-05-26-12-41-29-505\",\n", + " \"job_name\": \"Hermione-train-2021-07-22-20-15-35-496\",\n", " \"log_level\": 20,\n", " \"master_hostname\": \"algo-1\",\n", " \"model_dir\": \"/opt/ml/model\",\n", @@ -332,7 +329,7 @@ "\u001b[34mSM_NUM_GPUS=0\u001b[0m\n", "\u001b[34mSM_MODEL_DIR=/opt/ml/model\u001b[0m\n", "\u001b[34mSM_MODULE_DIR=/opt/ml/code\u001b[0m\n", - "\u001b[34mSM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"train\":\"/opt/ml/input/data/train\",\"validation\":\"/opt/ml/input/data/validation\"},\"current_host\":\"algo-1\",\"framework_module\":null,\"hosts\":[\"algo-1\"],\"hyperparameters\":{},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"train\":{\"ContentType\":\"text/csv\",\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"},\"validation\":{\"ContentType\":\"text/csv\",\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"Hermione-train-2021-05-26-12-41-29-505\",\"log_level\":20,\"master_hostname\":\"algo-1\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"/opt/ml/code\",\"module_name\":\"train\",\"network_interface_name\":\"eth0\",\"num_cpus\":2,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"eth0\"},\"user_entry_point\":\"train.py\"}\u001b[0m\n", + "\u001b[34mSM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"train\":\"/opt/ml/input/data/train\",\"validation\":\"/opt/ml/input/data/validation\"},\"current_host\":\"algo-1\",\"framework_module\":null,\"hosts\":[\"algo-1\"],\"hyperparameters\":{},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"train\":{\"ContentType\":\"text/csv\",\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"},\"validation\":{\"ContentType\":\"text/csv\",\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"is_master\":true,\"job_name\":\"Hermione-train-2021-07-22-20-15-35-496\",\"log_level\":20,\"master_hostname\":\"algo-1\",\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"/opt/ml/code\",\"module_name\":\"train\",\"network_interface_name\":\"eth0\",\"num_cpus\":2,\"num_gpus\":0,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"eth0\"},\"user_entry_point\":\"train.py\"}\u001b[0m\n", "\u001b[34mSM_USER_ARGS=[]\u001b[0m\n", "\u001b[34mSM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\u001b[0m\n", "\u001b[34mSM_CHANNEL_VALIDATION=/opt/ml/input/data/validation\u001b[0m\n", @@ -344,21 +341,21 @@ "\u001b[34m/usr/bin/python3 train.py\n", "\n", "\u001b[0m\n", - "\u001b[34m/usr/local/lib/python3.8/dist-packages/interpret_community/common/gpu_kmeans.py:30: UserWarning: cuML is required to use GPU explainers. Check https://rapids.ai/start.html for more information on how to install it.\n", - " warnings.warn(\u001b[0m\n", - "\u001b[34mcuML is required to use GPU explainers. Check https://rapids.ai/start.html for more information on how to install it.\u001b[0m\n", "\u001b[34mINFO:root:Starting the training\u001b[0m\n", "\u001b[34mINFO:root:Reading the inputs\u001b[0m\n", "\u001b[34mINFO:root:Training the model\u001b[0m\n", "\u001b[34mINFO:root:Saving\u001b[0m\n", - "\u001b[34mINFO:root:accuracy=0.7373737373737373; f1=0.6976744186046512; precision=0.6382978723404256; recall=0.7692307692307693;\u001b[0m\n", + "\u001b[34mINFO:root:accuracy=0.7373737373737373; f1=0.6976744186046512; precision=0.6382978723404256; recall=0.7692307692307693;\u001b[0m\n", "\u001b[34mINFO:root:Training complete.\u001b[0m\n", - "\u001b[34m2021-05-26 09:44:51,898 sagemaker-training-toolkit INFO Reporting training SUCCESS\u001b[0m\n", - "Training seconds: 85\n", - "Billable seconds: 36\n", - "Managed Spot Training savings: 57.6%\n", - "CPU times: user 450 ms, sys: 19.9 ms, total: 470 ms\n", - "Wall time: 3min 42s\n" + "\u001b[34m2021-07-22 17:19:17,315 sagemaker-training-toolkit INFO Reporting training SUCCESS\u001b[0m\n", + "\n", + "2021-07-22 20:19:30 Uploading - Uploading generated training model\n", + "2021-07-22 20:19:30 Completed - Training job completed\n", + "Training seconds: 96\n", + "Billable seconds: 39\n", + "Managed Spot Training savings: 59.4%\n", + "CPU times: user 491 ms, sys: 48.5 ms, total: 539 ms\n", + "Wall time: 4min 12s\n" ] } ], @@ -367,6 +364,14 @@ "# Train the model and validate\n", "est.fit({'train':train_config, 'validation':val_config}, wait=True, logs=True)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bf57258c", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/3_Sagemaker_Inference.ipynb b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/3_Sagemaker_Inference.ipynb new file mode 100644 index 0000000..525a5a0 --- /dev/null +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/3_Sagemaker_Inference.ipynb @@ -0,0 +1,374 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "4558d673", + "metadata": {}, + "source": [ + "# Sagemaker Inference" + ] + }, + { + "cell_type": "markdown", + "id": "733a4c1b", + "metadata": {}, + "source": [ + "This script predicts new data with the uploaded image in ECR." + ] + }, + { + "cell_type": "markdown", + "id": "73ec63de", + "metadata": {}, + "source": [ + "## Import modules" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "9f4bb4b1", + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import boto3\n", + "import sagemaker\n", + "from sagemaker import get_execution_role" + ] + }, + { + "cell_type": "markdown", + "id": "cf4f0baf", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "id": "a36daf9a", + "metadata": {}, + "source": [ + "Modify according to your configurations." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ff34a81c", + "metadata": {}, + "outputs": [], + "source": [ + "# Bucket name in S3\n", + "bucket = \"hermione-sagemaker\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "aa6732aa", + "metadata": {}, + "outputs": [], + "source": [ + "# Set session\n", + "region_name=\"us-east-1\"\n", + "boto3.setup_default_session(region_name=region_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0515bb41", + "metadata": {}, + "outputs": [], + "source": [ + "# Get user role\n", + "role = get_execution_role()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ef2ae3ae", + "metadata": {}, + "outputs": [], + "source": [ + "# Get AWS Account ID\n", + "account_number = boto3.client(\"sts\").get_caller_identity()[\"Account\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "31861461", + "metadata": {}, + "outputs": [], + "source": [ + "# Image previous uploaded in ECR\n", + "image_name = \"hermione-inference\"\n", + "image_uri = f\"{account_number}.dkr.ecr.{region_name}.amazonaws.com/{image_name}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "1eec0163", + "metadata": {}, + "outputs": [], + "source": [ + "# Input and output paths to execute inference\n", + "paths = {\n", + " 'inference_processed': f\"s3://{bucket}/PREPROCESSING/INFERENCE_PROCESSED/inference.csv\",\n", + " 'model': f\"s3://{bucket}/PREPROCESSING/MODEL/Hermione-train-2021-05-26-12-41-29-505/output/model.tar.gz\",\n", + " 'output_path': f\"s3://{bucket}/PREPROCESSING/OUTPUT\"\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "76ce3950", + "metadata": {}, + "outputs": [], + "source": [ + "# instance to run the code\n", + "instance_type=\"ml.m5.large\"" + ] + }, + { + "cell_type": "markdown", + "id": "f44e5b91", + "metadata": {}, + "source": [ + "## Inference" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a78cd291", + "metadata": {}, + "outputs": [], + "source": [ + "# Receives the processed inference data in S3\n", + "input_path = paths['inference_processed']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c8f2a674", + "metadata": {}, + "outputs": [], + "source": [ + "# Receives the model created during the training in S3\n", + "model_path = paths['model']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6ec78d16", + "metadata": {}, + "outputs": [], + "source": [ + "# Saves the prediction in S3\n", + "output_path = paths['output_path']" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c167eff0", + "metadata": {}, + "outputs": [], + "source": [ + "# Creates the model to access the ECR image\n", + "model = sagemaker.model.Model(\n", + " image_uri= image_uri,\n", + " model_data=model_path,\n", + " role=role)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "0b2651c1", + "metadata": {}, + "outputs": [], + "source": [ + "# Creates a transformer object from the trained model\n", + "transformer = model.transformer(\n", + " instance_count=1,\n", + " instance_type=instance_type, \n", + " output_path=output_path,\n", + " accept = 'text/csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "1c5bd0b8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "..........................\u001b[34mWarning: MMS is using non-default JVM parameters: -XX:-UseContainerSupport\u001b[0m\n", + "\u001b[35mWarning: MMS is using non-default JVM parameters: -XX:-UseContainerSupport\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,272 [INFO ] main com.amazonaws.ml.mms.ModelServer - \u001b[0m\n", + "\u001b[34mMMS Home: /usr/local/lib/python3.8/dist-packages\u001b[0m\n", + "\u001b[34mCurrent directory: /\u001b[0m\n", + "\u001b[34mTemp directory: /tmp\u001b[0m\n", + "\u001b[34mNumber of GPUs: 0\u001b[0m\n", + "\u001b[34mNumber of CPUs: 2\u001b[0m\n", + "\u001b[34mMax heap size: 1726 M\u001b[0m\n", + "\u001b[34mPython executable: /usr/bin/python3\u001b[0m\n", + "\u001b[34mConfig file: /etc/sagemaker-mms.properties\u001b[0m\n", + "\u001b[34mInference address: http://0.0.0.0:8080\u001b[0m\n", + "\u001b[34mManagement address: http://0.0.0.0:8080\u001b[0m\n", + "\u001b[34mModel Store: /.sagemaker/mms/models\u001b[0m\n", + "\u001b[34mInitial Models: ALL\u001b[0m\n", + "\u001b[34mLog dir: /logs\u001b[0m\n", + "\u001b[34mMetrics dir: /logs\u001b[0m\n", + "\u001b[34mNetty threads: 0\u001b[0m\n", + "\u001b[34mNetty client threads: 0\u001b[0m\n", + "\u001b[34mDefault workers per model: 2\u001b[0m\n", + "\u001b[34mBlacklist Regex: N/A\u001b[0m\n", + "\u001b[34mMaximum Response Size: 6553500\u001b[0m\n", + "\u001b[34mMaximum Request Size: 6553500\u001b[0m\n", + "\u001b[34mPreload model: false\u001b[0m\n", + "\u001b[34mPrefer direct buffer: false\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,384 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-9000-model\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,452 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - model_service_worker started with args: --sock-type unix --sock-name /tmp/.mms.sock.9000 --handler serving.handler --model-path /.sagemaker/mms/models/model --model-name model --preload-model false --tmp-dir /tmp\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,272 [INFO ] main com.amazonaws.ml.mms.ModelServer - \u001b[0m\n", + "\u001b[35mMMS Home: /usr/local/lib/python3.8/dist-packages\u001b[0m\n", + "\u001b[35mCurrent directory: /\u001b[0m\n", + "\u001b[35mTemp directory: /tmp\u001b[0m\n", + "\u001b[35mNumber of GPUs: 0\u001b[0m\n", + "\u001b[35mNumber of CPUs: 2\u001b[0m\n", + "\u001b[35mMax heap size: 1726 M\u001b[0m\n", + "\u001b[35mPython executable: /usr/bin/python3\u001b[0m\n", + "\u001b[35mConfig file: /etc/sagemaker-mms.properties\u001b[0m\n", + "\u001b[35mInference address: http://0.0.0.0:8080\u001b[0m\n", + "\u001b[35mManagement address: http://0.0.0.0:8080\u001b[0m\n", + "\u001b[35mModel Store: /.sagemaker/mms/models\u001b[0m\n", + "\u001b[35mInitial Models: ALL\u001b[0m\n", + "\u001b[35mLog dir: /logs\u001b[0m\n", + "\u001b[35mMetrics dir: /logs\u001b[0m\n", + "\u001b[35mNetty threads: 0\u001b[0m\n", + "\u001b[35mNetty client threads: 0\u001b[0m\n", + "\u001b[35mDefault workers per model: 2\u001b[0m\n", + "\u001b[35mBlacklist Regex: N/A\u001b[0m\n", + "\u001b[35mMaximum Response Size: 6553500\u001b[0m\n", + "\u001b[35mMaximum Request Size: 6553500\u001b[0m\n", + "\u001b[35mPreload model: false\u001b[0m\n", + "\u001b[35mPrefer direct buffer: false\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,384 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-9000-model\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,452 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - model_service_worker started with args: --sock-type unix --sock-name /tmp/.mms.sock.9000 --handler serving.handler --model-path /.sagemaker/mms/models/model --model-name model --preload-model false --tmp-dir /tmp\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,454 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Listening on port: /tmp/.mms.sock.9000\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,454 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - [PID] 24\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,455 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - MMS worker started.\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,455 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Python runtime: 3.8.10\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,456 [INFO ] main com.amazonaws.ml.mms.wlm.ModelManager - Model model loaded.\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,460 [INFO ] main com.amazonaws.ml.mms.ModelServer - Initialize Inference server with: EpollServerSocketChannel.\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,472 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,476 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,536 [INFO ] main com.amazonaws.ml.mms.ModelServer - Inference API bind to: http://0.0.0.0:8080\u001b[0m\n", + "\u001b[34mModel server started.\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,555 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,555 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:24,567 [WARN ] pool-2-thread-1 com.amazonaws.ml.mms.metrics.MetricCollector - worker pid is not available yet.\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,454 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Listening on port: /tmp/.mms.sock.9000\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,454 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - [PID] 24\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,455 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - MMS worker started.\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,455 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Python runtime: 3.8.10\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,456 [INFO ] main com.amazonaws.ml.mms.wlm.ModelManager - Model model loaded.\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,460 [INFO ] main com.amazonaws.ml.mms.ModelServer - Initialize Inference server with: EpollServerSocketChannel.\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,472 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,476 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,536 [INFO ] main com.amazonaws.ml.mms.ModelServer - Inference API bind to: http://0.0.0.0:8080\u001b[0m\n", + "\u001b[35mModel server started.\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,555 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,555 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:24,567 [WARN ] pool-2-thread-1 com.amazonaws.ml.mms.metrics.MetricCollector - worker pid is not available yet.\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,441 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - generated new fontManager\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,450 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - generated new fontManager\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,839 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Loading the model\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,854 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Loading the model\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,441 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - generated new fontManager\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,450 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - generated new fontManager\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,839 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Loading the model\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,854 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Loading the model\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,886 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Model model loaded io_fd=0242a9fffefeff83-0000000a-00000000-2860f330bbe7ac20-d219266e\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,898 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 3268\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,900 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-model-1\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,916 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Model model loaded io_fd=0242a9fffefeff83-0000000a-00000001-9aea1030bbe7ac23-7076a78a\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,916 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 3285\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:27,916 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-model-2\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,886 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Model model loaded io_fd=0242a9fffefeff83-0000000a-00000000-2860f330bbe7ac20-d219266e\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,898 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 3268\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,900 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-model-1\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,916 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Model model loaded io_fd=0242a9fffefeff83-0000000a-00000001-9aea1030bbe7ac23-7076a78a\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,916 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 3285\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:27,916 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-model-2\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:31,830 [INFO ] pool-1-thread-4 ACCESS_LOG - /169.254.255.130:60460 \"GET /ping HTTP/1.1\" 200 15\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:31,840 [INFO ] epollEventLoopGroup-3-2 ACCESS_LOG - /169.254.255.130:60464 \"GET /execution-parameters HTTP/1.1\" 404 1\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:31,965 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Predicting...\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:31,981 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Prediction Complete\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:31,983 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Saving\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:31,985 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 26\u001b[0m\n", + "\u001b[34m2021-07-22 20:28:31,986 [INFO ] W-9000-model ACCESS_LOG - /169.254.255.130:60468 \"POST /invocations HTTP/1.1\" 200 30\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:31,830 [INFO ] pool-1-thread-4 ACCESS_LOG - /169.254.255.130:60460 \"GET /ping HTTP/1.1\" 200 15\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:31,840 [INFO ] epollEventLoopGroup-3-2 ACCESS_LOG - /169.254.255.130:60464 \"GET /execution-parameters HTTP/1.1\" 404 1\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:31,965 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Predicting...\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:31,981 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Prediction Complete\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:31,983 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Saving\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:31,985 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 26\u001b[0m\n", + "\u001b[35m2021-07-22 20:28:31,986 [INFO ] W-9000-model ACCESS_LOG - /169.254.255.130:60468 \"POST /invocations HTTP/1.1\" 200 30\u001b[0m\n", + "\u001b[32m2021-07-22T20:28:31.846:[sagemaker logs]: MaxConcurrentTransforms=1, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD\u001b[0m\n", + "\n", + "CPU times: user 602 ms, sys: 31.4 ms, total: 634 ms\n", + "Wall time: 4min 43s\n" + ] + } + ], + "source": [ + "%%time\n", + "# Predicts the data\n", + "transformer.transform(data=input_path, data_type='S3Prefix', content_type='text/csv', split_type='Line')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79b282ec", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_python3", + "language": "python", + "name": "conda_python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_StepFunctions_Train.ipynb b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/4_Sagemaker_StepFunctions_Train.ipynb similarity index 100% rename from hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_StepFunctions_Train.ipynb rename to hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/4_Sagemaker_StepFunctions_Train.ipynb diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_StepFunctions_Inference.ipynb b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/5_Sagemaker_StepFunctions_Inference.ipynb similarity index 100% rename from hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_StepFunctions_Inference.ipynb rename to hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/5_Sagemaker_StepFunctions_Inference.ipynb diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_Inference.ipynb b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_Inference.ipynb deleted file mode 100644 index aa21796..0000000 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/src/ml/notebooks/Sagemaker_Inference.ipynb +++ /dev/null @@ -1,322 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "616d65aa", - "metadata": {}, - "source": [ - "# Sagemaker Inference" - ] - }, - { - "cell_type": "markdown", - "id": "aee7320a", - "metadata": {}, - "source": [ - "This script predicts new data with the uploaded image in ECR." - ] - }, - { - "cell_type": "markdown", - "id": "ea32612e", - "metadata": {}, - "source": [ - "## Import modules" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "3f188c9f", - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "import boto3\n", - "import sagemaker\n", - "from sagemaker import get_execution_role" - ] - }, - { - "cell_type": "markdown", - "id": "430e1eb4", - "metadata": {}, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "markdown", - "id": "ebe50488", - "metadata": {}, - "source": [ - "Modify according to your configurations." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "8893b148", - "metadata": {}, - "outputs": [], - "source": [ - "# Bucket name in S3\n", - "bucket = \"hermione-sagemaker\"" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "a6ba2451", - "metadata": {}, - "outputs": [], - "source": [ - "# Set session\n", - "region_name=\"us-east-1\"\n", - "boto3.setup_default_session(region_name=region_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "797c5fa6", - "metadata": {}, - "outputs": [], - "source": [ - "# Get user role\n", - "role = get_execution_role()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "d8148140", - "metadata": {}, - "outputs": [], - "source": [ - "# Get AWS Account ID\n", - "account_number = boto3.client(\"sts\").get_caller_identity()[\"Account\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "1b1fba48", - "metadata": {}, - "outputs": [], - "source": [ - "# Image previous uploaded in ECR\n", - "image_name = \"hermione-inference\"\n", - "image_uri = f\"{account_number}.dkr.ecr.{region_name}.amazonaws.com/{image_name}\"" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "f907e610", - "metadata": {}, - "outputs": [], - "source": [ - "# Input and output paths to execute inference\n", - "paths = {\n", - " 'inference_processed': f\"s3://{bucket}/PREPROCESSING/INFERENCE_PROCESSED/inference.csv\",\n", - " 'model': f\"s3://{bucket}/PREPROCESSING/MODEL/Hermione-train-2021-05-26-12-41-29-505/output/model.tar.gz\",\n", - " 'output_path': f\"s3://{bucket}/PREPROCESSING/OUTPUT\"\n", - "}" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "f5fdfdd8", - "metadata": {}, - "outputs": [], - "source": [ - "# instance to run the code\n", - "instance_type=\"ml.m5.large\"" - ] - }, - { - "cell_type": "markdown", - "id": "55fe64d7", - "metadata": {}, - "source": [ - "## Inference" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "60b7dc56", - "metadata": {}, - "outputs": [], - "source": [ - "# Receives the processed inference data in S3\n", - "input_path = paths['inference_processed']" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "e3dc913c", - "metadata": {}, - "outputs": [], - "source": [ - "# Receives the model created during the training in S3\n", - "model_path = paths['model']" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "5b69f31c", - "metadata": {}, - "outputs": [], - "source": [ - "# Saves the prediction in S3\n", - "output_path = paths['output_path']" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "29f7ce88", - "metadata": {}, - "outputs": [], - "source": [ - "# Creates the model to access the ECR image\n", - "model = sagemaker.model.Model(\n", - " image_uri= image_uri,\n", - " model_data=model_path,\n", - " role=role)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "aacdf22a", - "metadata": {}, - "outputs": [], - "source": [ - "# Creates a transformer object from the trained model\n", - "transformer = model.transformer(\n", - " instance_count=1,\n", - " instance_type=instance_type, \n", - " output_path=output_path,\n", - " accept = 'text/csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "6452e276", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - ".........................\u001b[34m2021-05-26 12:57:00,312 [INFO ] main com.amazonaws.ml.mms.ModelServer - \u001b[0m\n", - "\u001b[34mMMS Home: /usr/local/lib/python3.8/dist-packages\u001b[0m\n", - "\u001b[34mCurrent directory: /\u001b[0m\n", - "\u001b[34mTemp directory: /tmp\u001b[0m\n", - "\u001b[34mNumber of GPUs: 0\u001b[0m\n", - "\u001b[34mNumber of CPUs: 2\u001b[0m\n", - "\u001b[34mMax heap size: 857 M\u001b[0m\n", - "\u001b[34mPython executable: /usr/bin/python3\u001b[0m\n", - "\u001b[34mConfig file: /etc/sagemaker-mms.properties\u001b[0m\n", - "\u001b[34mInference address: http://0.0.0.0:8080\u001b[0m\n", - "\u001b[34mManagement address: http://0.0.0.0:8080\u001b[0m\n", - "\u001b[34mModel Store: /.sagemaker/mms/models\u001b[0m\n", - "\u001b[34mInitial Models: ALL\u001b[0m\n", - "\u001b[34mLog dir: /logs\u001b[0m\n", - "\u001b[34mMetrics dir: /logs\u001b[0m\n", - "\u001b[34mNetty threads: 0\u001b[0m\n", - "\u001b[34mNetty client threads: 0\u001b[0m\n", - "\u001b[34mDefault workers per model: 2\u001b[0m\n", - "\u001b[34mBlacklist Regex: N/A\u001b[0m\n", - "\u001b[34mMaximum Response Size: 6553500\u001b[0m\n", - "\u001b[34mMaximum Request Size: 6553500\u001b[0m\n", - "\u001b[34mPreload model: false\u001b[0m\n", - "\u001b[34mPrefer direct buffer: false\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,419 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-9000-model\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,506 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - model_service_worker started with args: --sock-type unix --sock-name /tmp/.mms.sock.9000 --handler serving.handler --model-path /.sagemaker/mms/models/model --model-name model --preload-model false --tmp-dir /tmp\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,508 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Listening on port: /tmp/.mms.sock.9000\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,509 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - [PID] 23\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,509 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - MMS worker started.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,509 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Python runtime: 3.8.5\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,512 [INFO ] main com.amazonaws.ml.mms.wlm.ModelManager - Model model loaded.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,517 [INFO ] main com.amazonaws.ml.mms.ModelServer - Initialize Inference server with: EpollServerSocketChannel.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,536 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,536 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Connecting to: /tmp/.mms.sock.9000\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,607 [INFO ] main com.amazonaws.ml.mms.ModelServer - Inference API bind to: http://0.0.0.0:8080\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,613 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,614 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Connection accepted: /tmp/.mms.sock.9000.\u001b[0m\n", - "\u001b[34mModel server started.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:00,636 [WARN ] pool-2-thread-1 com.amazonaws.ml.mms.metrics.MetricCollector - worker pid is not available yet.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:02,508 [WARN ] W-9000-model-stderr com.amazonaws.ml.mms.wlm.WorkerLifeCycle - /usr/local/lib/python3.8/dist-packages/interpret_community/common/gpu_kmeans.py:30: UserWarning: cuML is required to use GPU explainers. Check https://rapids.ai/start.html for more information on how to install it.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:02,510 [WARN ] W-9000-model-stderr com.amazonaws.ml.mms.wlm.WorkerLifeCycle - warnings.warn(\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:02,510 [WARN ] W-9000-model-stderr com.amazonaws.ml.mms.wlm.WorkerLifeCycle - /usr/local/lib/python3.8/dist-packages/interpret_community/common/gpu_kmeans.py:30: UserWarning: cuML is required to use GPU explainers. Check https://rapids.ai/start.html for more information on how to install it.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:02,510 [WARN ] W-9000-model-stderr com.amazonaws.ml.mms.wlm.WorkerLifeCycle - warnings.warn(\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,375 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - generated new fontManager\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,393 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - generated new fontManager\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,635 [WARN ] W-9000-model-stderr com.amazonaws.ml.mms.wlm.WorkerLifeCycle - cuML is required to use GPU explainers. Check https://rapids.ai/start.html for more information on how to install it.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,658 [WARN ] W-9000-model-stderr com.amazonaws.ml.mms.wlm.WorkerLifeCycle - cuML is required to use GPU explainers. Check https://rapids.ai/start.html for more information on how to install it.\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,690 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Loading the model\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,715 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Loading the model\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,741 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Model model loaded io_fd=0242a9fffefeff83-00000009-00000002-e6c9db643cbfeb7b-a47635f7\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,750 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 3046\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,752 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-model-1\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,768 [INFO ] W-9000-model-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Model model loaded io_fd=0242a9fffefeff83-00000009-00000001-f549db643cbfeb7b-e2a66100\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,768 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 3065\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:03,769 [WARN ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerLifeCycle - attachIOStreams() threadName=W-model-2\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:09,272 [INFO ] pool-1-thread-4 ACCESS_LOG - /169.254.255.130:59054 \"GET /ping HTTP/1.1\" 200 11\u001b[0m\n", - "\u001b[35m2021-05-26 12:57:09,272 [INFO ] pool-1-thread-4 ACCESS_LOG - /169.254.255.130:59054 \"GET /ping HTTP/1.1\" 200 11\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:09,353 [INFO ] epollEventLoopGroup-3-2 ACCESS_LOG - /169.254.255.130:59058 \"GET /execution-parameters HTTP/1.1\" 404 2\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:09,462 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Predicting...\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:09,486 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Prediction Complete\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:09,491 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Saving\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:09,494 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 37\u001b[0m\n", - "\u001b[34m2021-05-26 12:57:09,494 [INFO ] W-9000-model ACCESS_LOG - /169.254.255.130:59068 \"POST /invocations HTTP/1.1\" 200 42\u001b[0m\n", - "\u001b[35m2021-05-26 12:57:09,353 [INFO ] epollEventLoopGroup-3-2 ACCESS_LOG - /169.254.255.130:59058 \"GET /execution-parameters HTTP/1.1\" 404 2\u001b[0m\n", - "\u001b[35m2021-05-26 12:57:09,462 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Predicting...\u001b[0m\n", - "\u001b[35m2021-05-26 12:57:09,486 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Prediction Complete\u001b[0m\n", - "\u001b[35m2021-05-26 12:57:09,491 [INFO ] W-model-1-stdout com.amazonaws.ml.mms.wlm.WorkerLifeCycle - Saving\u001b[0m\n", - "\u001b[35m2021-05-26 12:57:09,494 [INFO ] W-9000-model com.amazonaws.ml.mms.wlm.WorkerThread - Backend response time: 37\u001b[0m\n", - "\u001b[35m2021-05-26 12:57:09,494 [INFO ] W-9000-model ACCESS_LOG - /169.254.255.130:59068 \"POST /invocations HTTP/1.1\" 200 42\u001b[0m\n", - "\u001b[32m2021-05-26T12:57:09.364:[sagemaker logs]: MaxConcurrentTransforms=1, MaxPayloadInMB=6, BatchStrategy=MULTI_RECORD\u001b[0m\n", - "\n", - "CPU times: user 547 ms, sys: 59 ms, total: 606 ms\n", - "Wall time: 4min 43s\n" - ] - } - ], - "source": [ - "%%time\n", - "# Predicts the data\n", - "transformer.transform(data=input_path, data_type='S3Prefix', content_type='text/csv', split_type='Line')" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "conda_python3", - "language": "python", - "name": "conda_python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/train/train.py b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/train/train.py index bc7b4cd..183ee0a 100644 --- a/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/train/train.py +++ b/hermione/module_templates/__IMPLEMENTED_SAGEMAKER__/train/train.py @@ -2,13 +2,15 @@ sys.path.append("src/") import os +from util import * import traceback -import pandas as pd + import logging +import pandas as pd + from sklearn.metrics import * from ml.model.trainer import TrainerSklearn from sklearn.ensemble import RandomForestClassifier -from util import * logging.getLogger().setLevel('INFO') @@ -21,64 +23,80 @@ error_path = os.path.join(prefix, 'output') model_path = os.environ['SM_MODEL_DIR'] + +def read_input(file_path): + """ + Take the set of train files and read them all + into a single pandas dataframe + + Parameters + ---------- + file_path : string + Path of the file + + Returns + ------- + pd.Dataframe : pandas DataFrame + """ + input_files = [os.path.join(file_path, file) + for file in os.listdir(file_path)] + if len(input_files) == 0: + raise ValueError(('There are no files in {}.\n' + + 'This usually indicates that the channel ({}) was \ + incorrectly specified,\n' + + 'the data specification in S3 was incorrectly \ + specified or the role specified\n' + + 'does not have permission to access \ + the data.').format(file_path, channel_name)) + raw_data = [pd.read_csv(file) for file in input_files] + return pd.concat(raw_data) + + def train(): """ - Execute the train step in the virtual environment - + Execute the train step in the virtual environment + """ logging.info('Starting the training') try: logging.info('Reading the inputs') - # Take the set of train files and read them all into a single pandas dataframe - input_files = [ os.path.join(training_path, file) for file in os.listdir(training_path) ] - if len(input_files) == 0: - raise ValueError(('There are no files in {}.\n' + - 'This usually indicates that the channel ({}) was incorrectly specified,\n' + - 'the data specification in S3 was incorrectly specified or the role specified\n' + - 'does not have permission to access the data.').format(training_path, channel_name)) - raw_data = [ pd.read_csv(file) for file in input_files ] - train = pd.concat(raw_data) - - # Take the set of val files and read them all into a single pandas dataframe - input_files = [ os.path.join(val_path, file) for file in os.listdir(val_path) ] - if len(input_files) == 0: - raise ValueError(('There are no files in {}.\n' + - 'This usually indicates that the channel ({}) was incorrectly specified,\n' + - 'the data specification in S3 was incorrectly specified or the role specified\n' + - 'does not have permission to access the data.').format(val_path, channel_name)) - raw_data = [ pd.read_csv(file) for file in input_files ] - val = pd.concat(raw_data) - + train = read_input(training_path) + val = read_input(val_path) + # Define the target and columns to be used in the train target = "Survived" columns = train.columns.drop(target) logging.info("Training the model") - model = TrainerSklearn().train(train, val, target, classification=True, + model = TrainerSklearn().train(train, val, target, classification=True, algorithm=RandomForestClassifier, columns=columns) - + # Salve the model and metrics logging.info("Saving") model.save_model(os.path.join(model_path, 'model.pkl')) metrics = model.artifacts["metrics"] - logging.info(f"accuracy={metrics['accuracy']}; f1={metrics['f1']}; precision={metrics['precision']}; recall={metrics['recall']};") - pd.DataFrame(model.artifacts["metrics"].items(), columns=['Metric', 'Value']).to_csv(os.path.join(model_path, 'metrics.csv'), index=False) + logging.info(f"accuracy={metrics['accuracy']}; \ + f1={metrics['f1']}; \ + precision={metrics['precision']}; \ + recall={metrics['recall']};") + pd.DataFrame(model.artifacts["metrics"].items(), + columns=['Metric', 'Value']).to_csv( + os.path.join(model_path, 'metrics.csv'), index=False) logging.info('Training complete.') - + except Exception as e: - # Write out an error file. This will be returned as the failureReason in the - # DescribeTrainingJob result. + # Write out an error file trc = traceback.format_exc() with open(os.path.join(error_path, 'failure'), 'w') as s: s.write('Exception during training: ' + str(e) + '\n' + trc) - # Printing this causes the exception to be in the training job logs, as well. - logging.info('Exception during training: ' + str(e) + '\n' + trc, file=sys.stderr) - # A non-zero exit code causes the training job to be marked as Failed. + logging.info('Exception during training: ' + str(e) + '\n' + trc, + file=sys.stderr) + # A non-zero exit code causes the training job to be marked as Failed sys.exit(255) + if __name__ == '__main__': train() - # A zero exit code causes the job to be marked a Succeeded. sys.exit(0)