diff --git a/ml-models/anomaly-detection/pipeline/deployment.py b/ml-models/anomaly-detection/pipeline/deployment.py
new file mode 100644
index 0000000..63eeaa0
--- /dev/null
+++ b/ml-models/anomaly-detection/pipeline/deployment.py
@@ -0,0 +1,32 @@
+def get_deployment_resource(model_artifact_id):
+    deployment_resource = {
+        'apiVersion': 'serving.kserve.io/v1beta1',
+        'kind': 'InferenceService',
+        'metadata': {
+            'name': 'inference-service',
+            'labels': {
+                'opendatahub.io/dashboard': 'true'
+            },
+            'annotations': {
+                'serving.kserve.io/deploymentMode': 'ModelMesh'
+            },
+        },
+        'spec': {
+            'predictor': {
+                'model': {
+                    'modelFormat': {
+                        'name': 'sklearn',
+                        'version': '0',
+                    },
+                    'runtime': 'anomaly-detection-model-server',
+                    'storage': {
+                        'key': 'aws-connection-user-bucket',
+                        'path': model_artifact_id,
+                    }
+                }
+            }
+        }
+    }
+    return deployment_resource
+
+
diff --git a/ml-models/anomaly-detection/pipeline/preprocessing.py b/ml-models/anomaly-detection/pipeline/preprocessing.py
index b7816f8..672407b 100644
--- a/ml-models/anomaly-detection/pipeline/preprocessing.py
+++ b/ml-models/anomaly-detection/pipeline/preprocessing.py
@@ -7,8 +7,6 @@
 df.set_index('time', inplace=True)
 df.drop(columns=['ts'], inplace=True)
 
-df.head(20)
-
 df1 = df.loc[df['id'] == 'pump-1']
 df1 = df1.drop(columns=['id', 'label'])
 
@@ -18,8 +16,6 @@
 df1 = df.loc[df['id'] == 'pump-1']
 df1 = df1.drop(columns=['id'])
 
-df1.head(10)
-
 df1 = df.loc[df['id'] == 'pump-1']
 df1 = df1.drop(columns=['id'])
 
@@ -31,6 +27,8 @@
 
 #
 # Get list with column names: F1, F2, Fn, L
+
+
 def get_columns(n):
     f = []
     for x in range(1, n+1):
@@ -55,6 +53,7 @@ def create_df(vals: list, label: int = 0):
     dfx = pd.DataFrame([vals+[label]], columns=get_columns(len(vals)))
     return dfx
 
+
 length = 5  # Episode length
 
 df_epis = create_empty_df(length)
@@ -70,13 +69,9 @@ def create_df(vals: list, label: int = 0):
     epi.append(row['value'])
     if len(epi) == length:
         df_row = create_df(epi, row['label'])
-        df_epis = df_epis.append(df_row, ignore_index=True)
+        df_epis = pd.concat([df_epis, df_row], ignore_index=True)
         del(epi[0])
 
-df_epis.head(20)
-
-df_epis.describe()
-
 # Calculate number of episodes
 n_episodes = df_epis.shape[0]
 
@@ -107,7 +102,7 @@
     dfi = df_epis.copy()
     dfi['F5'] = np.where(dfi['L'] == 1, dfi['F5']*f, dfi['F5'])
 
-    dfr = dfr.append(dfi)
+    dfr = pd.concat([dfr, dfi])
 
 df_epis = dfr.copy()
 
@@ -135,4 +130,4 @@ def create_df(vals: list, label: int = 0):
 
 df_epis.to_csv(
     'sensor-training-data.csv', index=False, header=True, float_format='%.2f'
-)
+)
\ No newline at end of file
diff --git a/ml-models/anomaly-detection/pipeline/push-model.py b/ml-models/anomaly-detection/pipeline/push-model.py
index dc2d376..8dc21de 100644
--- a/ml-models/anomaly-detection/pipeline/push-model.py
+++ b/ml-models/anomaly-detection/pipeline/push-model.py
@@ -1,30 +1,28 @@
 from datetime import datetime
-import json
-import os
+from os import environ
 
 import boto3
+import git
+import yaml
 
 
-s3_endpoint_url = os.environ.get('S3_ENDPOINT_URL')
-s3_access_key = os.environ.get('S3_ACCESS_KEY')
-s3_secret_key = os.environ.get('S3_SECRET_KEY')
-s3_bucket_name = os.environ.get('S3_BUCKET_NAME')
+from deployment import get_deployment_resource
 
-timestamp = datetime.now().strftime('%y%m%d%H%M')
-model_name = f'model-{timestamp}.joblib'
-s3_model_location = f's3://{s3_bucket_name}/{model_name}'
-metrics = {
-    'metrics': [
-        {
-            'name': 'model-version',
-            'numberValue': timestamp,
-            'format': 'RAW'
-        }
-    ]
-}
+s3_endpoint_url = environ.get('S3_ENDPOINT_URL')
+s3_access_key = environ.get('S3_ACCESS_KEY')
+s3_secret_key = environ.get('S3_SECRET_KEY')
+s3_bucket_name = environ.get('S3_BUCKET_NAME')
 
-with open('mlpipeline-metrics.json', 'w') as f:
-    json.dump(metrics, f)
+timestamp = datetime.now().strftime('%y%m%d%H%M')
+git_server_url = 'http://gitea-in-cluster-http.vp-gitea.svc.cluster.local:3000'
+git_user = environ.get('username')
+git_password = environ.get('password')
+git_branch = environ.get('branch', 'main')
+ops_repo_location = f'{git_server_url}/{git_user}/industrial-edge.git'
+ops_repo_url = ops_repo_location.replace(
+    'http://', f'http://{git_user}:{git_password}@', 1
+)
+model_artifact_id = 'model.joblib'
 
 
 print(f'Uploading model to bucket {s3_bucket_name}'
@@ -33,5 +31,48 @@
     's3', endpoint_url=s3_endpoint_url,
     aws_access_key_id=s3_access_key, aws_secret_access_key=s3_secret_key
 )
+try:
+    s3_client.create_bucket(Bucket=s3_bucket_name)
+except Exception:
+    print(f'Failed to create new bucket with name "{s3_bucket_name}". Continuing.')
 with open('model.joblib', 'rb') as model_file:
-    s3_client.upload_fileobj(model_file, s3_bucket_name, model_name)
+    s3_client.upload_fileobj(model_file, s3_bucket_name, model_artifact_id)
+
+
+print(f'Checking out repo at {ops_repo_location} with user {git_user}')
+ops_repository_local = '/opt/app-root/src/industrial-edge'
+try:
+    repository = git.Repo.clone_from(ops_repo_url, ops_repository_local)
+except git.GitCommandError as error:
+    print(f'Git clone failed: {error}\nChecking out local repository.')
+    repository = git.Repo(ops_repository_local)
+
+print(f'Checking out branch {git_branch}.')
+repository.git.checkout(git_branch)
+with repository.config_writer() as git_config:
+    git_config.set_value('user', 'name', git_user)
+
+inference_service_cr = get_deployment_resource(model_artifact_id)
+
+print(f'Writing updated Inference Service CR: {inference_service_cr}')
+
+inference_service_manifest_location_dev = (
+    f'{ops_repository_local}/charts/datacenter/data-science-project/templates/'
+    f'anomaly-detection/anomaly-detection-service.yaml'
+)
+
+with open(inference_service_manifest_location_dev, 'w') as outputfile:
+    yaml.safe_dump(inference_service_cr, outputfile)
+
+inference_service_manifest_location_tst = (
+    f'{ops_repository_local}/charts/datacenter/manuela-tst/templates/'
+    f'anomaly-detection/anomaly-detection-service.yaml'
+)
+
+with open(inference_service_manifest_location_tst, 'w') as outputfile:
+    yaml.safe_dump(inference_service_cr, outputfile)
+
+repository.index.add([inference_service_manifest_location_dev])
+repository.index.add([inference_service_manifest_location_tst])
+repository.index.commit(f'Model update {timestamp} in test environment.')
+repository.remotes.origin.push()
diff --git a/ml-models/anomaly-detection/pipeline/training.pipeline b/ml-models/anomaly-detection/pipeline/training.pipeline
index 4c90b51..073faea 100644
--- a/ml-models/anomaly-detection/pipeline/training.pipeline
+++ b/ml-models/anomaly-detection/pipeline/training.pipeline
@@ -31,7 +31,7 @@
           "label": "",
           "ui_data": {
             "label": "preprocessing.py",
-            "image": "/notebook/object-detection/industrial-edge/static/elyra/python.svg",
+            "image": "/notebook/ml-development/jupyterlab/static/elyra/python.svg",
             "x_pos": 61,
             "y_pos": 287,
             "description": "Run Python script"
@@ -96,7 +96,7 @@
           "label": "",
           "ui_data": {
             "label": "feature_extraction.py",
-            "image": "/notebook/object-detection/industrial-edge/static/elyra/python.svg",
+            "image": "/notebook/ml-development/jupyterlab/static/elyra/python.svg",
             "x_pos": 298,
             "y_pos": 286,
"description": "Run Python script" @@ -160,7 +160,7 @@ "label": "", "ui_data": { "label": "training.py", - "image": "/notebook/object-detection/industrial-edge/static/elyra/python.svg", + "image": "/notebook/ml-development/jupyterlab/static/elyra/python.svg", "x_pos": 539, "y_pos": 287, "description": "Run Python script" @@ -222,7 +222,7 @@ "label": "", "ui_data": { "label": "verification.py", - "image": "/notebook/object-detection/industrial-edge/static/elyra/python.svg", + "image": "/notebook/ml-development/jupyterlab/static/elyra/python.svg", "x_pos": 471, "y_pos": 461, "description": "Run Python script" @@ -270,7 +270,9 @@ "op": "execute-python-node", "app_data": { "component_parameters": { - "dependencies": [], + "dependencies": [ + "deployment.py" + ], "include_subdirectories": false, "outputs": [], "env_vars": [], @@ -296,18 +298,32 @@ "env_var": "S3_BUCKET_NAME", "name": "aws-connection-user-bucket", "key": "AWS_S3_BUCKET" + }, + { + "env_var": "username", + "name": "gitea-admin-secret-and-branch", + "key": "username" + }, + { + "env_var": "password", + "name": "gitea-admin-secret-and-branch", + "key": "password" + }, + { + "env_var": "branch", + "name": "gitea-admin-secret-and-branch", + "key": "branch" } ], "kubernetes_shared_mem_size": {}, "kubernetes_tolerations": [], "mounted_volumes": [], - "filename": "push-model.py", - "runtime_image": "quay.io/mmurakam/runtimes:timeseries-v0.1.0" + "filename": "push-model.py" }, "label": "", "ui_data": { "label": "push-model.py", - "image": "/notebook/object-detection/industrial-edge/static/elyra/python.svg", + "image": "/notebook/ml-development/jupyterlab/static/elyra/python.svg", "x_pos": 879, "y_pos": 493, "description": "Run Python script" @@ -393,7 +409,7 @@ "label": "", "ui_data": { "label": "data_ingestion.py", - "image": "/notebook/object-detection/industrial-edge/static/elyra/python.svg", + "image": "/notebook/ml-development/jupyterlab/static/elyra/python.svg", "x_pos": 156, "y_pos": 176, "description": "Run Python script" @@ -444,7 +460,7 @@ "kubernetes_pod_labels": [], "env_vars": [], "kubernetes_secrets": [], - "runtime_image": "quay.io/mmurakam/runtimes:industrial-edge-v0.1.0" + "runtime_image": "quay.io/hybridcloudpatterns/manuela-runtime:main" }, "name": "training", "runtime": "Data Science Pipelines"