From 37d3d1c8f0c9f6992279c6ec493a40e25099ba28 Mon Sep 17 00:00:00 2001
From: Jennifer Cwagenberg <candiedcode@gmail.com>
Date: Sun, 21 Apr 2024 12:17:18 +0000
Subject: [PATCH] ci: add tests to xgboost_diabetes_classification.ipynb

---
 Makefile                                      |  3 +-
 .../xgboost_diabetes_classification.ipynb     | 98 +++++++++++++------
 2 files changed, 69 insertions(+), 32 deletions(-)

diff --git a/Makefile b/Makefile
index dd40393..db123e1 100644
--- a/Makefile
+++ b/Makefile
@@ -20,8 +20,9 @@ test:
 	poetry run pytest
 
+# Run the example notebooks under pytest's nbmake plugin.
 test-notebooks:
-	poetry run pytest --nbmake notebooks/pytorch_sentiment_analysis.ipynb
+	poetry run pytest --nbmake notebooks/pytorch_sentiment_analysis.ipynb notebooks/xgboost_diabetes_classification.ipynb
 
 build:
 	poetry build
 
diff --git a/notebooks/xgboost_diabetes_classification.ipynb b/notebooks/xgboost_diabetes_classification.ipynb
index abeea89..c96911d 100644
--- a/notebooks/xgboost_diabetes_classification.ipynb
+++ b/notebooks/xgboost_diabetes_classification.ipynb
@@ -18,12 +18,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "modelscan, version 0.5.0\n"
+      "Note: you may need to restart the kernel to use updated packages.\n",
+      "modelscan, version 0.0.0\n"
      ]
     }
    ],
    "source": [
-    "!pip install -q modelscan\n",
+    "%pip install -q modelscan\n",
     "!modelscan -v"
    ]
   },
@@ -32,10 +33,19 @@
    "execution_count": 2,
    "id": "eb656ce5",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Note: you may need to restart the kernel to use updated packages.\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
    "source": [
-    "!pip install -q xgboost==1.7.6\n",
-    "!pip install -U -q scikit-learn==1.3.0"
+    "%pip install -q xgboost==2.0.3\n",
+    "%pip install -U -q scikit-learn==1.4.2"
    ]
   },
   {
@@ -122,15 +132,31 @@
    "cell_type": "code",
    "execution_count": 6,
    "id": "ccfeee08",
-   "metadata": {},
+   "metadata": {
+    "nbmake": {
+     "post_cell_execute": [
+      "import json\n",
+      "!modelscan --path XGBoostModels/safe_model.pkl --reporting-format=json --output-file XGBoostModels/safe_model_results.json\n",
+      "with open('XGBoostModels/safe_model_results.json', 'rb') as f:\n",
+      "    data = json.load(f)\n",
+      "summary = data['summary']\n",
+      "assert summary['input_path'] == 'XGBoostModels/safe_model.pkl'\n",
+      "assert summary['total_issues_by_severity'] == {'LOW': 0, 'MEDIUM': 0, 'HIGH': 0, 'CRITICAL': 0}\n",
+      "assert summary['total_issues'] == 0\n",
+      "assert summary['scanned']['total_scanned'] == 1\n",
+      "assert data['issues'] == []\n",
+      "assert data['errors'] == []"
+     ]
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "No settings file detected at /Users/mehrinkiani/Documents/modelscan/notebooks/modelscan-settings.toml. Using defaults. \n",
+      "No settings file detected at /workspaces/modelscan/notebooks/modelscan-settings.toml. Using defaults. \n",
       "\n",
-      "Scanning /Users/mehrinkiani/Documents/modelscan/notebooks/XGBoostModels/safe_model.pkl using modelscan.scanners.PickleUnsafeOpScan model scan\n",
+      "Scanning /workspaces/modelscan/notebooks/XGBoostModels/safe_model.pkl using modelscan.scanners.PickleUnsafeOpScan model scan\n",
       "\n",
       "\u001b[34m--- Summary ---\u001b[0m\n",
       "\n",
@@ -201,8 +227,8 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "aws_access_key_id=<access_key_id>\n",
-      "aws_secret_access_key=<aws_secret_key>\n",
+      "aws_access_key_id=AKIAIOSFODNN7EXAMPLE\n",
+      "aws_secret_access_key=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY\n",
       "The model predicts: [0, 1, 1]\n",
       "The true labels are: [0. 1. 1.]\n"
      ]
@@ -237,9 +263,9 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "No settings file detected at /Users/mehrinkiani/Documents/modelscan/notebooks/modelscan-settings.toml. Using defaults. \n",
+      "No settings file detected at /workspaces/modelscan/notebooks/modelscan-settings.toml. Using defaults. \n",
       "\n",
-      "Scanning /Users/mehrinkiani/Documents/modelscan/notebooks/XGBoostModels/unsafe_model.pkl using modelscan.scanners.PickleUnsafeOpScan model scan\n",
+      "Scanning /workspaces/modelscan/notebooks/XGBoostModels/unsafe_model.pkl using modelscan.scanners.PickleUnsafeOpScan model scan\n",
       "\n",
       "\u001b[34m--- Summary ---\u001b[0m\n",
       "\n",
@@ -259,7 +285,7 @@
       "Unsafe operator found:\n",
       "  - Severity: CRITICAL\n",
       "  - Description: Use of unsafe operator 'system' from module 'posix'\n",
-      "  - Source: /Users/mehrinkiani/Documents/modelscan/notebooks/XGBoostModels/unsafe_model.pkl\n"
+      "  - Source: /workspaces/modelscan/notebooks/XGBoostModels/unsafe_model.pkl\n"
      ]
     }
    ],
@@ -290,33 +316,45 @@
    "cell_type": "code",
    "execution_count": 11,
    "id": "6df55b3e",
-   "metadata": {},
+   "metadata": {
+    "nbmake": {
+     "post_cell_execute": [
+      "import json\n",
+      "with open('XGBoostModels/xgboost-model-scan-results.json', 'rb') as f:\n",
+      "    data = json.load(f)\n",
+      "summary = data['summary']\n",
+      "assert summary['input_path'] == 'XGBoostModels/unsafe_model.pkl'\n",
+      "assert summary['total_issues_by_severity'] == {'LOW': 0, 'MEDIUM': 0, 'HIGH': 0, 'CRITICAL': 1}\n",
+      "assert summary['total_issues'] == 1\n",
+      "assert summary['scanned']['total_scanned'] == 1\n",
+      "assert data['issues'] == [{'description': 'Use of unsafe operator \\'system\\' from module \\'posix\\'', 'operator': 'system', 'module': 'posix', 'source': 'unsafe_model.pkl', 'scanner': 'modelscan.scanners.PickleUnsafeOpScan', 'severity': 'CRITICAL'}]\n",
+      "assert len(data['errors']) == 0"
+     ]
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "No settings file detected at /Users/mehrinkiani/Documents/modelscan/notebooks/modelscan-settings.toml. Using defaults. \n",
+      "No settings file detected at /workspaces/modelscan/notebooks/modelscan-settings.toml. Using defaults. \n",
       "\n",
-      "Scanning /Users/mehrinkiani/Documents/modelscan/notebooks/XGBoostModels/unsafe_model.pkl using modelscan.scanners.PickleUnsafeOpScan model scan\n",
-      "\u001b[1m{\u001b[0m\u001b[32m\"modelscan_version\"\u001b[0m: \u001b[32m\"0.5.0\"\u001b[0m, \u001b[32m\"timestamp\"\u001b[0m: \u001b[32m\"2024-01-25T17:56:00.855056\"\u001b[0m, \n",
-      "\u001b[32m\"input_path\"\u001b[0m: \n",
-      "\u001b[32m\"/Users/mehrinkiani/Documents/modelscan/notebooks/XGBoostModels/unsafe_model.pkl\u001b[0m\n",
-      "\u001b[32m\"\u001b[0m, \u001b[32m\"total_issues\"\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m\"summary\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m\"total_issues_by_severity\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m\"LOW\"\u001b[0m: \u001b[1;36m0\u001b[0m, \n",
-      "\u001b[32m\"MEDIUM\"\u001b[0m: \u001b[1;36m0\u001b[0m, \u001b[32m\"HIGH\"\u001b[0m: \u001b[1;36m0\u001b[0m, \u001b[32m\"CRITICAL\"\u001b[0m: \u001b[1;36m1\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m\"issues_by_severity\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m\"CRITICAL\"\u001b[0m: \n",
-      "\u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m\"description\"\u001b[0m: \u001b[32m\"Use of unsafe operator 'system' from module 'posix'\"\u001b[0m, \n",
-      "\u001b[32m\"operator\"\u001b[0m: \u001b[32m\"system\"\u001b[0m, \u001b[32m\"module\"\u001b[0m: \u001b[32m\"posix\"\u001b[0m, \u001b[32m\"source\"\u001b[0m: \n",
-      "\u001b[32m\"/Users/mehrinkiani/Documents/modelscan/notebooks/XGBoostModels/unsafe_model.pkl\u001b[0m\n",
-      "\u001b[32m\"\u001b[0m, \u001b[32m\"scanner\"\u001b[0m: \u001b[32m\"modelscan.scanners.PickleUnsafeOpScan\"\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m\"errors\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \n",
-      "\u001b[32m\"scanned\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m\"total_scanned\"\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m\"scanned_files\"\u001b[0m: \n",
-      "\u001b[1m[\u001b[0m\u001b[32m\"/Users/mehrinkiani/Documents/modelscan/notebooks/XGBoostModels/unsafe_model.pk\u001b[0m\n",
-      "\u001b[32ml\"\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m\n"
+      "Scanning /workspaces/modelscan/notebooks/XGBoostModels/unsafe_model.pkl using modelscan.scanners.PickleUnsafeOpScan model scan\n",
+      "\u001b[1m{\u001b[0m\u001b[32m\"summary\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m\"total_issues_by_severity\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m\"LOW\"\u001b[0m: \u001b[1;36m0\u001b[0m, \u001b[32m\"MEDIUM\"\u001b[0m: \u001b[1;36m0\u001b[0m, \u001b[32m\"HIGH\"\u001b[0m: \u001b[1;36m0\u001b[0m, \n",
+      "\u001b[32m\"CRITICAL\"\u001b[0m: \u001b[1;36m1\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m\"total_issues\"\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m\"input_path\"\u001b[0m: \n",
+      "\u001b[32m\"XGBoostModels/unsafe_model.pkl\"\u001b[0m, \u001b[32m\"absolute_path\"\u001b[0m: \n",
+      "\u001b[32m\"/workspaces/modelscan/notebooks/XGBoostModels\"\u001b[0m, \u001b[32m\"modelscan_version\"\u001b[0m: \u001b[32m\"0.0.0\"\u001b[0m, \n",
+      "\u001b[32m\"timestamp\"\u001b[0m: \u001b[32m\"2024-04-21T12:13:42.698872\"\u001b[0m, \u001b[32m\"scanned\"\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m\"total_scanned\"\u001b[0m: \u001b[1;36m1\u001b[0m, \n",
+      "\u001b[32m\"scanned_files\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[32m\"unsafe_model.pkl\"\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m, \u001b[32m\"issues\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m{\u001b[0m\u001b[32m\"description\"\u001b[0m: \u001b[32m\"Use of \u001b[0m\n",
+      "\u001b[32munsafe operator 'system' from module 'posix'\"\u001b[0m, \u001b[32m\"operator\"\u001b[0m: \u001b[32m\"system\"\u001b[0m, \u001b[32m\"module\"\u001b[0m: \n",
+      "\u001b[32m\"posix\"\u001b[0m, \u001b[32m\"source\"\u001b[0m: \u001b[32m\"unsafe_model.pkl\"\u001b[0m, \u001b[32m\"scanner\"\u001b[0m: \n",
+      "\u001b[32m\"modelscan.scanners.PickleUnsafeOpScan\"\u001b[0m, \u001b[32m\"severity\"\u001b[0m: \u001b[32m\"CRITICAL\"\u001b[0m\u001b[1m}\u001b[0m\u001b[1m]\u001b[0m, \u001b[32m\"errors\"\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m]\u001b[0m\u001b[1m}\u001b[0m\n"
      ]
     }
    ],
    "source": [
-    "# This will save the scan results in file: xgboost-model-scan-results.json\n",
+    "# This will save the scan results in file: XGBoostModels/xgboost-model-scan-results.json\n",
-    "!modelscan --path  XGBoostModels/unsafe_model.pkl -r json -o xgboost-model-scan-results.json"
+    "!modelscan --path  XGBoostModels/unsafe_model.pkl -r json -o XGBoostModels/xgboost-model-scan-results.json"
    ]
   }
  ],
@@ -336,7 +374,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.13"
+   "version": "3.9.19"
   },
   "vscode": {
    "interpreter": {