diff --git a/notebooks/Example Notebook.ipynb b/notebooks/Example Notebook.ipynb index 1975ef9..bd3c6b4 100644 --- a/notebooks/Example Notebook.ipynb +++ b/notebooks/Example Notebook.ipynb @@ -18,7 +18,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: pycatcher in /Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages (0.0.65)\n", + "Requirement already satisfied: pycatcher in /Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages (0.0.62)\n", "Requirement already satisfied: flask<4.0.0,>=3.1.0 in /Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages (from pycatcher) (3.1.0)\n", "Requirement already satisfied: mkdocs<2.0.0,>=1.5.0 in /Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages (from pycatcher) (1.6.1)\n", "Requirement already satisfied: mkdocs-material<10.0.0,>=9.1.3 in /Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages (from pycatcher) (9.5.49)\n", @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "374be8cd-818a-4278-9b30-37c5ae8f0fc4", "metadata": {}, "outputs": [], @@ -133,7 +133,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "527a6ff4-f37f-47cf-b2e1-ced8735bb205", "metadata": {}, "outputs": [], @@ -162,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "3bf8d294-d016-4e10-85fa-16e140093585", "metadata": {}, "outputs": [ @@ -170,10 +170,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: fsspec in /Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages (2024.10.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + "Requirement already satisfied: fsspec in /Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages (2024.12.0)\n", + "\u001b[33mWARNING: There was an error checking the latest version of pip.\u001b[0m\u001b[33m\n", + "\u001b[0m" ] } ], @@ -183,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "f069b52e-bbc8-467d-bcaa-6ac92be5ab0c", "metadata": {}, "outputs": [ @@ -191,10 +190,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: certifi in /Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages (2024.12.14)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + "Requirement already satisfied: certifi in /Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages (2024.12.14)\n", + "\u001b[33mWARNING: There was an error checking the latest version of pip.\u001b[0m\u001b[33m\n", + "\u001b[0m" ] } ], @@ -204,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 
5, "id": "6223faab-f088-40ac-b202-fcb6433a6b89", "metadata": {}, "outputs": [ @@ -272,7 +270,7 @@ "4 2019-01-05 2049460" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -286,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "efb49480-5727-4537-b365-5824fc745c0a", "metadata": {}, "outputs": [ @@ -354,7 +352,7 @@ "4 1949-05 121" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -369,7 +367,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "f0189874-9b6c-4517-9fab-eb31dfbc7604", "metadata": {}, "outputs": [ @@ -467,7 +465,7 @@ "9 2004-10 14" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -481,7 +479,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "c1976911-a165-4abe-ad21-8e193a96f78c", "metadata": {}, "outputs": [ @@ -549,7 +547,7 @@ "4 2019-01-05 2049460" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -560,7 +558,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "5b8ead86-dcf8-4f52-b52a-0f9e2eb1c670", "metadata": { "editable": true, @@ -583,9 +581,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n" ] } @@ -597,7 +595,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "90791fa6-752e-43fc-a887-49c49d2a90e4", "metadata": { "editable": true, @@ -678,9 +676,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n" ] } @@ -692,7 +690,7 @@ }, { 
"cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "ec8822a0-9257-4b93-8c2e-262223239d19", "metadata": {}, "outputs": [ @@ -707,9 +705,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n" ] } @@ -721,7 +719,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "d0e67bc3-b498-44ae-9395-fe79787685e1", "metadata": {}, "outputs": [ @@ -738,9 +736,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n" ] } @@ -752,7 +750,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "17ddb90a-e780-46a9-beed-8ff9a7abc5de", "metadata": {}, "outputs": [ @@ -760,13 +758,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:12,612 - INFO - Building outlier plot using classical seasonal decomposition.\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "2025-01-20 06:35:19,331 - INFO - Building outlier plot using classical seasonal decomposition.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + 
"/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "2025-01-19 15:52:12,622 - INFO - Completing outlier detection using classical seasonal decomposition\n", - "2025-01-19 15:52:12,674 - INFO - Completing outlier plot using classical seasonal decomposition.\n" + "2025-01-20 06:35:19,343 - INFO - Completing outlier detection using classical seasonal decomposition\n", + "2025-01-20 06:35:19,357 - INFO - Completing outlier plot using classical seasonal decomposition.\n" ] }, { @@ -854,7 +852,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "b53fb76c-423f-49a2-8156-ab80928e9cbd", "metadata": {}, "outputs": [ @@ -862,9 +860,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n" ] }, @@ -916,7 +914,7 @@ "1960-03-01 419" ] }, - "execution_count": 15, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -927,7 +925,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "ba210863-aa93-4b32-a3da-92bbea6856e7", "metadata": {}, "outputs": [ @@ -935,13 +933,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:12,954 - INFO - Building outlier plot using classical seasonal decomposition.\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "2025-01-20 06:35:19,956 - INFO - Building outlier plot using classical seasonal decomposition.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "2025-01-19 15:52:12,960 - INFO - Completing outlier detection using classical seasonal decomposition\n", - "2025-01-19 15:52:12,968 - INFO - Completing outlier plot using 
classical seasonal decomposition.\n" + "2025-01-20 06:35:19,969 - INFO - Completing outlier detection using classical seasonal decomposition\n", + "2025-01-20 06:35:19,984 - INFO - Completing outlier plot using classical seasonal decomposition.\n" ] }, { @@ -972,7 +970,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "c55e34f4-63e4-45ed-9a3d-9e4e56ff50da", "metadata": {}, "outputs": [ @@ -980,9 +978,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:13,159 - INFO - Building month-wise box plot.\n", - "2025-01-19 15:52:13,191 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n", - "2025-01-19 15:52:13,197 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n" + "2025-01-20 06:35:20,419 - INFO - Building month-wise box plot.\n", + "2025-01-20 06:35:20,473 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n", + "2025-01-20 06:35:20,480 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n" ] }, { @@ -1003,7 +1001,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "2ece94ba-c56c-437b-bcde-a96d42594c68", "metadata": {}, "outputs": [ @@ -1011,13 +1009,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:13,618 - INFO - Building time-series plot for seasonal decomposition.\n", - "2025-01-19 15:52:13,623 - INFO - Time frequency: D\n", - "2025-01-19 15:52:13,623 - INFO - Length of time index: 2017\n", - "2025-01-19 15:52:13,623 - INFO - Building seasonal plot based on classical seasonal decomposition\n", - "2025-01-19 15:52:13,644 - INFO - Plotting seasonal decomposition with title: Additive\n", - "2025-01-19 15:52:13,715 - INFO - Completing seasonal plot based on classical seasonal decomposition.\n", - "2025-01-19 15:52:13,715 - INFO - Completing Time series frequency detection\n" + "2025-01-20 06:35:20,988 - INFO - Building time-series plot for seasonal decomposition.\n", + "2025-01-20 06:35:21,002 - INFO - Time frequency: D\n", + "2025-01-20 06:35:21,003 - INFO - Length of time index: 2017\n", + "2025-01-20 06:35:21,004 - INFO - Building seasonal plot based on classical seasonal decomposition\n", + "2025-01-20 06:35:21,036 - INFO - Plotting seasonal decomposition with title: Additive\n", + "2025-01-20 06:35:21,096 - INFO - Completing seasonal plot based on classical seasonal decomposition.\n", + "2025-01-20 06:35:21,097 - INFO - Completing Time series frequency detection\n" ] }, { @@ -1045,7 +1043,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "9bd68203-7593-4b16-bd24-879dbf0c76dc", "metadata": {}, "outputs": [ @@ -1053,13 +1051,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:14,367 - INFO - Building time-series plot for seasonal decomposition.\n", - "2025-01-19 15:52:14,369 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:14,369 - INFO - Length of time index: 144\n", - "2025-01-19 15:52:14,370 - INFO - Building 
seasonal plot based on classical seasonal decomposition\n", - "2025-01-19 15:52:14,390 - INFO - Plotting seasonal decomposition with title: Multiplicative\n", - "2025-01-19 15:52:14,421 - INFO - Completing seasonal plot based on classical seasonal decomposition.\n", - "2025-01-19 15:52:14,422 - INFO - Completing Time series frequency detection\n" + "2025-01-20 06:35:21,615 - INFO - Building time-series plot for seasonal decomposition.\n", + "2025-01-20 06:35:21,618 - INFO - Time frequency: MS\n", + "2025-01-20 06:35:21,618 - INFO - Length of time index: 144\n", + "2025-01-20 06:35:21,619 - INFO - Building seasonal plot based on classical seasonal decomposition\n", + "2025-01-20 06:35:21,645 - INFO - Plotting seasonal decomposition with title: Multiplicative\n", + "2025-01-20 06:35:21,674 - INFO - Completing seasonal plot based on classical seasonal decomposition.\n", + "2025-01-20 06:35:21,674 - INFO - Completing Time series frequency detection\n" ] }, { @@ -1086,7 +1084,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "id": "cf225d11-5719-4e9a-bb50-248991b49562", "metadata": {}, "outputs": [ @@ -1105,9 +1103,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n", - "/Users/aseem/Documents/GitHub/outlier_detection/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + "/Users/sarika/Documents/GitHub/pycatcher/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n" ] } @@ -1127,7 +1125,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "id": "0ea6e306-6dd2-4346-aefb-8f344d2af956", "metadata": {}, "outputs": [ @@ -1135,17 +1133,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:15,008 - INFO - Starting outlier detection using STL\n", - "2025-01-19 15:52:15,009 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:15,009 - INFO - Length of time index: 200.00\n", - "2025-01-19 15:52:15,009 - INFO - Detected Period: 12\n", - "2025-01-19 15:52:15,010 - INFO - Derived Seasonal: 13\n", - "2025-01-19 15:52:15,017 - INFO - Additive model detected\n", - "2025-01-19 15:52:15,018 - INFO - Generating outlier detection using STL\n", - "2025-01-19 15:52:15,018 - INFO - Outlier detection using STL Additive Model\n", - "2025-01-19 15:52:15,675 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:15,676 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:15,677 - INFO - Completing outlier detection using STL\n" + "2025-01-20 06:35:22,937 - INFO - Detected Period: 12\n", + "2025-01-20 06:35:22,938 - INFO - Derived Seasonal: 13\n", + "2025-01-20 06:35:22,952 - INFO - Additive model detected\n", + "2025-01-20 06:35:22,953 - INFO - Generating outlier detection using STL\n", + "2025-01-20 06:35:22,953 - INFO - Outlier detection using STL Additive Model\n", 
+ "2025-01-20 06:35:23,407 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:23,408 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:23,408 - INFO - Completing outlier detection using STL\n" ] }, { @@ -1206,7 +1201,7 @@ "3 2019-07-01 100" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1217,7 +1212,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "id": "65c3023f-91d8-4499-bedf-b3bc378a5bad", "metadata": {}, "outputs": [ @@ -1225,12 +1220,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:15,681 - INFO - Starting outlier detection using STL\n", - "2025-01-19 15:52:15,683 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:15,683 - INFO - Length of time index: 200.00\n", - "2025-01-19 15:52:15,690 - INFO - Additive model detected\n", - "2025-01-19 15:52:15,690 - INFO - Generating outlier detection using STL\n", - "2025-01-19 15:52:15,691 - INFO - Outlier detection using STL Additive Model\n" + "2025-01-20 06:35:23,429 - INFO - Starting outlier detection using STL\n", + "2025-01-20 06:35:23,431 - INFO - Time frequency: MS\n", + "2025-01-20 06:35:23,431 - INFO - Length of time index: 200.00\n", + "2025-01-20 06:35:23,436 - INFO - Additive model detected\n", + "2025-01-20 06:35:23,437 - INFO - Generating outlier detection using STL\n", + "2025-01-20 06:35:23,437 - INFO - Outlier detection using STL Additive Model\n" ] }, { @@ -1245,9 +1240,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:16,348 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:16,349 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:16,350 - INFO - Completing outlier detection using STL\n" + "2025-01-20 06:35:23,870 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:23,871 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:23,872 - INFO - Completing outlier detection using STL\n" ] }, { @@ -1265,10 +1260,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 22, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, @@ -1289,7 +1284,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "id": "e5beaecd-f6ac-4066-847d-294b6ae2da23", "metadata": {}, "outputs": [ @@ -1297,17 +1292,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:16,455 - INFO - Starting outlier detection using STL\n", - "2025-01-19 15:52:16,457 - INFO - Time frequency: D\n", - "2025-01-19 15:52:16,458 - INFO - Length of time index: 2017.00\n", - "2025-01-19 15:52:16,458 - INFO - Detected Period: 365\n", - "2025-01-19 15:52:16,458 - INFO - Derived Seasonal: 365\n", - "2025-01-19 15:52:17,357 - INFO - Multiplicative model detected\n", - "2025-01-19 15:52:17,358 - INFO - Generating outlier detection using STL\n", - "2025-01-19 15:52:17,358 - INFO - Outlier detection using STL Multiplicative Model\n", - "2025-01-19 15:52:19,111 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:19,113 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:19,113 - INFO - Completing outlier detection using STL\n" + "2025-01-20 06:35:23,948 - INFO - Detected Period: 365\n", + "2025-01-20 06:35:23,948 - INFO - Derived Seasonal: 365\n", + "2025-01-20 06:35:24,490 - INFO - Multiplicative model detected\n", + "2025-01-20 
06:35:24,490 - INFO - Generating outlier detection using STL\n", + "2025-01-20 06:35:24,491 - INFO - Outlier detection using STL Multiplicative Model\n", + "2025-01-20 06:35:25,559 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:25,560 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:25,560 - INFO - Completing outlier detection using STL\n" ] }, { @@ -1644,7 +1636,7 @@ "49 2024-06-10 2815172" ] }, - "execution_count": 23, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1655,7 +1647,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "id": "b66c0ef0-f418-4470-8f58-fe872800bb6f", "metadata": {}, "outputs": [ @@ -1663,19 +1655,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:19,118 - INFO - Detecting today's outliers.\n", - "2025-01-19 15:52:19,119 - INFO - Starting outlier detection using STL\n", - "2025-01-19 15:52:19,121 - INFO - Time frequency: D\n", - "2025-01-19 15:52:19,121 - INFO - Length of time index: 2017.00\n", - "2025-01-19 15:52:19,121 - INFO - Detected Period: 365\n", - "2025-01-19 15:52:19,122 - INFO - Derived Seasonal: 365\n", - "2025-01-19 15:52:20,035 - INFO - Multiplicative model detected\n", - "2025-01-19 15:52:20,035 - INFO - Generating outlier detection using STL\n", - "2025-01-19 15:52:20,036 - INFO - Outlier detection using STL Multiplicative Model\n", - "2025-01-19 15:52:21,747 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:21,748 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:21,749 - INFO - Completing outlier detection using STL\n", - "2025-01-19 15:52:21,749 - INFO - No outliers detected today.\n" + "2025-01-20 06:35:25,564 - INFO - Detecting today's outliers.\n", + "2025-01-20 06:35:25,566 - INFO - Detected Period: 365\n", + "2025-01-20 06:35:25,566 - INFO - Derived Seasonal: 365\n", + "2025-01-20 06:35:26,107 - INFO - Multiplicative model detected\n", + "2025-01-20 06:35:26,108 - INFO - Generating outlier detection using STL\n", + "2025-01-20 06:35:26,108 - INFO - Outlier detection using STL Multiplicative Model\n", + "2025-01-20 06:35:27,179 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:27,180 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:27,180 - INFO - Completing outlier detection using STL\n", + "2025-01-20 06:35:27,181 - INFO - No outliers detected today.\n" ] }, { @@ -1684,7 +1673,7 @@ "'No Outliers Today!'" ] }, - "execution_count": 24, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1695,21 +1684,10 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "id": "d2956632-26ea-4f32-b901-2018f8f9f5c7", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-01-19 15:52:21,753 - INFO - Starting outlier detection using STL\n", - "2025-01-19 15:52:21,754 - INFO - Time frequency: D\n", - "2025-01-19 15:52:21,755 - INFO - Length of time index: 5.00\n", - "2025-01-19 15:52:21,755 - INFO - Less than 2 years of data - Use Moving Average or IQR Method\n", - "2025-01-19 15:52:21,755 - INFO - Default - Using IQR method for outlier detection.\n" - ] - }, { "data": { "text/html": [ @@ -1750,7 +1728,7 @@ "4 2024-01-05 450" ] }, - "execution_count": 25, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1769,7 +1747,7 @@ }, { "cell_type": "code", - "execution_count": 26, + 
"execution_count": 25, "id": "0c010e16-144c-4bf3-bcc5-cc92f2939667", "metadata": {}, "outputs": [ @@ -1777,9 +1755,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:21,761 - INFO - Starting outlier detection using STL\n", - "2025-01-19 15:52:21,766 - INFO - Time frequency: D\n", - "2025-01-19 15:52:21,766 - INFO - Length of time index: 2017.00\n" + "2025-01-20 06:35:27,189 - INFO - Starting outlier detection using STL\n", + "2025-01-20 06:35:27,193 - INFO - Time frequency: D\n", + "2025-01-20 06:35:27,193 - INFO - Length of time index: 2017.00\n" ] }, { @@ -1794,12 +1772,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:22,665 - INFO - Multiplicative model detected\n", - "2025-01-19 15:52:22,666 - INFO - Generating outlier detection using STL\n", - "2025-01-19 15:52:22,666 - INFO - Outlier detection using STL Multiplicative Model\n", - "2025-01-19 15:52:24,383 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:24,384 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:24,385 - INFO - Completing outlier detection using STL\n" + "2025-01-20 06:35:27,766 - INFO - Multiplicative model detected\n", + "2025-01-20 06:35:27,766 - INFO - Generating outlier detection using STL\n", + "2025-01-20 06:35:27,767 - INFO - Outlier detection using STL Multiplicative Model\n", + "2025-01-20 06:35:28,831 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:28,831 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:28,832 - INFO - Completing outlier detection using STL\n" ] }, { @@ -1863,10 +1841,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 26, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" }, @@ -1887,7 +1865,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "id": "3da55501-3978-43d4-86a5-6ef5056c90cb", "metadata": {}, "outputs": [ @@ -1895,12 +1873,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:24,497 - INFO - Starting outlier detection using STL\n", - "2025-01-19 15:52:24,499 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:24,499 - INFO - Length of time index: 144.00\n", - "2025-01-19 15:52:24,506 - INFO - Additive model detected\n", - "2025-01-19 15:52:24,506 - INFO - Generating outlier detection using STL\n", - "2025-01-19 15:52:24,506 - INFO - Outlier detection using STL Additive Model\n" + "2025-01-20 06:35:28,906 - INFO - Starting outlier detection using STL\n", + "2025-01-20 06:35:28,907 - INFO - Time frequency: MS\n", + "2025-01-20 06:35:28,907 - INFO - Length of time index: 144.00\n", + "2025-01-20 06:35:28,911 - INFO - Additive model detected\n", + "2025-01-20 06:35:28,912 - INFO - Generating outlier detection using STL\n", + "2025-01-20 06:35:28,912 - INFO - Outlier detection using STL Additive Model\n" ] }, { @@ -1915,9 +1893,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:25,133 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:25,134 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:25,135 - INFO - Completing outlier detection using STL\n" + "2025-01-20 06:35:29,317 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:29,318 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:29,319 - INFO - Completing outlier detection using STL\n" ] }, { @@ -1939,10 +1917,10 @@ { "data": { 
"text/plain": [ - "" + "" ] }, - "execution_count": 27, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" }, @@ -1963,7 +1941,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 27, "id": "5f4eb96c-2458-4947-b341-3733d9b0a015", "metadata": {}, "outputs": [ @@ -1971,9 +1949,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:25,233 - INFO - Starting outlier detection using STL\n", - "2025-01-19 15:52:25,235 - INFO - Time frequency: D\n", - "2025-01-19 15:52:25,235 - INFO - Length of time index: 2017.00\n" + "2025-01-20 06:35:29,382 - INFO - Starting outlier detection using STL\n", + "2025-01-20 06:35:29,384 - INFO - Time frequency: D\n", + "2025-01-20 06:35:29,384 - INFO - Length of time index: 2017.00\n" ] }, { @@ -1988,8 +1966,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:26,136 - INFO - Multiplicative model detected\n", - "2025-01-19 15:52:26,263 - INFO - Completing seasonal decomposition plot using STL\n" + "2025-01-20 06:35:29,928 - INFO - Multiplicative model detected\n", + "2025-01-20 06:35:30,011 - INFO - Completing seasonal decomposition plot using STL\n" ] }, { @@ -2010,7 +1988,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 28, "id": "e69ffa96-218e-4068-837d-02681a059959", "metadata": {}, "outputs": [ @@ -2018,11 +1996,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:26,864 - INFO - Starting outlier detection using STL\n", - "2025-01-19 15:52:26,866 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:26,866 - INFO - Length of time index: 144.00\n", - "2025-01-19 15:52:26,873 - INFO - Additive model detected\n", - "2025-01-19 15:52:27,001 - INFO - Completing seasonal decomposition plot using STL\n" + "2025-01-20 06:35:30,389 - INFO - Starting outlier detection using STL\n", + "2025-01-20 06:35:30,390 - INFO - Time frequency: MS\n", + "2025-01-20 06:35:30,391 - INFO - Length of time index: 144.00\n", + "2025-01-20 06:35:30,395 - INFO - Additive model detected\n", + "2025-01-20 06:35:30,483 - INFO - Completing seasonal decomposition plot using STL\n" ] }, { @@ -2059,7 +2037,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 29, "id": "9c2c5c40-60d4-4bcb-8ab5-8b51b18cd7c0", "metadata": {}, "outputs": [ @@ -2067,15 +2045,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:27,547 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:27,549 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:27,549 - INFO - Length of time index: 200.00\n", - "2025-01-19 15:52:27,549 - INFO - Derived Period: 12\n", - "2025-01-19 15:52:27,556 - INFO - Generating outlier detection using MSTL\n", - "2025-01-19 15:52:27,556 - INFO - Outlier detection using MSTL Additive Model\n", - "2025-01-19 15:52:28,227 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:28,228 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:28,228 - INFO - Completing outlier detection using MSTL\n" + "2025-01-20 06:35:30,820 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:30,821 - INFO - Time frequency: MS\n", + "2025-01-20 06:35:30,821 - INFO - Length of time index: 200.00\n", + "2025-01-20 06:35:30,822 - INFO - Derived Period: 12\n", + "2025-01-20 06:35:30,826 - INFO - Generating outlier detection using MSTL\n", + "2025-01-20 06:35:30,826 - INFO - Outlier detection using MSTL Additive Model\n", + "2025-01-20 06:35:31,259 - 
INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:31,259 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:31,260 - INFO - Completing outlier detection using MSTL\n" ] }, { @@ -2136,7 +2114,7 @@ "3 2019-07-01 100" ] }, - "execution_count": 30, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -2147,7 +2125,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 30, "id": "9e2e7508-5da2-4c5b-b2aa-81b0a819958f", "metadata": {}, "outputs": [ @@ -2155,11 +2133,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:28,233 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:28,235 - INFO - Time frequency: D\n", - "2025-01-19 15:52:28,235 - INFO - Length of time index: 5.00\n", - "2025-01-19 15:52:28,235 - INFO - Less than 2 years of data - Use IQR or Moving Average Method\n", - "2025-01-19 15:52:28,236 - INFO - Default - Using IQR method for outlier detection.\n" + "2025-01-20 06:35:31,264 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:31,265 - INFO - Time frequency: D\n", + "2025-01-20 06:35:31,265 - INFO - Length of time index: 5.00\n", + "2025-01-20 06:35:31,265 - INFO - Less than 2 years of data - Use IQR or Moving Average Method\n", + "2025-01-20 06:35:31,265 - INFO - Default - Using IQR method for outlier detection.\n" ] }, { @@ -2202,7 +2180,7 @@ "3 2024-01-04 750" ] }, - "execution_count": 31, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -2221,7 +2199,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 31, "id": "13832c81-19c6-43a3-b755-6bfd32655125", "metadata": {}, "outputs": [ @@ -2229,16 +2207,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:28,241 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:28,243 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:28,243 - INFO - Length of time index: 200.00\n", - "2025-01-19 15:52:28,243 - INFO - Derived Period: 12\n", - "2025-01-19 15:52:28,250 - INFO - Additive model detected\n", - "2025-01-19 15:52:28,250 - INFO - Generating outlier detection using MSTL\n", - "2025-01-19 15:52:28,251 - INFO - Outlier detection using MSTL Additive Model\n", - "2025-01-19 15:52:28,916 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:28,917 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:28,918 - INFO - Completing outlier detection using MSTL\n" + "2025-01-20 06:35:31,850 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:31,856 - INFO - Time frequency: MS\n", + "2025-01-20 06:35:31,857 - INFO - Length of time index: 200.00\n", + "2025-01-20 06:35:31,857 - INFO - Derived Period: 12\n", + "2025-01-20 06:35:31,870 - INFO - Additive model detected\n", + "2025-01-20 06:35:31,870 - INFO - Generating outlier detection using MSTL\n", + "2025-01-20 06:35:31,871 - INFO - Outlier detection using MSTL Additive Model\n", + "2025-01-20 06:35:32,321 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:32,321 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:32,322 - INFO - Completing outlier detection using MSTL\n" ] }, { @@ -2256,10 +2234,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 32, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" }, @@ -2281,7 +2259,7 @@ }, { "cell_type": "code", - "execution_count": 33, + 
"execution_count": 32, "id": "70a8d0da-e287-4a7d-a349-5075dfbdabb0", "metadata": {}, "outputs": [ @@ -2289,16 +2267,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:29,026 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:29,031 - INFO - Time frequency: D\n", - "2025-01-19 15:52:29,031 - INFO - Length of time index: 2017.00\n", - "2025-01-19 15:52:29,032 - INFO - Derived Period: (7, 365)\n", - "2025-01-19 15:52:30,989 - INFO - Multiplicative model detected\n", - "2025-01-19 15:52:30,989 - INFO - Generating outlier detection using MSTL\n", - "2025-01-19 15:52:30,990 - INFO - Outlier detection using MSTL Multiplicative Model\n", - "2025-01-19 15:52:33,281 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:33,282 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:33,283 - INFO - Completing outlier detection using MSTL\n" + "2025-01-20 06:35:32,493 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:32,500 - INFO - Time frequency: D\n", + "2025-01-20 06:35:32,500 - INFO - Length of time index: 2017.00\n", + "2025-01-20 06:35:32,501 - INFO - Derived Period: (7, 365)\n", + "2025-01-20 06:35:33,685 - INFO - Multiplicative model detected\n", + "2025-01-20 06:35:33,686 - INFO - Generating outlier detection using MSTL\n", + "2025-01-20 06:35:33,686 - INFO - Outlier detection using MSTL Multiplicative Model\n", + "2025-01-20 06:35:35,105 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:35,106 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:35,107 - INFO - Completing outlier detection using MSTL\n" ] }, { @@ -2325,10 +2303,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 33, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" }, @@ -2349,7 +2327,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 33, "id": "6f508005-e282-4ddd-a7d2-3155c63e2f8c", "metadata": {}, "outputs": [ @@ -2357,17 +2335,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:33,396 - INFO - Detecting the latest outliers.\n", - "2025-01-19 15:52:33,397 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:33,399 - INFO - Time frequency: D\n", - "2025-01-19 15:52:33,399 - INFO - Length of time index: 2017.00\n", - "2025-01-19 15:52:33,400 - INFO - Derived Period: (7, 365)\n", - "2025-01-19 15:52:35,351 - INFO - Generating outlier detection using MSTL\n", - "2025-01-19 15:52:35,352 - INFO - Outlier detection using MSTL Multiplicative Model\n", - "2025-01-19 15:52:37,692 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:37,693 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:37,694 - INFO - Completing outlier detection using MSTL\n", - "2025-01-19 15:52:37,694 - INFO - Detected the latest outlier!\n" + "2025-01-20 06:35:35,181 - INFO - Detecting the latest outliers.\n", + "2025-01-20 06:35:35,181 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:35,183 - INFO - Time frequency: D\n", + "2025-01-20 06:35:35,183 - INFO - Length of time index: 2017.00\n", + "2025-01-20 06:35:35,183 - INFO - Derived Period: (7, 365)\n", + "2025-01-20 06:35:36,420 - INFO - Generating outlier detection using MSTL\n", + "2025-01-20 06:35:36,420 - INFO - Outlier detection using MSTL Multiplicative Model\n", + "2025-01-20 06:35:37,819 - INFO - Residuals Likely Normally Distributed - Using Z 
Score\n", + "2025-01-20 06:35:37,820 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:37,820 - INFO - Completing outlier detection using MSTL\n", + "2025-01-20 06:35:37,821 - INFO - Detected the latest outlier!\n" ] }, { @@ -2386,7 +2364,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 34, "id": "95e83be5-e1f9-4ac4-8c6a-0449347c0291", "metadata": {}, "outputs": [ @@ -2394,17 +2372,17 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:37,698 - INFO - Detecting today's outliers.\n", - "2025-01-19 15:52:37,699 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:37,702 - INFO - Time frequency: D\n", - "2025-01-19 15:52:37,702 - INFO - Length of time index: 2017.00\n", - "2025-01-19 15:52:37,702 - INFO - Derived Period: (7, 365)\n", - "2025-01-19 15:52:39,876 - INFO - Generating outlier detection using MSTL\n", - "2025-01-19 15:52:39,877 - INFO - Outlier detection using MSTL Multiplicative Model\n", - "2025-01-19 15:52:42,126 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:42,128 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:42,128 - INFO - Completing outlier detection using MSTL\n", - "2025-01-19 15:52:42,129 - INFO - No outliers detected today.\n" + "2025-01-20 06:35:37,823 - INFO - Detecting today's outliers.\n", + "2025-01-20 06:35:37,824 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:37,825 - INFO - Time frequency: D\n", + "2025-01-20 06:35:37,825 - INFO - Length of time index: 2017.00\n", + "2025-01-20 06:35:37,826 - INFO - Derived Period: (7, 365)\n", + "2025-01-20 06:35:39,011 - INFO - Generating outlier detection using MSTL\n", + "2025-01-20 06:35:39,011 - INFO - Outlier detection using MSTL Multiplicative Model\n", + "2025-01-20 06:35:40,405 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:40,406 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:40,407 - INFO - Completing outlier detection using MSTL\n", + "2025-01-20 06:35:40,407 - INFO - No outliers detected today.\n" ] }, { @@ -2413,7 +2391,7 @@ "'No Outliers Today!'" ] }, - "execution_count": 35, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -2424,7 +2402,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 35, "id": "8998f62a-2243-46ce-9f1f-e001f29a10e0", "metadata": {}, "outputs": [ @@ -2432,12 +2410,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:42,132 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:42,133 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:42,134 - INFO - Length of time index: 200.00\n", - "2025-01-19 15:52:42,134 - INFO - Derived Period: 12\n", - "2025-01-19 15:52:42,141 - INFO - Additive model detected\n", - "2025-01-19 15:52:42,302 - INFO - Completing seasonal decomposition plot using MSTL\n" + "2025-01-20 06:35:40,410 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:40,411 - INFO - Time frequency: MS\n", + "2025-01-20 06:35:40,411 - INFO - Length of time index: 200.00\n", + "2025-01-20 06:35:40,411 - INFO - Derived Period: 12\n", + "2025-01-20 06:35:40,415 - INFO - Additive model detected\n", + "2025-01-20 06:35:40,510 - INFO - Completing seasonal decomposition plot using MSTL\n" ] }, { @@ -2458,7 +2436,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 36, "id": "49125b79-43a7-439b-a636-6a818c08911f", "metadata": 
{}, "outputs": [ @@ -2466,12 +2444,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:42,884 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:42,886 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:42,886 - INFO - Length of time index: 144.00\n", - "2025-01-19 15:52:42,886 - INFO - Derived Period: 12\n", - "2025-01-19 15:52:42,892 - INFO - Additive model detected\n", - "2025-01-19 15:52:43,022 - INFO - Completing seasonal decomposition plot using MSTL\n" + "2025-01-20 06:35:40,937 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:40,938 - INFO - Time frequency: MS\n", + "2025-01-20 06:35:40,938 - INFO - Length of time index: 144.00\n", + "2025-01-20 06:35:40,938 - INFO - Derived Period: 12\n", + "2025-01-20 06:35:40,944 - INFO - Additive model detected\n", + "2025-01-20 06:35:41,030 - INFO - Completing seasonal decomposition plot using MSTL\n" ] }, { @@ -2492,7 +2470,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 37, "id": "85e90d87-2a5a-4cf3-a155-38ea171e372c", "metadata": {}, "outputs": [ @@ -2500,16 +2478,16 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:43,575 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:43,578 - INFO - Time frequency: MS\n", - "2025-01-19 15:52:43,578 - INFO - Length of time index: 144.00\n", - "2025-01-19 15:52:43,578 - INFO - Derived Period: 12\n", - "2025-01-19 15:52:43,585 - INFO - Additive model detected\n", - "2025-01-19 15:52:43,585 - INFO - Generating outlier detection using MSTL\n", - "2025-01-19 15:52:43,585 - INFO - Outlier detection using MSTL Additive Model\n", - "2025-01-19 15:52:44,209 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", - "2025-01-19 15:52:44,210 - INFO - Generated outlier detection using STL\n", - "2025-01-19 15:52:44,211 - INFO - Completing outlier detection using MSTL\n" + "2025-01-20 06:35:41,371 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:41,373 - INFO - Time frequency: MS\n", + "2025-01-20 06:35:41,373 - INFO - Length of time index: 144.00\n", + "2025-01-20 06:35:41,373 - INFO - Derived Period: 12\n", + "2025-01-20 06:35:41,377 - INFO - Additive model detected\n", + "2025-01-20 06:35:41,378 - INFO - Generating outlier detection using MSTL\n", + "2025-01-20 06:35:41,378 - INFO - Outlier detection using MSTL Additive Model\n", + "2025-01-20 06:35:41,783 - INFO - Residuals Likely Normally Distributed - Using Z Score\n", + "2025-01-20 06:35:41,784 - INFO - Generated outlier detection using STL\n", + "2025-01-20 06:35:41,784 - INFO - Completing outlier detection using MSTL\n" ] }, { @@ -2531,10 +2509,10 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 38, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" }, @@ -2556,7 +2534,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 38, "id": "d3477c2e-730d-4f1e-bcbb-b54af2cb8495", "metadata": {}, "outputs": [ @@ -2564,12 +2542,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:44,315 - INFO - Starting outlier detection using MSTL\n", - "2025-01-19 15:52:44,318 - INFO - Time frequency: D\n", - "2025-01-19 15:52:44,318 - INFO - Length of time index: 2017.00\n", - "2025-01-19 15:52:44,319 - INFO - Derived Period: (7, 365)\n", - "2025-01-19 15:52:46,287 - INFO - Multiplicative model detected\n", - "2025-01-19 15:52:46,438 - INFO - Completing seasonal decomposition plot using MSTL\n" + "2025-01-20 
06:35:41,848 - INFO - Starting outlier detection using MSTL\n", + "2025-01-20 06:35:41,850 - INFO - Time frequency: D\n", + "2025-01-20 06:35:41,850 - INFO - Length of time index: 2017.00\n", + "2025-01-20 06:35:41,850 - INFO - Derived Period: (7, 365)\n", + "2025-01-20 06:35:43,033 - INFO - Multiplicative model detected\n", + "2025-01-20 06:35:43,130 - INFO - Completing seasonal decomposition plot using MSTL\n" ] }, { @@ -2597,7 +2575,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 39, "id": "9c29bbea-b2bd-4e7c-87f1-d64100147ed2", "metadata": {}, "outputs": [ @@ -2605,11 +2583,11 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:47,131 - INFO - Checking for Normality - Shapiro-Wilk Test Results:\n", - "2025-01-19 15:52:47,132 - INFO - Starting Time series frequency detection\n", - "2025-01-19 15:52:47,136 - INFO - Time frequency: D\n", - "2025-01-19 15:52:47,136 - INFO - Length of time index: 2017.00\n", - "2025-01-19 15:52:47,136 - INFO - Completing Time series frequency detection\n" + "2025-01-20 06:35:43,562 - INFO - Checking for Normality - Shapiro-Wilk Test Results:\n", + "2025-01-20 06:35:43,562 - INFO - Starting Time series frequency detection\n", + "2025-01-20 06:35:43,565 - INFO - Time frequency: D\n", + "2025-01-20 06:35:43,565 - INFO - Length of time index: 2017.00\n", + "2025-01-20 06:35:43,565 - INFO - Completing Time series frequency detection\n" ] }, { @@ -2623,8 +2601,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:50,562 - INFO - Generated outliers by Seasonal ESD Method\n", - "2025-01-19 15:52:50,563 - INFO - Outliers detected by Seasonal ESD Method\n" + "2025-01-20 06:35:45,618 - INFO - Generated outliers by Seasonal ESD Method\n", + "2025-01-20 06:35:45,619 - INFO - Outliers detected by Seasonal ESD Method\n" ] }, { @@ -2730,7 +2708,7 @@ "[100 rows x 2 columns]" ] }, - "execution_count": 40, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -2741,7 +2719,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 40, "id": "4544d0cf-3676-491e-b2c6-ec2b4d735ea9", "metadata": {}, "outputs": [ @@ -2749,13 +2727,13 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:50,569 - INFO - Testing for Normality - Shapiro-Wilk Test Results:\n", - "2025-01-19 15:52:50,569 - INFO - Statistic: 0.893\n", - "2025-01-19 15:52:50,570 - INFO - p-value: 0.000\n", - "2025-01-19 15:52:50,570 - INFO - Starting Time series frequency detection\n", - "2025-01-19 15:52:50,574 - INFO - Time frequency: D\n", - "2025-01-19 15:52:50,574 - INFO - Length of time index: 2017.00\n", - "2025-01-19 15:52:50,574 - INFO - Completing Time series frequency detection\n" + "2025-01-20 06:35:45,623 - INFO - Testing for Normality - Shapiro-Wilk Test Results:\n", + "2025-01-20 06:35:45,623 - INFO - Statistic: 0.893\n", + "2025-01-20 06:35:45,623 - INFO - p-value: 0.000\n", + "2025-01-20 06:35:45,623 - INFO - Starting Time series frequency detection\n", + "2025-01-20 06:35:45,626 - INFO - Time frequency: D\n", + "2025-01-20 06:35:45,626 - INFO - Length of time index: 2017.00\n", + "2025-01-20 06:35:45,626 - INFO - Completing Time series frequency detection\n" ] }, { @@ -2769,9 +2747,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:53,989 - INFO - Generated outliers by Seasonal ESD Method\n", - "2025-01-19 15:52:54,006 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. 
If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n", - "2025-01-19 15:52:54,010 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n" + "2025-01-20 06:35:47,690 - INFO - Generated outliers by Seasonal ESD Method\n", + "2025-01-20 06:35:47,699 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n", + "2025-01-20 06:35:47,702 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.\n" ] }, { @@ -2820,7 +2798,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 41, "id": "6427fa56-800a-49b7-95a8-713a397e3592", "metadata": {}, "outputs": [ @@ -2867,7 +2845,7 @@ "2024-01-04 450" ] }, - "execution_count": 42, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -2886,7 +2864,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 42, "id": "ac9c9f45-55f2-4d70-8473-503d78b28e16", "metadata": {}, "outputs": [ @@ -2894,7 +2872,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:54,477 - INFO - Building IQR plot to see outliers\n" + "2025-01-20 06:35:48,019 - INFO - Building IQR plot to see outliers\n" ] }, { @@ -2904,7 +2882,7 @@ "
" ] }, - "execution_count": 43, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -2923,7 +2901,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 43, "id": "2cdb91db-757b-4a05-b677-861d5563c50c", "metadata": {}, "outputs": [ @@ -2954,83 +2932,83 @@ " \n", " \n", " 0\n", - " -1.423114\n", + " 0.288971\n", " \n", " \n", " 1\n", - " 1.055215\n", + " -1.375526\n", " \n", " \n", " 2\n", - " -1.802483\n", + " 1.873845\n", " \n", " \n", " 3\n", - " -0.257056\n", + " 0.163573\n", " \n", " \n", " 4\n", - " 1.585707\n", + " 0.969924\n", " \n", " \n", " 5\n", - " 0.070369\n", + " 0.019614\n", " \n", " \n", " 6\n", - " 0.572173\n", + " 1.444810\n", " \n", " \n", " 7\n", - " 1.204303\n", + " 1.092741\n", " \n", " \n", " 8\n", - " -1.601769\n", + " 0.347193\n", " \n", " \n", " 9\n", - " 0.383073\n", + " 0.000703\n", " \n", " \n", " 10\n", - " 0.803868\n", + " 1.620270\n", " \n", " \n", " 11\n", - " -0.628674\n", + " 0.462871\n", " \n", " \n", " 12\n", - " 0.141689\n", + " -1.518049\n", " \n", " \n", " 13\n", - " 0.339351\n", + " -0.901780\n", " \n", " \n", " 14\n", - " -2.220027\n", + " 0.173406\n", " \n", " \n", " 15\n", - " 1.753066\n", + " -0.218665\n", " \n", " \n", " 16\n", - " -0.458946\n", + " -1.751572\n", " \n", " \n", " 17\n", - " -0.136383\n", + " -0.665632\n", " \n", " \n", " 18\n", - " 1.090813\n", + " -0.288242\n", " \n", " \n", " 19\n", - " -0.262419\n", + " -0.154105\n", " \n", " \n", "\n", @@ -3038,29 +3016,29 @@ ], "text/plain": [ " random numbers\n", - "0 -1.423114\n", - "1 1.055215\n", - "2 -1.802483\n", - "3 -0.257056\n", - "4 1.585707\n", - "5 0.070369\n", - "6 0.572173\n", - "7 1.204303\n", - "8 -1.601769\n", - "9 0.383073\n", - "10 0.803868\n", - "11 -0.628674\n", - "12 0.141689\n", - "13 0.339351\n", - "14 -2.220027\n", - "15 1.753066\n", - "16 -0.458946\n", - "17 -0.136383\n", - "18 1.090813\n", - "19 -0.262419" + "0 0.288971\n", + "1 -1.375526\n", + "2 1.873845\n", + "3 0.163573\n", + "4 0.969924\n", + "5 0.019614\n", + "6 1.444810\n", + "7 1.092741\n", + "8 0.347193\n", + "9 0.000703\n", + "10 1.620270\n", + "11 0.462871\n", + "12 -1.518049\n", + "13 -0.901780\n", + "14 0.173406\n", + "15 -0.218665\n", + "16 -1.751572\n", + "17 -0.665632\n", + "18 -0.288242\n", + "19 -0.154105" ] }, - "execution_count": 44, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -3075,22 +3053,10 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 44, "id": "b9e514f7-07fe-44c3-9db3-274234a953d3", "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2025-01-19 15:52:54,582 - INFO - Starting outlier detection using Moving Average method\n", - "2025-01-19 15:52:54,583 - INFO - Starting optimal window size calculation\n", - "2025-01-19 15:52:54,583 - INFO - Starting RMSE calculation\n", - "2025-01-19 15:52:54,686 - INFO - RMSE calculation completed\n", - "2025-01-19 15:52:54,686 - INFO - Optimal Window Size: 2\n", - "2025-01-19 15:52:54,687 - INFO - Outlier detection using Moving Average method completed\n" - ] - }, { "data": { "text/html": [ @@ -3194,7 +3160,7 @@ "[98 rows x 2 columns]" ] }, - "execution_count": 45, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -3205,7 +3171,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 45, "id": "9af7ee2f-d670-4fe6-bfa0-38479750848c", "metadata": {}, "outputs": [ @@ -3213,12 +3179,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 
15:52:54,692 - INFO - Plotting outliers using Moving Average method\n", - "2025-01-19 15:52:54,692 - INFO - Starting optimal window size calculation\n", - "2025-01-19 15:52:54,693 - INFO - Starting RMSE calculation\n", - "2025-01-19 15:52:54,794 - INFO - RMSE calculation completed\n", - "2025-01-19 15:52:54,794 - INFO - Optimal Window Size: 2\n", - "2025-01-19 15:52:54,807 - INFO - Completed outliers plotting using Moving Average method\n" + "2025-01-20 06:35:48,163 - INFO - Plotting outliers using Moving Average method\n", + "2025-01-20 06:35:48,241 - INFO - Completed outliers plotting using Moving Average method\n" ] }, { @@ -3267,7 +3229,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 46, "id": "f4ef24f3-1b4c-4b04-83f6-026cf6c549e3", "metadata": {}, "outputs": [ @@ -3275,28 +3237,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-01-19 15:52:55,062 - INFO - Starting ADF stationarity check\n", - "2025-01-19 15:52:55,071 - INFO - ADF Statistic: -0.072707\n", - "2025-01-19 15:52:55,072 - INFO - p-value: 0.952090\n", - "2025-01-19 15:52:55,072 - INFO - Critical Values:\n", - "2025-01-19 15:52:55,072 - INFO - \t1%: -3.466\n", - "2025-01-19 15:52:55,072 - INFO - \t5%: -2.877\n", - "2025-01-19 15:52:55,072 - INFO - \t10%: -2.575\n", - "2025-01-19 15:52:55,073 - INFO - Completed ADF stationarity check\n", - "2025-01-19 15:52:55,073 - INFO - Starting KPSS stationarity check\n", - "/Users/aseem/Documents/GitHub/outlier_detection/src/pycatcher/diagnostics.py:308: InterpolationWarning: The test statistic is outside of the range of p-values available in the\n", + "2025-01-20 06:35:48,404 - INFO - Starting ADF stationarity check\n", + "2025-01-20 06:35:48,407 - INFO - ADF Statistic: -0.072707\n", + "2025-01-20 06:35:48,408 - INFO - p-value: 0.952090\n", + "2025-01-20 06:35:48,408 - INFO - Critical Values:\n", + "2025-01-20 06:35:48,408 - INFO - \t1%: -3.466\n", + "2025-01-20 06:35:48,408 - INFO - \t5%: -2.877\n", + "2025-01-20 06:35:48,408 - INFO - \t10%: -2.575\n", + "2025-01-20 06:35:48,408 - INFO - Completed ADF stationarity check\n", + "2025-01-20 06:35:48,409 - INFO - Starting KPSS stationarity check\n", + "/Users/sarika/Documents/GitHub/pycatcher/src/pycatcher/diagnostics.py:308: InterpolationWarning: The test statistic is outside of the range of p-values available in the\n", "look-up table. 
The actual p-value is smaller than the p-value returned.\n", "\n", " statistic, p_value, n_lags, critical_values = kpss(df_pandas.iloc[:, -1])\n", - "2025-01-19 15:52:55,074 - INFO - KPSS Statistic: 2.040868\n", - "2025-01-19 15:52:55,074 - INFO - p-value: 0.010000\n", - "2025-01-19 15:52:55,075 - INFO - n_lags: 8.000000\n", - "2025-01-19 15:52:55,075 - INFO - Critical Values:\n", - "2025-01-19 15:52:55,075 - INFO - 10% : 0.347\n", - "2025-01-19 15:52:55,075 - INFO - 5% : 0.463\n", - "2025-01-19 15:52:55,076 - INFO - 2.5% : 0.574\n", - "2025-01-19 15:52:55,076 - INFO - 1% : 0.739\n", - "2025-01-19 15:52:55,076 - INFO - Completed KPSS stationarity check\n" + "2025-01-20 06:35:48,410 - INFO - KPSS Statistic: 2.040868\n", + "2025-01-20 06:35:48,410 - INFO - p-value: 0.010000\n", + "2025-01-20 06:35:48,410 - INFO - n_lags: 8.000000\n", + "2025-01-20 06:35:48,411 - INFO - Critical Values:\n", + "2025-01-20 06:35:48,411 - INFO - 10% : 0.347\n", + "2025-01-20 06:35:48,411 - INFO - 5% : 0.463\n", + "2025-01-20 06:35:48,411 - INFO - 2.5% : 0.574\n", + "2025-01-20 06:35:48,411 - INFO - 1% : 0.739\n", + "2025-01-20 06:35:48,411 - INFO - Completed KPSS stationarity check\n" ] }, { @@ -3331,7 +3293,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.12.0" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index f3588c6..1dbba7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "pycatcher" -version = "0.0.66" +version = "0.0.67" description = "This package identifies outlier(s) for a given time-series dataset in simple steps. It supports day, week, month and quarter level time-series data." authors = ["Aseem Anand "] maintainers = ["Jagadish Pamarthi "] diff --git a/src/pycatcher/catch.py b/src/pycatcher/catch.py index bf98e54..4e93c8e 100644 --- a/src/pycatcher/catch.py +++ b/src/pycatcher/catch.py @@ -824,28 +824,57 @@ def calculate_optimal_window_size(df: pd.DataFrame) -> str: Returns: str: optimal window size + + Raises: + DataValidationError: If df is None, empty, has invalid format, or contains invalid numeric data + ValueError: If all RMSE values are NaN + TypeError: If input is not a DataFrame or cannot be converted to one """ + if df is None: + logger.error("Input DataFrame is None") + raise DataValidationError("Input DataFrame cannot be None") + + try: + logger.info("Starting optimal window size calculation") + + if len(df.index) == 0: + logger.error("Input DataFrame has no rows") + raise DataValidationError("Input DataFrame cannot have zero rows") + + if len(df.columns) == 0: + logger.error("DataFrame has no columns") + raise DataValidationError("DataFrame must contain at least one value column") - logging.info("Starting optimal window size calculation") + # Try different window sizes + window_sizes = range(2, 21) + rmse_values = [] - # Try different window sizes - window_sizes = range(2, 21) - rmse_values = [] + logger.info("Starting RMSE calculation") - logging.info("Starting RMSE calculation") - for window_size in window_sizes: - rmse = calculate_rmse(df, window_size) - rmse_values.append(rmse) - logging.info("RMSE calculation completed") + for window_size in window_sizes: + logger.debug("Calculating RMSE for window size: %d", window_size) + try: + rmse = calculate_rmse(df, window_size) + rmse_values.append(rmse) + except Exception as e: + logger.warning("Failed to calculate RMSE for window size %d: %s", window_size, str(e)) + rmse_values.append(np.nan) - # Check if all 
rmse_values are NaN - if np.all(np.isnan(rmse_values)): - raise ValueError("All RMSE values are NaN. Check your data for issues.") + logger.info("RMSE calculation completed") - # Find the window size with the lowest RMSE - optimal_window_size = window_sizes[np.nanargmin(rmse_values)] - logging.info("Optimal Window Size: %d", optimal_window_size) - return optimal_window_size + # Check if all rmse_values are NaN + if np.all(np.isnan(rmse_values)): + logger.error("All RMSE values are NaN") + raise ValueError("All RMSE values are NaN. Check your data for issues.") + + # Find the window size with the lowest RMSE + optimal_window_size = window_sizes[np.nanargmin(rmse_values)] + logger.info("Optimal Window Size: %d", optimal_window_size) + return optimal_window_size + + except Exception as e: + logger.error("Unexpected error in optimal window size calculation: %s", str(e)) + raise def detect_outliers_moving_average(df: pd.DataFrame) -> str: @@ -858,37 +887,64 @@ def detect_outliers_moving_average(df: pd.DataFrame) -> str: Returns: str: A message with None found or with detected outliers. + + Raises: + DataValidationError: If df is None, empty, has invalid format, or contains invalid numeric data + TypeError: If input is not a DataFrame or cannot be converted to one + ValueError: If numeric conversion fails or optimal window size calculation fails """ + if df is None: + logger.error("Input DataFrame is None") + raise DataValidationError("Input DataFrame cannot be None") - logging.info("Starting outlier detection using Moving Average method") + try: + logger.info("Starting outlier detection using Moving Average method") - # Check whether the argument is Pandas dataframe - if not isinstance(df, pd.DataFrame): - # Convert to Pandas dataframe for easy manipulation - df_pandas = df.toPandas() - else: - df_pandas = df + # Check whether the argument is Pandas dataframe + df_pandas = df.toPandas() if not isinstance(df, pd.DataFrame) else df - # Calculate optimal window size - optimal_window_size = calculate_optimal_window_size(df_pandas) + if len(df_pandas.index) == 0: + logger.error("Input DataFrame has no rows") + raise DataValidationError("Input DataFrame cannot have zero rows") - # Calculate moving average - df_pandas.iloc[:, -1] = pd.to_numeric(df_pandas.iloc[:, -1]) - df1 = df_pandas.copy() - df1['moving_average'] = df_pandas.iloc[:, -1].rolling(window=optimal_window_size).mean() + if len(df_pandas.columns) == 0: + logger.error("DataFrame has no columns") + raise DataValidationError("DataFrame must contain at least one value column") - # Call Z-score algorithm to detect anomalies - z_scores = anomaly_zscore(df1['moving_average']) - outliers = df1[np.abs(z_scores) > 2] + # Calculate optimal window size + logger.info("Calculating optimal window size") + optimal_window_size = calculate_optimal_window_size(df_pandas) + logger.info("Optimal window size calculated: %d", optimal_window_size) - if outliers.empty: - print("No outlier detected using Moving Average method") - return - else: - return_outliers = outliers.iloc[:, :2] - return_outliers.reset_index(drop=True, inplace=True) - logging.info("Outlier detection using Moving Average method completed") - return return_outliers + # Calculate moving average + logger.debug("Converting last column to numeric") + try: + df_pandas.iloc[:, -1] = pd.to_numeric(df_pandas.iloc[:, -1]) + except (ValueError, TypeError) as e: + logger.error("Failed to convert last column to numeric: %s", str(e)) + raise DataValidationError("Last column must be convertible to numeric 
values") + + df1 = df_pandas.copy() + df1['moving_average'] = df_pandas.iloc[:, -1].rolling(window=optimal_window_size).mean() + logger.info("Moving average calculation completed") + + # Call Z-score algorithm to detect anomalies + logger.debug("Calculating Z-scores for anomaly detection") + z_scores = anomaly_zscore(df1['moving_average']) + outliers = df1[np.abs(z_scores) > 2] + + if outliers.empty: + logger.info("No outliers detected using Moving Average method") + print("No outlier detected using Moving Average method") + return + else: + return_outliers = outliers.iloc[:, :2] + return_outliers.reset_index(drop=True, inplace=True) + logger.info("Outlier detection using Moving Average method completed") + return return_outliers + except Exception as e: + logger.error("Unexpected error in Moving Average outlier detection: %s", str(e)) + raise def detect_outliers_stl(df) -> Union[pd.DataFrame, str]: @@ -902,76 +958,100 @@ def detect_outliers_stl(df) -> Union[pd.DataFrame, str]: Returns: str or pd.DataFrame: A message with None found or a DataFrame with detected outliers. - """ - logging.info("Starting outlier detection using STL") - # Check whether the argument is Pandas dataframe - if not isinstance(df, pd.DataFrame): - # Convert to Pandas dataframe for easy manipulation - df_pandas = df.toPandas() - else: - df_pandas = df + Raises: + DataValidationError: If df is None, empty, has invalid format, or contains invalid datetime data + TimeSeriesError: If time series frequency cannot be determined or data is insufficient + TypeError: If input is not a DataFrame or cannot be converted to one + ValueError: If date conversion fails or index has duplicates + """ + if df is None: + logger.error("Input DataFrame is None") + raise DataValidationError("Input DataFrame cannot be None") - # Ensure the first column is in datetime format and set it as index - df_stl = df_pandas.copy() - # Ensure the DataFrame is indexed correctly - if not isinstance(df_stl.index, pd.DatetimeIndex): - df_stl = df_stl.set_index(pd.to_datetime(df_stl.iloc[:, 0])).dropna() + try: + logger.info("Starting outlier detection using STL") - # Ensure the datetime index is unique (no duplicate dates) - if df_stl.index.is_unique: - # Find the time frequency (daily, weekly etc.) 
and length of the index column - inferred_frequency = df_stl.index.inferred_freq - logging.info("Time frequency: %s", inferred_frequency) + # Check whether the argument is Pandas dataframe + df_pandas = df.toPandas() if not isinstance(df, pd.DataFrame) else df - length_index = len(df_stl.index) - logging.info("Length of time index: %.2f", length_index) + if len(df_pandas.index) == 0: + logger.error("Input DataFrame has no rows") + raise DataValidationError("Input DataFrame cannot have zero rows") - # If the dataset contains at least 2 years of data, use Seasonal Trend Decomposition + if len(df_pandas.columns) == 0: + logger.error("DataFrame has no columns") + raise DataValidationError("DataFrame must contain at least one value column") - # Set parameter for Week check - regex_week_check = r'[W-Za-z]' + # Create a copy for STL processing + logger.debug("Creating DataFrame copy for STL processing") + df_stl = df_pandas.copy() - match inferred_frequency: - case 'H' if length_index >= 17520: - # logging.info("Using seasonal trend decomposition for for outlier detection in - # hour level time-series.") - detected_period = 24 # Hourly seasonality - case 'D' if length_index >= 730: - # logging.info("Using seasonal trend decomposition for for outlier detection in - # day level time-series.") - detected_period = 365 # Yearly seasonality - case 'B' if length_index >= 520: - # logging.info("Using seasonal trend decomposition for outlier detection in business - # day level time-series.") - detected_period = 365 # Yearly seasonality - case 'MS' if length_index >= 24: - # logging.info("Using seasonal trend decomposition for for outlier detection in - # month level time-series.") - detected_period = 12 - case 'M' if length_index >= 24: - # logging.info("Using seasonal trend decomposition for for outlier detection in - # month level time-series.") - detected_period = 12 - case 'Q' if length_index >= 8: - # logging.info("Using seasonal trend decomposition for for outlier detection in - # quarter level time-series.") - detected_period = 4 # Quarterly seasonality - case 'A' if length_index >= 2: - # logging.info("Using seasonal trend decomposition for for outlier detection in - # annual level time-series.") - detected_period = 1 # Annual seasonality - case _: - if regex.match(regex_week_check, inferred_frequency) and length_index >= 104: - detected_period = 52 # Week level seasonality - else: - # If less than 2 years of data, Use Inter Quartile Range (IQR) or Moving Average method - logging.info("Less than 2 years of data - Use Moving Average or IQR Method") - logging.info("Default - Using IQR method for outlier detection.") - return detect_outliers_iqr(df_pandas) - return detect_outliers_stl_extended(df_stl, detected_period) - else: - print("Duplicate date index values. Check your data.") + try: + # Ensure the first column is in datetime format and set it as index + # Ensure the DataFrame is indexed correctly + if not isinstance(df_stl.index, pd.DatetimeIndex): + df_stl = df_stl.set_index(pd.to_datetime(df_stl.iloc[:, 0])).dropna() + except Exception as e: + logger.error("Failed to convert to datetime index: %s", str(e)) + raise DataValidationError("Failed to convert first column to datetime format") from e + + # Ensure the datetime index is unique (no duplicate dates) + if df_stl.index.is_unique: + # Find the time frequency (daily, weekly etc.) 
and length of the index column + inferred_frequency = df_stl.index.inferred_freq + logger.info("Time frequency: %s", inferred_frequency) + + length_index = len(df_stl.index) + logger.info("Length of time index: %.2f", length_index) + + # If the dataset contains at least 2 years of data, use Seasonal Trend Decomposition + # Set parameter for Week check + regex_week_check = r'[W-Za-z]' + + match inferred_frequency: + case 'H' if length_index >= 17520: + # logger.info("Using seasonal trend decomposition for outlier detection in + # hour level time-series.") + detected_period = 24 # Hourly seasonality + case 'D' if length_index >= 730: + # logger.info("Using seasonal trend decomposition for outlier detection in + # day level time-series.") + detected_period = 365 # Yearly seasonality + case 'B' if length_index >= 520: + # logger.info("Using seasonal trend decomposition for outlier detection in business + # day level time-series.") + detected_period = 365 # Yearly seasonality + case 'MS' if length_index >= 24: + # logger.info("Using seasonal trend decomposition for outlier detection in + # month level time-series.") + detected_period = 12 + case 'M' if length_index >= 24: + # logger.info("Using seasonal trend decomposition for outlier detection in + # month level time-series.") + detected_period = 12 + case 'Q' if length_index >= 8: + # logger.info("Using seasonal trend decomposition for outlier detection in + # quarter level time-series.") + detected_period = 4 # Quarterly seasonality + case 'A' if length_index >= 2: + # logger.info("Using seasonal trend decomposition for outlier detection in + # annual level time-series.") + detected_period = 1 # Annual seasonality + case _: + if regex.match(regex_week_check, inferred_frequency) and length_index >= 104: + detected_period = 52 # Week level seasonality + else: + # If less than 2 years of data, use Inter Quartile Range (IQR) or Moving Average method + logger.info("Less than 2 years of data - Use Moving Average or IQR Method") + logger.info("Default - Using IQR method for outlier detection.") + return detect_outliers_iqr(df_pandas) + return detect_outliers_stl_extended(df_stl, detected_period) + else: + print("Duplicate date index values. 
Check your data.") + except Exception as e: + logger.error("Unexpected error in STL outlier detection: %s", str(e)) + raise def detect_outliers_stl_extended(df, detected_period) -> Union[pd.DataFrame, str]: diff --git a/tests/__init__.py b/tests/__init__.py index c972241..c496127 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -23,6 +23,9 @@ TestDecomposeAndDetect, TestDetectOutliersIQR, TestCalculateRmse, + TestCalculateOptimalWindowSize, + TestDetectOutliersMovingAverage, + TestDetectOutliersSTL, # Common fixture sample_df diff --git a/tests/test_catch.py b/tests/test_catch.py index c78e443..62b638f 100644 --- a/tests/test_catch.py +++ b/tests/test_catch.py @@ -7,7 +7,7 @@ from src.pycatcher.catch import (TimeSeriesError, DataValidationError, check_and_convert_date, find_outliers_iqr, anomaly_mad, get_residuals, sum_of_squares, get_ssacf, detect_outliers_today_classic, detect_outliers_latest_classic, detect_outliers_classic, decompose_and_detect, detect_outliers_iqr, - calculate_rmse) + calculate_rmse, calculate_optimal_window_size, detect_outliers_moving_average, detect_outliers_stl) @pytest.fixture @@ -681,3 +681,280 @@ def test_missing_value_column(self): df = pd.DataFrame(index=pd.date_range(start='2023-01-01', periods=5)) with pytest.raises(DataValidationError, match="DataFrame must contain at least one value column"): calculate_rmse(df, window_size=3) + + +class TestCalculateOptimalWindowSize: + """Test cases for calculate_optimal_window_size function.""" + + @pytest.fixture + def sample_df(self): + """Fixture for sample DataFrame with clean time series data.""" + dates = pd.date_range(start='2022-01-01', periods=100) + # Creating a simple time series with a trend + values = np.linspace(10, 100, 100) + np.random.normal(0, 5, 100) + return pd.DataFrame({ + 'date': dates, + 'value': values + }) + + def test_valid_calculation(self, sample_df): + """Test with valid DataFrame containing clean time series data.""" + result = calculate_optimal_window_size(sample_df) + assert isinstance(result, int) + assert 2 <= result <= 20 # Window size should be within the range defined in the function + + def test_none_input(self): + """Test with None input.""" + with pytest.raises(DataValidationError, match="Input DataFrame cannot be None"): + calculate_optimal_window_size(None) + + def test_empty_dataframe_no_rows(self): + """Test with DataFrame having no rows.""" + empty_df = pd.DataFrame(columns=['date', 'value']) + with pytest.raises(DataValidationError, match="Input DataFrame cannot have zero rows"): + calculate_optimal_window_size(empty_df) + + def test_empty_dataframe_no_columns(self): + """Test with DataFrame having no data and no columns.""" + empty_df = pd.DataFrame() + with pytest.raises(DataValidationError, match="Input DataFrame cannot have zero rows"): + calculate_optimal_window_size(empty_df) + + def test_dataframe_with_rows_no_columns(self): + """Test with DataFrame having rows but no columns.""" + # Create DataFrame with index but no columns + df = pd.DataFrame(index=range(5)) + with pytest.raises(DataValidationError, match="DataFrame must contain at least one value column"): + calculate_optimal_window_size(df) + + def test_small_dataset(self): + """Test with a small but valid dataset.""" + # Create a dataset that's large enough for 5 splits but still relatively small + dates = pd.date_range(start='2022-01-01', periods=25) + # Creating simple linear trend with some noise + values = np.linspace(1, 25, 25) + np.random.normal(0, 0.5, 25) + small_df = pd.DataFrame({ + 'date': 
dates, + 'value': values + }) + result = calculate_optimal_window_size(small_df) + assert isinstance(result, int) + assert 2 <= result <= 20 # Window size should be within the defined range + + def test_minimum_size_dataset(self): + """Test with a dataset at the minimum size that should work.""" + # TimeSeriesSplit with n_splits=5 requires at least n_splits + 2 samples + dates = pd.date_range(start='2022-01-01', periods=7) + values = range(1, 8) # Simple increasing sequence + min_df = pd.DataFrame({ + 'date': dates, + 'value': values + }) + with pytest.raises(ValueError, match="All RMSE values are NaN"): + calculate_optimal_window_size(min_df) + + def test_constant_values(self): + """Test with constant values where RMSE might be all zero.""" + dates = pd.date_range(start='2022-01-01', periods=50) + constant_df = pd.DataFrame({ + 'date': dates, + 'value': [10] * 50 + }) + result = calculate_optimal_window_size(constant_df) + assert isinstance(result, int) + assert 2 <= result <= 20 + + @patch('src.pycatcher.catch.calculate_rmse') + def test_all_nan_rmse_values(self, mock_calculate_rmse): + """Test handling of all NaN RMSE values.""" + mock_calculate_rmse.return_value = np.nan + dates = pd.date_range(start='2022-01-01', periods=50) + df = pd.DataFrame({ + 'date': dates, + 'value': np.random.normal(0, 1, 50) + }) + with pytest.raises(ValueError, match="All RMSE values are NaN"): + calculate_optimal_window_size(df) + + def test_non_numeric_values(self): + """Test with non-numeric values in the value column.""" + df = pd.DataFrame({ + 'date': pd.date_range(start='2022-01-01', periods=10), + 'value': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] + }) + with pytest.raises(Exception): # Should raise an error when trying to calculate RMSE + calculate_optimal_window_size(df) + + def test_highly_seasonal_data(self): + """Test with highly seasonal data.""" + dates = pd.date_range(start='2022-01-01', periods=100) + # Create seasonal pattern with period of 7 + seasonal = np.sin(np.linspace(0, 10 * np.pi, 100)) * 10 + values = seasonal + np.random.normal(0, 1, 100) + seasonal_df = pd.DataFrame({ + 'date': dates, + 'value': values + }) + result = calculate_optimal_window_size(seasonal_df) + assert isinstance(result, int) + assert 2 <= result <= 20 + + +class TestDetectOutliersMovingAverage: + """Test cases for detect_outliers_moving_average function.""" + + @pytest.fixture + def sample_df(self): + """Fixture for sample DataFrame with time series data.""" + dates = pd.date_range(start='2023-01-01', periods=10, freq='D') + return pd.DataFrame({ + 'date': dates, + 'value': [10, 12, 11, 13, 100, 11, 12, 13, 11, 12] # 100 is an outlier + }) + + def test_valid_detection(self, sample_df, monkeypatch): + """Test with valid DataFrame containing outliers.""" + + # Mock calculate_optimal_window_size to return a fixed window size + def mock_optimal_window(df): + return 3 + + # Mock anomaly_zscore to return known z-scores + def mock_zscore(series): + return pd.Series([0, 0, 0, 0, 5, 0, 0, 0, 0, 0]) # High z-score for outlier + + monkeypatch.setattr("src.pycatcher.catch.calculate_optimal_window_size", mock_optimal_window) + monkeypatch.setattr("src.pycatcher.catch.anomaly_zscore", mock_zscore) + + result = detect_outliers_moving_average(sample_df) + assert isinstance(result, pd.DataFrame) + assert len(result) == 1 + assert result.iloc[0]['value'] == 100 + + def test_none_input(self): + """Test with None input.""" + with pytest.raises(DataValidationError, match="Input DataFrame cannot be None"): + 
detect_outliers_moving_average(None) + + def test_empty_dataframe(self): + """Test with empty DataFrame.""" + empty_df = pd.DataFrame(columns=['date', 'value']) + with pytest.raises(DataValidationError, match="Input DataFrame cannot have zero rows"): + detect_outliers_moving_average(empty_df) + + def test_optimal_window_calculation_error(self, sample_df, monkeypatch): + """Test handling of errors from optimal window size calculation.""" + + def mock_optimal_window(df): + raise ValueError("Error calculating optimal window size") + + monkeypatch.setattr("src.pycatcher.catch.calculate_optimal_window_size", mock_optimal_window) + + with pytest.raises(ValueError, match="Error calculating optimal window size"): + detect_outliers_moving_average(sample_df) + + +class TestDetectOutliersSTL: + """Test cases for detect_outliers_stl function.""" + + @pytest.fixture + def hourly_df(self): + """Fixture for hourly data with sufficient length.""" + dates = pd.date_range(start='2020-01-01', periods=17520, freq='H') # 2 years of hourly data + # Generate positive values: base of 100 + sine wave + small positive noise + values = 100 + np.sin(np.linspace(0, 100, 17520)) * 50 + np.random.uniform(0, 10, 17520) + values[1000] = 1000 # Insert an outlier + return pd.DataFrame({'date': dates, 'value': values}) + + @pytest.fixture + def daily_df(self): + """Fixture for daily data with sufficient length.""" + dates = pd.date_range(start='2020-01-01', periods=730, freq='D') # 2 years of daily data + # Generate positive values: base of 100 + sine wave + small positive noise + values = 100 + np.sin(np.linspace(0, 10, 730)) * 50 + np.random.uniform(0, 10, 730) + values[100] = 1000 # Insert an outlier + return pd.DataFrame({'date': dates, 'value': values}) + + @pytest.fixture + def monthly_df(self): + """Fixture for monthly data with sufficient length.""" + dates = pd.date_range(start='2020-01-01', periods=24, freq='M') # 2 years of monthly data + # Generate positive values: base of 100 + sine wave + small positive noise + values = 100 + np.sin(np.linspace(0, 2, 24)) * 50 + np.random.uniform(0, 10, 24) + values[5] = 1000 # Insert an outlier + return pd.DataFrame({'date': dates, 'value': values}) + + def test_hourly_data(self, hourly_df): + """Test with hourly data.""" + result = detect_outliers_stl(hourly_df) + assert isinstance(result, pd.DataFrame) + assert len(result) > 0 + assert 1000 in result['value'].values # Should detect our inserted outlier + + def test_daily_data(self, daily_df): + """Test with daily data.""" + result = detect_outliers_stl(daily_df) + assert isinstance(result, pd.DataFrame) + assert len(result) > 0 + assert 1000 in result['value'].values # Should detect our inserted outlier + + def test_monthly_data(self, monthly_df): + """Test with monthly data.""" + result = detect_outliers_stl(monthly_df) + assert isinstance(result, pd.DataFrame) + assert len(result) > 0 + assert 1000 in result['value'].values # Should detect our inserted outlier + + def test_insufficient_data(self): + """Test with insufficient data length.""" + dates = pd.date_range(start='2020-01-01', periods=10, freq='D') + df = pd.DataFrame({'date': dates, 'value': np.random.uniform(1, 10, 10)}) # Positive values + result = detect_outliers_stl(df) + # Should fall back to IQR method for insufficient data + assert isinstance(result, (pd.DataFrame, str)) + + def test_none_input(self): + """Test with None input.""" + with pytest.raises(DataValidationError, match="Input DataFrame cannot be None"): + detect_outliers_stl(None) + + def 
test_empty_dataframe(self): + """Test with empty DataFrame.""" + df = pd.DataFrame() + with pytest.raises(DataValidationError, match="Input DataFrame cannot have zero rows"): + detect_outliers_stl(df) + + def test_invalid_date_format(self): + """Test with invalid date format.""" + df = pd.DataFrame({ + 'date': ['invalid', 'dates'], + 'value': [1, 2] + }) + with pytest.raises(DataValidationError): + detect_outliers_stl(df) + + def test_non_numeric_values(self): + """Test with non-numeric values.""" + dates = pd.date_range(start='2020-01-01', periods=730, freq='D') + df = pd.DataFrame({ + 'date': dates, + 'value': ['a'] * 730 + }) + with pytest.raises(Exception): # Should raise some kind of error for non-numeric data + detect_outliers_stl(df) + + @patch('src.pycatcher.catch.detect_outliers_iqr') + def test_fallback_to_iqr(self, mock_iqr): + """Test fallback to IQR method for short time series.""" + # Create a short time series that doesn't meet any seasonal criteria + dates = pd.date_range(start='2020-01-01', periods=5, freq='D') + df = pd.DataFrame({'date': dates, 'value': np.random.uniform(1, 10, 5)}) # Positive values + + # Set up the mock return value + mock_iqr.return_value = "IQR method used" + + result = detect_outliers_stl(df) + + # Verify that IQR method was called + mock_iqr.assert_called_once() + assert result == "IQR method used"
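For quick reference, the snippet below is a minimal usage sketch (not part of the patch) of the two detection entry points exercised by the tests above. It mirrors the synthetic fixtures used in the test suite; the import path assumes the installed package layout (pycatcher.catch rather than src.pycatcher.catch), and the column names, seed, and planted outlier values are illustrative assumptions only.

import numpy as np
import pandas as pd

from pycatcher.catch import detect_outliers_moving_average, detect_outliers_stl

np.random.seed(42)

# Short daily series (< 2 years): detect_outliers_stl infers freq 'D' but, with
# fewer than 730 points, falls back to the IQR method, while
# detect_outliers_moving_average picks its window size via the RMSE search.
dates = pd.date_range(start="2024-01-01", periods=90, freq="D")
values = np.random.normal(100, 5, 90)
values[45] = 300  # planted outlier
df_short = pd.DataFrame({"date": dates, "value": values})

print(detect_outliers_moving_average(df_short))  # DataFrame of flagged rows (or None after a printed message)
print(detect_outliers_stl(df_short))             # IQR fallback for the short series

# Two full years of daily data (length >= 730): detect_outliers_stl applies
# seasonal-trend decomposition with a detected period of 365.
dates_long = pd.date_range(start="2022-01-01", periods=730, freq="D")
values_long = 100 + 50 * np.sin(np.linspace(0, 10, 730)) + np.random.uniform(0, 10, 730)
values_long[100] = 1000  # planted outlier
df_long = pd.DataFrame({"date": dates_long, "value": values_long})
print(detect_outliers_stl(df_long))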