From 40db789e02bbe4cc6bb807d108a39648c308b6b0 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 18 Apr 2024 21:47:24 +0200 Subject: [PATCH 1/4] CI: Test Jupyter Notebooks on Python 3.10 This optimizes testing for Google Colab. Because we are aiming to run a significant portion of the assets here on Google Colab, most notably the Jupyter Notebooks, we may want to follow their cadence of Python updates. Currently, Google Colab still seems to be on Python 3.10 [1], so we may want to adjust the corresponding CI jobs to validate just that, in order to avoid any surprises. [1]: https://colab.google/articles/py3.10 --- .github/workflows/dataframe-dask.yml | 2 +- .github/workflows/dataframe-pandas.yml | 2 +- .github/workflows/ml-automl.yml | 2 +- .github/workflows/ml-langchain.yml | 2 +- .github/workflows/ml-mlflow.yml | 2 +- .github/workflows/timeseries.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/dataframe-dask.yml b/.github/workflows/dataframe-dask.yml index 8b42d543..06eabcd1 100644 --- a/.github/workflows/dataframe-dask.yml +++ b/.github/workflows/dataframe-dask.yml @@ -37,7 +37,7 @@ jobs: fail-fast: false matrix: os: [ 'ubuntu-latest' ] - python-version: [ '3.11', '3.12' ] + python-version: [ '3.10', '3.11', '3.12' ] cratedb-version: [ 'nightly' ] services: diff --git a/.github/workflows/dataframe-pandas.yml b/.github/workflows/dataframe-pandas.yml index 704fbb05..e4870802 100644 --- a/.github/workflows/dataframe-pandas.yml +++ b/.github/workflows/dataframe-pandas.yml @@ -37,7 +37,7 @@ jobs: fail-fast: false matrix: os: [ 'ubuntu-latest' ] - python-version: [ '3.11', '3.12' ] + python-version: [ '3.10', '3.11', '3.12' ] cratedb-version: [ 'nightly' ] services: diff --git a/.github/workflows/ml-automl.yml b/.github/workflows/ml-automl.yml index 9f13bf01..cbe2f65e 100644 --- a/.github/workflows/ml-automl.yml +++ b/.github/workflows/ml-automl.yml @@ -37,7 +37,7 @@ jobs: fail-fast: false matrix: os: [ 'ubuntu-latest' ] - 
python-version: [ '3.11' ] + python-version: [ '3.10', '3.11' ] cratedb-version: [ 'nightly' ] services: diff --git a/.github/workflows/ml-langchain.yml b/.github/workflows/ml-langchain.yml index 052c5471..c67a8f4d 100644 --- a/.github/workflows/ml-langchain.yml +++ b/.github/workflows/ml-langchain.yml @@ -37,7 +37,7 @@ jobs: fail-fast: false matrix: os: [ 'ubuntu-latest' ] - python-version: [ '3.11' ] + python-version: [ '3.10', '3.11' ] cratedb-version: [ 'nightly' ] services: diff --git a/.github/workflows/ml-mlflow.yml b/.github/workflows/ml-mlflow.yml index 6a2aa1e5..2ecd5aec 100644 --- a/.github/workflows/ml-mlflow.yml +++ b/.github/workflows/ml-mlflow.yml @@ -37,7 +37,7 @@ jobs: fail-fast: false matrix: os: [ 'ubuntu-latest' ] - python-version: [ '3.11' ] + python-version: [ '3.10', '3.11' ] cratedb-version: [ 'nightly' ] services: diff --git a/.github/workflows/timeseries.yml b/.github/workflows/timeseries.yml index dce9a6ab..49c91b9a 100644 --- a/.github/workflows/timeseries.yml +++ b/.github/workflows/timeseries.yml @@ -37,7 +37,7 @@ jobs: fail-fast: false matrix: os: [ 'ubuntu-latest' ] - python-version: [ '3.11' ] + python-version: [ '3.10', '3.11' ] cratedb-version: [ 'nightly' ] services: From 27d93639699c3554e1c6a5efd308e20d44775ab9 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 18 Apr 2024 22:34:07 +0200 Subject: [PATCH 2/4] TSML: Fix database resets for `timeseries-anomaly-detection.ipynb` The database table `machine_data` has not been made part of the test infrastructure to reset tables before invoking test cases, causing a hiccup on developer workstations. 
--- topic/timeseries/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/topic/timeseries/conftest.py b/topic/timeseries/conftest.py index fac0c41f..16daacd6 100644 --- a/topic/timeseries/conftest.py +++ b/topic/timeseries/conftest.py @@ -27,6 +27,7 @@ def reset_database_tables(): reset_tables = [ "cities", + "machine_data", "weather_data", "weather_stations", ] From dfe15cf52c768a60f387db75ff67df987ef1bcd1 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 18 Apr 2024 22:35:57 +0200 Subject: [PATCH 3/4] TSML: Chore: Shorten GitHub URL to `topic/timeseries/requirements.txt` --- topic/timeseries/exploratory_data_analysis.ipynb | 2 +- topic/timeseries/time-series-decomposition.ipynb | 2 +- topic/timeseries/timeseries-anomaly-detection.ipynb | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/topic/timeseries/exploratory_data_analysis.ipynb b/topic/timeseries/exploratory_data_analysis.ipynb index 2b9615ad..1887cc91 100644 --- a/topic/timeseries/exploratory_data_analysis.ipynb +++ b/topic/timeseries/exploratory_data_analysis.ipynb @@ -45,7 +45,7 @@ "#!pip install -r requirements.txt\n", "\n", "# Note: If you are running in an environment like Google Colab, please use the absolute path of the requirements:\n", - "#!pip install -r https://raw.githubusercontent.com/crate/cratedb-examples/main/topic/timeseries/requirements.txt" + "#!pip install -r https://github.com/crate/cratedb-examples/raw/main/topic/timeseries/requirements.txt" ] }, { diff --git a/topic/timeseries/time-series-decomposition.ipynb b/topic/timeseries/time-series-decomposition.ipynb index 71a051e6..e3893d09 100644 --- a/topic/timeseries/time-series-decomposition.ipynb +++ b/topic/timeseries/time-series-decomposition.ipynb @@ -49,7 +49,7 @@ "#!pip install -r requirements.txt\n", "\n", "# Note: If you are running in an environment like Google Colab, please use the absolute path of the requirements:\n", - "#!pip install -r 
https://raw.githubusercontent.com/crate/cratedb-examples/main/topic/timeseries/requirements.txt" + "#!pip install -r https://github.com/crate/cratedb-examples/raw/main/topic/timeseries/requirements.txt" ] }, { diff --git a/topic/timeseries/timeseries-anomaly-detection.ipynb b/topic/timeseries/timeseries-anomaly-detection.ipynb index 2a6ab906..ebcb7015 100644 --- a/topic/timeseries/timeseries-anomaly-detection.ipynb +++ b/topic/timeseries/timeseries-anomaly-detection.ipynb @@ -43,10 +43,10 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install -r requirements.txt\n", + "%pip install -r requirements.txt\n", "\n", "# Note: If you are running in an environment like Google Colab, please use the absolute path of the requirements:\n", - "#!pip install -r https://raw.githubusercontent.com/crate/cratedb-examples/main/topic/timeseries/requirements.txt" + "#%pip install -r https://github.com/crate/cratedb-examples/raw/main/topic/timeseries/requirements.txt" ] }, { From b290bae9dfb945d49c71f9424df09b8d9378dc9f Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 19 Apr 2024 10:20:48 +0200 Subject: [PATCH 4/4] TSML: Improve `timeseries-anomaly-detection.ipynb` notebook - Use `REFRESH TABLE` to synchronize data - Improve formatting around `CONNECTION_STRING` --- .../timeseries-anomaly-detection.ipynb | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/topic/timeseries/timeseries-anomaly-detection.ipynb b/topic/timeseries/timeseries-anomaly-detection.ipynb index ebcb7015..85a70de1 100644 --- a/topic/timeseries/timeseries-anomaly-detection.ipynb +++ b/topic/timeseries/timeseries-anomaly-detection.ipynb @@ -88,7 +88,7 @@ "In this step, we will create the table and populate it with the dataset from [nab-machine-failure.csv]. If you are using CrateDB Cloud, you can use the [URL import] available in the console, otherwise, use the `COPY FROM` statement used below. 
You can run it in the console in the Admin UI or you can use [Crash], in this case, we are using SQLAlchemy.\n", "\n", "[URL import]: https://cratedb.com/docs/cloud/en/latest/reference/overview.html#import-from-url\n", - "[nab-machine-failure.csv]: https://github.com/crate/cratedb-datasets/raw/main/timeseries/nab-machine-failure.csv\",\n", + "[nab-machine-failure.csv]: https://github.com/crate/cratedb-datasets/raw/main/timeseries/nab-machine-failure.csv\n", "[Crash]: https://cratedb.com/docs/crate/crash/en/latest/getting-started.html" ] }, @@ -98,25 +98,22 @@ "metadata": {}, "outputs": [], "source": [ - "# Use this connection string template for CrateDB Cloud Clusters\n", + "# crate://:@\n", "CONNECTION_STRING = os.environ.get(\n", " \"CRATEDB_CONNECTION_STRING\",\n", - " \"crate://:@\",\n", - ")\n", - "\n", - "# Use this connection string template for CrateDB running locally\n", - "#CONNECTION_STRING = os.environ.get(\n", - "# \"CRATEDB_CONNECTION_STRING\",\n", - "# \"crate://localhost:4200\",\n", - "# )\n", + " \"crate://localhost:4200\",\n", + " )\n", "\n", "engine = sa.create_engine(CONNECTION_STRING, echo=os.environ.get('DEBUG'))\n", "\n", - "query_create_table = 'CREATE TABLE machine_data (\"timestamp\" TIMESTAMP, \"value\" DOUBLE PRECISION)' \n", - "query_copy_from = \"COPY machine_data FROM 'https://github.com/crate/cratedb-datasets/raw/main/timeseries/nab-machine-failure.csv';\"\n", + "sql_ddl = 'CREATE TABLE machine_data (\"timestamp\" TIMESTAMP, \"value\" DOUBLE PRECISION)'\n", + "sql_load = \"COPY machine_data FROM 'https://github.com/crate/cratedb-datasets/raw/main/timeseries/nab-machine-failure.csv';\"\n", + "sql_refresh = \"REFRESH TABLE machine_data;\"\n", + "\n", "with engine.connect() as conn:\n", - " conn.execute(sa.text(query_create_table))\n", - " conn.execute(sa.text(query_copy_from))\n" + " conn.execute(sa.text(sql_ddl))\n", + " conn.execute(sa.text(sql_load))\n", + " conn.execute(sa.text(sql_refresh))" ] }, {