Skip to content

Commit

Permalink
Merge pull request #205 from ibm-granite/cve
Browse files Browse the repository at this point in the history
CVE
  • Loading branch information
ssiegel95 authored Nov 21, 2024
2 parents 15599d3 + 1c9b3ec commit 0c3430c
Show file tree
Hide file tree
Showing 19 changed files with 2,869 additions and 1,270 deletions.
11 changes: 10 additions & 1 deletion services/boilerplate/inference_payloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,10 @@ class ForecastingMetadataInput(BaseMetadataInput):
)


class BaseParameters(BaseModel):
    """Common base for request-parameter models.

    ``extra="forbid"`` rejects unknown fields in incoming payloads;
    ``protected_namespaces=()`` disables pydantic's reserved ``model_``
    prefix — presumably so subclasses may declare fields like
    ``model_id`` without warnings (confirm against subclasses).
    """

    model_config = ConfigDict(extra="forbid", protected_namespaces=())


class ForecastingParameters(BaseModel):
model_config = ConfigDict(extra="forbid", protected_namespaces=())

Expand Down Expand Up @@ -142,7 +146,9 @@ class ForecastingInferenceInput(BaseInferenceInput):
description="An object of ForecastingMetadataInput that contains the schema" " metadata of the data input.",
)

parameters: ForecastingParameters
parameters: ForecastingParameters = Field(
description="additional parameters affecting behavior of the forecast.", default_factory=dict
)

data: Dict[str, List[Any]] = Field(
description="A payload of data matching the schema provided."
Expand Down Expand Up @@ -277,3 +283,6 @@ class PredictOutput(BaseModel):
description="List of prediction results.",
default=None,
)

input_data_points: int = Field(description="Count of input data points.", default=None)
output_data_points: int = Field(description="Count of output data points.", default=None)
678 changes: 343 additions & 335 deletions services/finetuning/poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion services/finetuning/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ __version_tuple__ = (0, 0, 0)
# including 3.9 causes poetry lock to run forever
python = ">=3.10,<3.13"
numpy = { version = "<2" }
tsfm_public = { git = "https://github.com/IBM-granite/granite-tsfm.git", tag = "v0.2.13", markers = "sys_platform != 'win32'" }
tsfm_public = { git = "https://github.com/IBM-granite/granite-tsfm.git", tag = "v0.2.16", markers = "sys_platform != 'win32'" }


# trying to pick up cpu version for tsfmfinetuning
Expand Down
6 changes: 4 additions & 2 deletions services/inference/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ create_prometheus_metrics_dir:
# starts the tsfminference service (used mainly for test cases)
start_service_local: create_prometheus_metrics_dir boilerplate
PROMETHEUS_MULTIPROC_DIR=./prometheus_metrics \
TSFM_PYTHON_LOGGING_LEVEL="ERROR" \
TSFM_MODEL_DIR=./mytest-tsfm \
TSFM_ALLOW_LOAD_FROM_HF_HUB=1 \
python -m gunicorn \
Expand All @@ -23,7 +24,7 @@ start_service_local: create_prometheus_metrics_dir boilerplate
--bind 127.0.0.1:8000 \
tsfminference.main:app && true &
stop_service_local:
pkill -f 'python.*tsfminference.*'
pkill -f 'python.*gunicorn.*tsfminference\.main\:app'

image: boilerplate
$(CONTAINER_BUILDER) build -t tsfminference -f Dockerfile .
Expand All @@ -45,9 +46,10 @@ stop_service_image:
$(CONTAINER_BUILDER) stop tsfmserver

test_local: clone_models boilerplate start_service_local
pytest -s tests ../tests
pytest --cov=tsfminference --cov-report term-missing tests ../tests
$(MAKE) stop_service_local
$(MAKE) delete_models
$(MAKE) stop_service_local

test_image: clone_models start_service_image
pytest -s tests ../tests
Expand Down
356 changes: 0 additions & 356 deletions services/inference/openapi.json

This file was deleted.

859 changes: 490 additions & 369 deletions services/inference/poetry.lock

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions services/inference/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ __version_tuple__ = (0, 0, 0)
# including 3.9 causes poetry lock to run forever
python = ">=3.10,<3.13"
numpy = { version = "<2" }
tsfm_public = { git = "https://github.com/IBM-granite/granite-tsfm.git", tag = "v0.2.13", markers = "sys_platform != 'win32'" }
tsfm_public = { git = "https://github.com/IBM-granite/granite-tsfm.git", tag = "v0.2.16", markers = "sys_platform != 'win32'" }

# trying to pick up cpu version for tsfminference
# to make image smaller
Expand All @@ -48,7 +48,7 @@ prometheus_client = { version = "*" }
starlette = { version = ">=0.40.0" }
Werkzeug = { version = ">=3.0.6" }
urllib3 = { version = ">=1.26.19,<2" } # see https://github.com/urllib3/urllib3/security/advisories/GHSA-34jh-p97f-mpxf

aiohttp = { version = ">=3.10.11" }

[[tool.poetry.source]]
name = "pytorch"
Expand All @@ -60,6 +60,7 @@ optional = true
[tool.poetry.group.dev.dependencies]
pytest = "*"
locust = "*"
pytest-coverage = "*"

[build-system]
requires = ["poetry-core>=1.0.0", "poetry-dynamic-versioning>=1.0.0,<2.0.0"]
Expand Down
2 changes: 1 addition & 1 deletion services/inference/tests/locust/payload.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"model_id": "ibm/test-ttm-v1",
"model_id": "mytest-tsfm/ttm-1024-96-r2",
"parameters": {
"prediction_length": 1
},
Expand Down
204 changes: 204 additions & 0 deletions services/inference/tests/test_inference_lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
# Copyright contributors to the TSFM project
#

import copy
from datetime import timedelta

import numpy as np
import pandas as pd
import pytest
import yaml
from fastapi import HTTPException
from tsfminference import TSFM_CONFIG_FILE
from tsfminference.inference import InferenceRuntime
from tsfminference.inference_payloads import (
ForecastingInferenceInput,
ForecastingMetadataInput,
ForecastingParameters,
PredictOutput,
)


# Number of rows generated for the synthetic input series.
SERIES_LENGTH = 512
# Number of future rows the forecast is asserted to contain
# (see _basic_result_checks).
FORECAST_LENGTH = 96
# Identifier of the locally available test model; presumably provisioned
# by the surrounding test setup — confirm against the Makefile targets.
MODEL_ID = "mytest-tsfm/ttm-r1"


@pytest.fixture(scope="module")
def ts_data_base() -> pd.DataFrame:
    """Return a reproducible hourly time series of SERIES_LENGTH rows.

    Columns: ``date`` (hourly timestamps starting 2023-10-01), ``ID``
    (a single constant series id, "1"), and ``VAL`` (random floats in
    [0, 1)).

    Fixes over the original: the values are drawn from a seeded
    generator so the fixture is deterministic across runs, and the
    frequency alias is the lowercase ``"h"`` — ``"H"`` is deprecated
    since pandas 2.2.
    """
    length = SERIES_LENGTH
    date_range = pd.date_range(start="2023-10-01", periods=length, freq="h")

    df = pd.DataFrame(
        {
            "date": date_range,
            "ID": "1",
            # Seeded RNG: values are arbitrary but reproducible.
            "VAL": np.random.default_rng(42).random(length),
        }
    )

    return df


# Load the service configuration once at import time; fall back to an
# empty mapping when no TSFM_CONFIG_FILE path is configured.
config = {}
if TSFM_CONFIG_FILE:
    with open(TSFM_CONFIG_FILE, "r") as file:
        config = yaml.safe_load(file)


@pytest.fixture(scope="module")
def forecasting_input_base() -> ForecastingInferenceInput:
    """Build a minimal ForecastingInferenceInput for the test model.

    The ``data`` payload below is only a placeholder — every test is
    expected to overwrite ``input.data`` with its own frame before
    calling the runtime.
    """
    metadata = ForecastingMetadataInput(
        timestamp_column="date", id_columns=["ID"], target_columns=["VAL"]
    )
    placeholder_payload = {
        "date": [
            "2024-10-18T01:00:21+00:00",
        ],
        "ID1": [
            "I1",
        ],
        "VAL": [
            10.0,
        ],
    }
    return ForecastingInferenceInput(
        model_id=MODEL_ID,
        schema=metadata,
        parameters=ForecastingParameters(),
        data=placeholder_payload,
    )


def _basic_result_checks(results: PredictOutput, df: pd.DataFrame):
    """Assert forecast length and hourly continuation of the input series.

    NOTE(review): despite the annotation, ``results`` is used as a
    pandas DataFrame (callers pass ``pd.DataFrame.from_dict(...)``) —
    confirm and consider fixing the annotation.
    """
    last_input_ts = df["date"].iloc[-1]
    forecast_dates = results["date"]
    # Exactly FORECAST_LENGTH rows are produced.
    assert len(results) == FORECAST_LENGTH
    # The forecast begins one hour after the final input timestamp ...
    assert forecast_dates.iloc[0] - last_input_ts == timedelta(hours=1)
    # ... and ends FORECAST_LENGTH hours after it.
    assert forecast_dates.iloc[-1] - last_input_ts == timedelta(hours=FORECAST_LENGTH)


def test_forecast_with_good_data(ts_data_base: pd.DataFrame, forecasting_input_base: ForecastingInferenceInput):
    """Happy path: a well-formed payload yields a forecast of the expected shape.

    Fix: deep-copy the module-scoped ``forecasting_input_base`` fixture
    before mutating it — the original assigned ``input.data`` directly
    on the shared fixture object, leaking state into later tests (every
    other test here already deep-copies).
    """
    inference_input: ForecastingInferenceInput = copy.deepcopy(forecasting_input_base)
    df = copy.deepcopy(ts_data_base)
    inference_input.data = df.to_dict(orient="list")
    runtime: InferenceRuntime = InferenceRuntime(config=config)
    po: PredictOutput = runtime.forecast(input=inference_input)
    results = pd.DataFrame.from_dict(po.results[0])
    _basic_result_checks(results, df)


def test_forecast_with_schema_missing_target_columns(
    ts_data_base: pd.DataFrame, forecasting_input_base: ForecastingInferenceInput
):
    """Forecasting still succeeds when the schema names no target columns.

    Fix: deep-copy the module-scoped fixture before clearing
    ``schema.target_columns`` — the original emptied the shared
    fixture's schema in place, so every test running after this one
    silently saw ``target_columns == []``.
    """
    inference_input: ForecastingInferenceInput = copy.deepcopy(forecasting_input_base)
    inference_input.schema.target_columns = []
    df = copy.deepcopy(ts_data_base)
    inference_input.data = df.to_dict(orient="list")
    runtime: InferenceRuntime = InferenceRuntime(config=config)
    po: PredictOutput = runtime.forecast(input=inference_input)
    results = pd.DataFrame.from_dict(po.results[0])
    _basic_result_checks(results, df)


def test_forecast_with_integer_timestamps(
    ts_data_base: pd.DataFrame, forecasting_input_base: ForecastingInferenceInput
):
    """Integer (non-datetime) timestamps are accepted and continued in the output."""
    inference_input: ForecastingInferenceInput = copy.deepcopy(forecasting_input_base)
    frame = copy.deepcopy(ts_data_base)
    ts_col = inference_input.schema.timestamp_column

    # Replace the datetime column with a plain 1..N integer sequence.
    frame[ts_col] = frame[ts_col].astype(int)
    frame[ts_col] = range(1, SERIES_LENGTH + 1)
    inference_input.data = frame.to_dict(orient="list")

    runtime: InferenceRuntime = InferenceRuntime(config=config)
    output: PredictOutput = runtime.forecast(input=inference_input)
    results = pd.DataFrame.from_dict(output.results[0])

    # The forecast continues the integer sequence with the same dtype.
    assert results[ts_col].iloc[0] == SERIES_LENGTH + 1
    assert results[ts_col].iloc[-1] - frame[ts_col].iloc[-1] == FORECAST_LENGTH
    assert results.dtypes[ts_col] == frame.dtypes[ts_col]


def test_forecast_with_bogus_timestamps(ts_data_base: pd.DataFrame, forecasting_input_base: ForecastingInferenceInput):
    """Non-date string timestamps are rejected with a ValueError."""
    inference_input: ForecastingInferenceInput = copy.deepcopy(forecasting_input_base)
    frame = copy.deepcopy(ts_data_base)
    ts_col = inference_input.schema.timestamp_column

    # "1", "2", ... are strings that do not parse as timestamps.
    frame[ts_col] = frame[ts_col].astype(str)
    frame[ts_col] = [str(v) for v in range(1, SERIES_LENGTH + 1)]
    inference_input.data = frame.to_dict(orient="list")

    runtime: InferenceRuntime = InferenceRuntime(config=config)
    with pytest.raises(ValueError):
        runtime.forecast(input=inference_input)


def test_forecast_with_bogus_values(ts_data_base: pd.DataFrame, forecasting_input_base: ForecastingInferenceInput):
    """Non-numeric target values are reported as an HTTPException."""
    inference_input: ForecastingInferenceInput = copy.deepcopy(forecasting_input_base)
    frame = copy.deepcopy(ts_data_base)

    # Turn the target column into strings that are not valid floats.
    frame["VAL"] = frame["VAL"].astype(str)
    frame["VAL"] = [str(v) for v in range(1, SERIES_LENGTH + 1)]
    inference_input.data = frame.to_dict(orient="list")

    runtime: InferenceRuntime = InferenceRuntime(config=config)
    with pytest.raises(HTTPException):
        runtime.forecast(input=inference_input)


def test_forecast_with_bogus_model_id(ts_data_base: pd.DataFrame, forecasting_input_base: ForecastingInferenceInput):
    """An unknown model id is reported as an HTTPException."""
    inference_input: ForecastingInferenceInput = copy.deepcopy(forecasting_input_base)
    inference_input.data = copy.deepcopy(ts_data_base).to_dict(orient="list")
    inference_input.model_id = "hoo-hah"

    runtime: InferenceRuntime = InferenceRuntime(config=config)
    with pytest.raises(HTTPException):
        runtime.forecast(input=inference_input)


def test_forecast_with_insufficient_context_length(
    ts_data_base: pd.DataFrame, forecasting_input_base: ForecastingInferenceInput
):
    """A series shorter than the model's context window raises HTTPException."""
    inference_input: ForecastingInferenceInput = copy.deepcopy(forecasting_input_base)
    # Drop the last 100 rows so fewer than SERIES_LENGTH points remain.
    frame = copy.deepcopy(ts_data_base).iloc[0:-100]
    inference_input.data = frame.to_dict(orient="list")

    runtime: InferenceRuntime = InferenceRuntime(config=config)
    with pytest.raises(HTTPException):
        runtime.forecast(input=inference_input)


@pytest.mark.skip
def test_forecast_with_nan_data(ts_data_base: pd.DataFrame, forecasting_input_base: ForecastingInferenceInput):
    """Forecast behavior when the target column contains a NaN.

    NOTE(review): currently skipped — whether NaNs should raise or be
    tolerated is evidently unresolved (the raises-check was left
    commented out in the original), so this only exercises the call.
    """
    inference_input: ForecastingInferenceInput = copy.deepcopy(forecasting_input_base)
    frame = copy.deepcopy(ts_data_base)
    frame.iloc[0, frame.columns.get_loc("VAL")] = np.nan

    inference_input.data = frame.to_dict(orient="list")

    runtime: InferenceRuntime = InferenceRuntime(config=config)
    runtime.forecast(input=inference_input)


# @pytest.mark.skip
def test_forecast_with_missing_row(ts_data_base: pd.DataFrame, forecasting_input_base: ForecastingInferenceInput):
    """A gap in an otherwise regular hourly series raises HTTPException."""
    inference_input: ForecastingInferenceInput = copy.deepcopy(forecasting_input_base)
    frame = copy.deepcopy(ts_data_base)
    # Knock out one interior row, leaving a hole in the hourly cadence
    # (and a series one row shorter than SERIES_LENGTH).
    frame = frame.drop(index=10)

    inference_input.data = frame.to_dict(orient="list")

    runtime: InferenceRuntime = InferenceRuntime(config=config)
    with pytest.raises(HTTPException):
        runtime.forecast(input=inference_input)
Loading

0 comments on commit 0c3430c

Please sign in to comment.