From c518c82f36f68c5ee7f300e2b1d7522a8a33fdf1 Mon Sep 17 00:00:00 2001
From: Ben Browning <bbrownin@redhat.com>
Date: Fri, 8 Nov 2024 13:12:12 -0500
Subject: [PATCH] Add simple and full knowledge pipeline functional tests

This is a port of the old `scripts/test_knowledge.py` into functional
tests that we can run with CI. These tests need to run on a GPU, so are
marked as 'gpu' in pytest and only execute with a new py3-functional-gpu
tox environment. This also adds a new workflow file to execute these
tests on a GPU runner.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
---
 .../workflows/functional-gpu-nvidia-t4-x1.yml | 155 ++++++++++++++++++
 pyproject.toml                                |   5 +
 requirements-dev-gpu.txt                      |   5 +
 requirements-dev.txt                          |   1 +
 scripts/test_knowledge.py                     |  52 ------
 tests/functional/conftest.py                  |  97 +++++++++++
 tests/functional/test_full_pipeline.py        |  55 +++++++
 tests/functional/test_simple_pipeline.py      |  51 ++++++
 tox.ini                                       |  19 ++-
 9 files changed, 379 insertions(+), 61 deletions(-)
 create mode 100644 .github/workflows/functional-gpu-nvidia-t4-x1.yml
 create mode 100644 requirements-dev-gpu.txt
 delete mode 100644 scripts/test_knowledge.py
 create mode 100644 tests/functional/test_full_pipeline.py
 create mode 100644 tests/functional/test_simple_pipeline.py

diff --git a/.github/workflows/functional-gpu-nvidia-t4-x1.yml b/.github/workflows/functional-gpu-nvidia-t4-x1.yml
new file mode 100644
index 00000000..78bb259c
--- /dev/null
+++ b/.github/workflows/functional-gpu-nvidia-t4-x1.yml
@@ -0,0 +1,155 @@
+# SPDX-License-Identifier: Apache-2.0
+
+name: Functional GPU (NVIDIA Tesla T4 x1)
+
+on:
+  # temporarily run for this PR for anything that changes this workflow file
+  pull_request:
+    paths:
+      - ".github/workflows/functional-gpu-nvidia-t4-x1.yml" # This workflow
+  # run against every merge commit to 'main' and release branches
+  push:
+    branches:
+      - main
+      - release-*
+  # only run on PRs that touch files matching these path glob patterns
+  pull_request_target:
+    branches:
+      - main
+      - release-*
+    paths:
+      #  note this should match the merging criteria in 'mergify.yml'
+      - "**.py"
+      - "pyproject.toml"
+      - "requirements**.txt"
+      - ".github/workflows/functional-gpu-nvidia-t4-x1.yml" # This workflow
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+env:
+  LC_ALL: en_US.UTF-8
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  contents: read
+
+jobs:
+  start-small-ec2-runner:
+    runs-on: ubuntu-latest
+    outputs:
+      label: ${{ steps.start-ec2-runner.outputs.label }}
+      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: Start EC2 runner
+        id: start-ec2-runner
+        uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
+        with:
+          mode: start
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          ec2-image-id: ${{ vars.AWS_EC2_AMI }}
+          ec2-instance-type: g4dn.2xlarge
+          subnet-id: subnet-02d230cffd9385bd4
+          security-group-id: sg-06300447c4a5fbef3
+          iam-role-name: instructlab-ci-runner
+          aws-resource-tags: >
+            [
+              {"Key": "Name", "Value": "instructlab-ci-github-small-runner"},
+              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
+              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
+              {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
+            ]
+
+  functional-gpu-small-test:
+    needs:
+      - start-small-ec2-runner
+    runs-on: ${{ needs.start-small-ec2-runner.outputs.label }}
+
+    # It is important that this job has no write permissions and has
+    # no access to any secrets. This part is where we are running
+    # untrusted code from PRs.
+    permissions: {}
+
+    steps:
+      - name: Install Packages
+        run: |
+          cat /etc/os-release
+          sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel
+
+      - name: Checkout instructlab/sdg
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          # https://github.com/actions/checkout/issues/249
+          fetch-depth: 0
+
+      - name: Fetch and checkout PR
+        if: github.event_name == 'pull_request_target'
+        run: |
+          git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-${{ github.event.pull_request.number }}
+          git checkout pr-${{ github.event.pull_request.number }}
+
+      - name: Install instructlab/sdg
+        run: |
+          export PATH="/home/ec2-user/.local/bin:/usr/local/cuda/bin:$PATH"
+          python3.11 -m venv --upgrade-deps venv
+          . venv/bin/activate
+          nvidia-smi
+          python3.11 -m pip install tox "tox-gh>=1.2"  # quoted: a bare '>' is a shell redirect
+          python3.11 -m pip cache remove llama_cpp_python
+          # drop any cached llama_cpp_python wheel (above) so the install below rebuilds it with CUDA enabled
+          CMAKE_ARGS="-DLLAMA_CUDA=on" python3.11 -m pip install -r requirements-dev.txt
+
+      - name: Check disk before tests
+        run: |
+          df -h
+
+      - name: Run functional gpu tests with tox
+        run: |
+          . venv/bin/activate
+          tox -e py3-functional-gpu
+
+      - name: Check disk after tests
+        run: |
+          df -h
+
+  stop-small-ec2-runner:
+    needs:
+      - start-small-ec2-runner
+      - functional-gpu-small-test
+    runs-on: ubuntu-latest
+    if: ${{ always() }}
+    steps:
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: Stop EC2 runner
+        uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
+        with:
+          mode: stop
+          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
+          label: ${{ needs.start-small-ec2-runner.outputs.label }}
+          ec2-instance-id: ${{ needs.start-small-ec2-runner.outputs.ec2-instance-id }}
+
+  functional-gpu-small-workflow-complete:
+    # we don't want to block PRs on failed EC2 cleanup
+    # so not requiring "stop-small-ec2-runner" as well
+    needs: ["start-small-ec2-runner", "functional-gpu-small-test"]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Functional GPU Workflow Complete
+        run: echo "Functional GPU Workflow Complete"
diff --git a/pyproject.toml b/pyproject.toml
index aceddb3c..fdb3fce7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -102,3 +102,8 @@ exclude = [
 ]
 # honor excludes by not following there through imports
 follow_imports = "silent"
+
+[tool.pytest.ini_options]
+markers = [
+    "gpu: marks tests that should run with gpus (deselect with '-m \"not gpu\"')",
+]
diff --git a/requirements-dev-gpu.txt b/requirements-dev-gpu.txt
new file mode 100644
index 00000000..59800e83
--- /dev/null
+++ b/requirements-dev-gpu.txt
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: Apache-2.0
+
+-r requirements-dev.txt
+
+llama-cpp-python[server]>=0.3.0,<1.0.0
diff --git a/requirements-dev.txt b/requirements-dev.txt
index a5b4ea5b..8f1f3df7 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -2,6 +2,7 @@
 
 -r requirements.txt
 
+llama-cpp-python[server]>=0.3.0,<1.0.0
 pre-commit>=3.0.4,<5.0
 pylint>=2.16.2,<4.0
 pylint-pydantic
diff --git a/scripts/test_knowledge.py b/scripts/test_knowledge.py
deleted file mode 100644
index fec6fe52..00000000
--- a/scripts/test_knowledge.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# Standard
-from importlib import resources
-import operator
-
-# Third Party
-from datasets import Dataset
-from openai import OpenAI
-
-# First Party
-from src.instructlab.sdg.pipeline import (
-    FULL_PIPELINES_PACKAGE,
-    Pipeline,
-    PipelineContext,
-)
-
-# Please don't add you vLLM endpoint key here
-openai_api_key = "EMPTY"
-openai_api_base = "Add model endpoint here"
-
-client = OpenAI(
-    api_key=openai_api_key,
-    base_url=openai_api_base,
-)
-
-models = client.models.list()
-teacher_model = models.data[0].id
-
-samples = [
-    {
-        "icl_query_1": "what is the location of the tubal tonsils?",
-        "icl_response_1": "The location of the tubal tonsils is the roof of the pharynx.",
-        "icl_query_2": "How long does the adenoid grow?",
-        "task_description": "Teaching about human anatomy, specifically tonsils",
-        "icl_response_2": "The adenoid grows until the age of 5, starts to shrink at the age of 7 and becomes small in adulthood.",
-        "icl_query_3": "What is the immune systems first line of defense against ingested or inhaled foreign pathogens?",
-        "icl_response_3": "The tonsils are the immune systems first line of defense.",
-        "document": "The **tonsils** are a set of lymphoid organs facing into the aerodigestive tract, which is known as Waldeyer's tonsillar ring and consists of the adenoid tonsil or pharyngeal tonsil, two tubal tonsils, two palatine tonsils, and the lingual tonsils. These organs play an important role in the immune system. When used unqualified, the term most commonly refers specifically to the palatine tonsils, which are two lymphoid organs situated at either side of the back of the human throat. The palatine tonsils and the adenoid tonsil are organs consisting of lymphoepithelial tissue located near the oropharynx and nasopharynx parts of the throat",
-        "domain": "textbook",
-    }
-]
-
-ds = Dataset.from_list(samples)
-
-ctx = PipelineContext(client, "mixtral", teacher_model, 1)
-
-with resources.path(FULL_PIPELINES_PACKAGE, "knowledge.yaml") as yaml_path:
-    knowledge_pipe = Pipeline.from_file(ctx, yaml_path)
-
-mmlubench_data = knowledge_pipe.generate(ds)
-
-print(mmlubench_data)
-print(mmlubench_data[0])
diff --git a/tests/functional/conftest.py b/tests/functional/conftest.py
index c1793a94..b776c949 100644
--- a/tests/functional/conftest.py
+++ b/tests/functional/conftest.py
@@ -1,10 +1,19 @@
 # Standard
+from importlib import resources
 import pathlib
 import typing
 
 # Third Party
+from datasets import Dataset
+from llama_cpp.server.app import create_app
+from llama_cpp.server.settings import ModelSettings, ServerSettings
+from openai import OpenAI
+from starlette.testclient import TestClient
 import pytest
 
+# First Party
+from src.instructlab.sdg.pipeline import Pipeline, PipelineContext
+
 TESTS_PATH = pathlib.Path(__file__).parent.parent.absolute()
 
 
@@ -12,3 +21,91 @@
 def testdata_path() -> typing.Generator[pathlib.Path, None, None]:
     """Path to local test data directory"""
     yield TESTS_PATH / "testdata"
+
+
+@pytest.fixture
+def num_gpu_layers():
+    return -1  # -1 tells llama.cpp to offload all model layers to the GPU
+
+
+@pytest.fixture
+def openai_client(model, model_repo_id, num_gpu_layers):
+    server_settings = ServerSettings()
+    model_settings = [
+        ModelSettings(
+            model=model,
+            hf_model_repo_id=model_repo_id,
+            n_gpu_layers=num_gpu_layers,
+            verbose=True,
+        )
+    ]
+    app = create_app(
+        server_settings=server_settings,
+        model_settings=model_settings,
+    )
+
+    @app.get("/")
+    def read_root():
+        return {"message": "Hello from InstructLab! Visit us at https://instructlab.ai"}
+
+    test_client = TestClient(app)
+    return OpenAI(
+        api_key="EMPTY",
+        base_url="http://localhost:8000/v1",
+        http_client=test_client,
+    )
+
+
+@pytest.fixture
+def teacher_model(openai_client):
+    """Id of the first model returned by ``openai_client.models.list()``."""
+    return openai_client.models.list().data[0].id
+
+
+@pytest.fixture
+def max_num_tokens():
+    return 1024
+
+
+@pytest.fixture
+def pipeline_context(
+    openai_client,
+    model_family,
+    teacher_model,
+    num_instructions_to_generate,
+    max_num_tokens,
+):
+    return PipelineContext(
+        openai_client,
+        model_family,
+        teacher_model,
+        num_instructions_to_generate,
+        max_num_tokens=max_num_tokens,
+    )
+
+
+@pytest.fixture
+def knowledge_dataset():
+    return Dataset.from_list(
+        [
+            {
+                "icl_query_1": "what is the location of the tubal tonsils?",
+                "icl_response_1": "The location of the tubal tonsils is the roof of the pharynx.",
+                "icl_query_2": "How long does the adenoid grow?",
+                "task_description": "Teaching about human anatomy, specifically tonsils",
+                "icl_response_2": "The adenoid grows until the age of 5, starts to shrink at the age of 7 and becomes small in adulthood.",
+                "icl_query_3": "What is the immune systems first line of defense against ingested or inhaled foreign pathogens?",
+                "icl_response_3": "The tonsils are the immune systems first line of defense.",
+                "document": "The **tonsils** are a set of lymphoid organs facing into the aerodigestive tract, which is known as Waldeyer's tonsillar ring and consists of the adenoid tonsil or pharyngeal tonsil, two tubal tonsils, two palatine tonsils, and the lingual tonsils. These organs play an important role in the immune system. When used unqualified, the term most commonly refers specifically to the palatine tonsils, which are two lymphoid organs situated at either side of the back of the human throat. The palatine tonsils and the adenoid tonsil are organs consisting of lymphoepithelial tissue located near the oropharynx and nasopharynx parts of the throat",
+                "icl_document": "The **tonsils** are a set of lymphoid organs facing into the aerodigestive tract, which is known as Waldeyer's tonsillar ring and consists of the adenoid tonsil or pharyngeal tonsil, two tubal tonsils, two palatine tonsils, and the lingual tonsils.",
+                "domain": "textbook",
+                "document_outline": "Medical description of tonsils",
+            }
+        ]
+    )
+
+
+@pytest.fixture
+def knowledge_pipeline(pipeline_context, pipelines_package):
+    yaml_path = resources.files(pipelines_package).joinpath("knowledge.yaml")
+    return Pipeline.from_file(pipeline_context, yaml_path)
diff --git a/tests/functional/test_full_pipeline.py b/tests/functional/test_full_pipeline.py
new file mode 100644
index 00000000..9cef7808
--- /dev/null
+++ b/tests/functional/test_full_pipeline.py
@@ -0,0 +1,55 @@
+# Standard
+import unittest
+
+# Third Party
+import pytest
+
+# First Party
+from src.instructlab.sdg.datamixing import _get_question_hack, _get_response_hack
+from src.instructlab.sdg.pipeline import FULL_PIPELINES_PACKAGE
+
+
+@pytest.fixture
+def model():
+    return "mistral-7b-instruct-v0.2.Q5_K_M.gguf"
+    # return "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+    # return "mistral-7b-instruct-v0.2.Q3_K_S.gguf"
+
+
+@pytest.fixture
+def model_family():
+    return "mixtral"
+
+
+@pytest.fixture
+def model_repo_id():
+    return "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
+
+
+@pytest.fixture
+def num_instructions_to_generate():
+    return 1
+
+
+@pytest.fixture
+def pipelines_package():
+    return FULL_PIPELINES_PACKAGE
+
+
+@pytest.mark.gpu
+class TestFullPipeline(unittest.TestCase):
+    @pytest.fixture(autouse=True)
+    def _setup_fixtures(self, knowledge_dataset, knowledge_pipeline):
+        self.knowledge_dataset = knowledge_dataset
+        self.knowledge_pipeline = knowledge_pipeline
+
+    def test_knowledge(self):
+        samples = self.knowledge_pipeline.generate(self.knowledge_dataset)
+        print(samples)
+        assert len(samples) > 0
+        for sample in samples:
+            print(sample)
+            question = _get_question_hack(sample)
+            response = _get_response_hack(sample)
+            assert len(question) > 0
+            assert len(response) > 0
diff --git a/tests/functional/test_simple_pipeline.py b/tests/functional/test_simple_pipeline.py
new file mode 100644
index 00000000..6229fc61
--- /dev/null
+++ b/tests/functional/test_simple_pipeline.py
@@ -0,0 +1,51 @@
+# Standard
+import unittest
+
+# Third Party
+import pytest
+
+# First Party
+from src.instructlab.sdg.datamixing import _get_question_hack, _get_response_hack
+from src.instructlab.sdg.pipeline import SIMPLE_PIPELINES_PACKAGE
+
+
+@pytest.fixture
+def model():
+    return "merlinite-7b-Q4_K_M.gguf"
+
+
+@pytest.fixture
+def model_family():
+    return "merlinite"
+
+
+@pytest.fixture
+def model_repo_id():
+    return "ibm/merlinite-7b-GGUF"
+
+
+@pytest.fixture
+def num_instructions_to_generate():
+    return 2
+
+
+@pytest.fixture
+def pipelines_package():
+    return SIMPLE_PIPELINES_PACKAGE
+
+
+@pytest.mark.gpu
+class TestSimplePipeline(unittest.TestCase):
+    @pytest.fixture(autouse=True)
+    def _setup_fixtures(self, knowledge_dataset, knowledge_pipeline):
+        self.knowledge_dataset = knowledge_dataset
+        self.knowledge_pipeline = knowledge_pipeline
+
+    def test_knowledge(self):
+        samples = self.knowledge_pipeline.generate(self.knowledge_dataset)
+        assert len(samples) > 0
+        for sample in samples:
+            question = _get_question_hack(sample)
+            response = _get_response_hack(sample)
+            assert len(question) > 0
+            assert len(response) > 0
diff --git a/tox.ini b/tox.ini
index 7590a2ba..9d14f279 100644
--- a/tox.ini
+++ b/tox.ini
@@ -17,15 +17,8 @@ wheel_build_env = pkg
 deps = -r requirements-dev.txt
 commands =
     unit: {envpython} -m pytest {posargs:tests --ignore=tests/functional}
-    unitcov: {envpython} -W error::UserWarning -m pytest --cov=instructlab.sdg --cov-report term --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests --ignore=tests/functional -m "not (examples or slow)"}
-    functional: {envpython} -m pytest {posargs:tests/functional}
-allowlist_externals =
-    functional: ./scripts/functional-tests.sh
-
-[testenv:py3-functional]
-setenv =
-    OPENAI_API_BASE={env:OPENAI_API_BASE:http://localhost:8000/v1}
-    OPENAI_API_KEY={env:OPENAI_API_KEY:EMPTY}
+    unitcov: {envpython} -W error::UserWarning -m pytest --cov=instructlab.sdg --cov-report term --cov-report=html:coverage-{env_name} --cov-report=xml:coverage-{env_name}.xml --html=durations/{env_name}.html {posargs:tests --ignore=tests/functional -m "not (examples or gpu)"}
+    functional: {envpython} -m pytest {posargs:tests/functional -m "not gpu"}
 
 # format, check, and linting targets don't build and install the project to
 # speed up testing.
@@ -91,6 +84,14 @@ deps = -r requirements-dev.txt
 commands =
     {envpython} ./scripts/validate_pipelines.py
 
+[testenv:py3-functional-gpu]
+description = run functional tests that require a GPU
+package = wheel
+wheel_build_env = pkg
+deps = -r requirements-dev-gpu.txt
+commands =
+    {envpython} -m pytest {posargs:tests/functional -m "gpu"}
+
 [gh]
 python =
     3.11 = py311-{unitcov, functional}