From a6dd0d278175a48a519834b987f41042c2dd936c Mon Sep 17 00:00:00 2001 From: BJ Hargrave Date: Wed, 5 Jun 2024 13:34:39 -0400 Subject: [PATCH] schema package: Build a python package for the schema This will allow schema users to pip install the schema instead of using something like git submodules to place the schema files at some local path. Signed-off-by: BJ Hargrave --- .github/workflows/lint.yml | 14 +- .github/workflows/pypi.yml | 130 ++++++++++++++++++ .gitignore | 24 ++++ README.md | 4 +- pyproject.toml | 47 +++++++ src/instructlab/schema/__init__.py | 21 +++ src/instructlab/schema/py.typed | 0 src/instructlab/schema/v1/__init__.py | 0 .../schema/v1}/compositional_skills.json | 0 .../instructlab/schema/v1}/knowledge.json | 0 .../instructlab/schema/v1}/version.json | 0 src/instructlab/schema/v2/__init__.py | 0 .../schema/v2}/compositional_skills.json | 0 .../instructlab/schema/v2}/knowledge.json | 0 .../instructlab/schema/v2}/version.json | 0 15 files changed, 232 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/pypi.yml create mode 100644 pyproject.toml create mode 100644 src/instructlab/schema/__init__.py create mode 100644 src/instructlab/schema/py.typed create mode 100644 src/instructlab/schema/v1/__init__.py rename {v1 => src/instructlab/schema/v1}/compositional_skills.json (100%) rename {v1 => src/instructlab/schema/v1}/knowledge.json (100%) rename {v1 => src/instructlab/schema/v1}/version.json (100%) create mode 100644 src/instructlab/schema/v2/__init__.py rename {v2 => src/instructlab/schema/v2}/compositional_skills.json (100%) rename {v2 => src/instructlab/schema/v2}/knowledge.json (100%) rename {v2 => src/instructlab/schema/v2}/version.json (100%) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 34b951d..2327d1b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -8,7 +8,7 @@ on: branches: - main paths: - - 'v*/**/*.json' + - 'src/instructlab/schema/v*/**/*.json' - 
'.github/workflows/lint.yml' # This workflow - '.github/scripts/**' # Scripts used by this workflow @@ -16,7 +16,7 @@ on: branches: - main paths: - - 'v*/**/*.json' + - 'src/instructlab/schema/v*/**/*.json' - '.github/workflows/lint.yml' # This workflow - '.github/scripts/**' # Scripts used by this workflow @@ -31,7 +31,7 @@ permissions: contents: read jobs: - lint: + schemalint: runs-on: ubuntu-latest steps: - name: "Harden Runner" @@ -58,15 +58,15 @@ jobs: uses: tj-actions/changed-files@d6babd6899969df1a11d14c368283ea4436bca78 # v44.5.2 with: files: | - v*/**/*.json + src/instructlab/schema/v*/**/*.json - name: "Check changed schema file contents" - if: steps.changed-files.outputs.any_changed == 'true' + if: ${{ fromJSON(steps.changed-files.outputs.any_changed) }} run: | check-jsonschema --verbose --schemafile https://json-schema.org/draft/2020-12/schema ${{ steps.changed-files.outputs.all_changed_files }} - name: "Check all schema file contents" - if: steps.changed-files.outputs.any_changed != 'true' + if: ${{ !fromJSON(steps.changed-files.outputs.any_changed) }} run: | # shellcheck disable=SC2046 - check-jsonschema --verbose --schemafile https://json-schema.org/draft/2020-12/schema $(find v* -name "*.json") + check-jsonschema --verbose --schemafile https://json-schema.org/draft/2020-12/schema $(find src/instructlab/schema/v* -name "*.json") diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml new file mode 100644 index 0000000..1c9a536 --- /dev/null +++ b/.github/workflows/pypi.yml @@ -0,0 +1,130 @@ +# SPDX-License-Identifier: Apache-2.0 + +name: Build, test, and upload PyPI package + +on: + push: + branches: + - "main" + tags: + - "v*" + pull_request: + branches: + - "main" + release: + types: + - published + +env: + LC_ALL: en_US.UTF-8 + +defaults: + run: + shell: bash + +permissions: + contents: read + +jobs: + # Create and verify release artifacts + # - build source dist (tar ball) and wheel + # - validate artifacts with various tools + # - 
upload artifacts to GHA + build-package: + name: Build and check packages + runs-on: ubuntu-latest + steps: + - name: "Harden Runner" + uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0 + with: + egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs + + + - name: "Checkout" + uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 + with: + # for setuptools-scm + fetch-depth: 0 + + - name: "Build and Inspect" + uses: hynek/build-and-inspect-python-package@b4fc3f6ba2b3da04f09659be99e2a29fb6146a61 # v2.6.0 + + # push to Test PyPI on + # - a new GitHub release is published + # - a PR is merged into main branch + publish-test-pypi: + name: Publish packages to test.pypi.org + # environment: publish-test-pypi + if: ${{ fromJSON(vars.PYPI_PUBLISH) && github.repository_owner == 'instructlab' && (github.event.action == 'published' || (github.event_name == 'push' && github.ref == 'refs/heads/main')) }} + permissions: + contents: read + # see https://docs.pypi.org/trusted-publishers/ + id-token: write + runs-on: ubuntu-latest + needs: build-package + + steps: + - name: "Harden Runner" + uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0 + with: + egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs + + - name: "Download build artifacts" + uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + with: + name: Packages + path: dist + + - name: "Upload to Test PyPI" + uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # v1.8.14 + with: + repository-url: https://test.pypi.org/legacy/ + + # push to Production PyPI on + # - a new GitHub release is published + publish-pypi: + name: Publish release to pypi.org + # environment: publish-pypi + if: ${{ fromJSON(vars.PYPI_PUBLISH) && github.repository_owner == 'instructlab' && github.event.action == 'published' }} + permissions: + # see 
https://docs.pypi.org/trusted-publishers/ + id-token: write + # allow gh release upload + contents: write + + runs-on: ubuntu-latest + needs: build-package + + steps: + - name: "Harden Runner" + uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0 + with: + egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs + + - name: "Download build artifacts" + uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7 + with: + name: Packages + path: dist + + - name: "Sigstore sign package" + uses: sigstore/gh-action-sigstore-python@61f6a500bbfdd9a2a339cf033e5421951fbc1cd2 # v2.1.1 + with: + inputs: | + ./dist/*.tar.gz + ./dist/*.whl + + - name: "Upload artifacts and signatures to GitHub release" + run: | + gh release upload '${{ github.ref_name }}' dist/* --repo '${{ github.repository }}' + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # PyPI does not accept .sigstore artifacts and + # gh-action-pypi-publish has no option to ignore them. + - name: "Remove sigstore signatures before uploading to PyPI" + run: | + rm ./dist/*.sigstore + + - name: "Upload to PyPI" + uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # v1.8.14 diff --git a/.gitignore b/.gitignore index 701ff28..fed87b9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,27 @@ +# generated by setuptools_scm +/src/instructlab/schema/_version.py + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + # Environments .env .venv diff --git a/README.md b/README.md index 7756104..2999477 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ # Taxonomy Schema -This repository defines the JSON schema for the [Taxonomy](https://github.com/instructlab/taxonomy) YAML. 
+This Python package defines the JSON schema for the InstructLab [Taxonomy](https://github.com/instructlab/taxonomy) YAML. + +Consumers of this schema can `pip install` this package and access the schema using `importlib.resources` on the `instructlab.schema` package. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..49efd30 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,47 @@ +# SPDX-License-Identifier: Apache-2.0 + +[build-system] +requires = ["setuptools>=64", "setuptools_scm>=8"] +build-backend = "setuptools.build_meta" + +[project] +name = "instructlab-schema" +authors = [ + { name="InstructLab", email="dev@instructlab.ai" }, +] +description = "InstructLab Taxonomy Schema" +readme = "README.md" +license = {text = "Apache-2.0"} +requires-python = ">=3.9" +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dynamic = ["dependencies", "optional-dependencies", "version"] + +[project.urls] +homepage = "https://instructlab.io" +source = "https://github.com/instructlab/schema" +issues = "https://github.com/instructlab/schema/issues" + +[tool.setuptools_scm] +version_file = "src/instructlab/schema/_version.py" +# do not include +gREV local version, required for Test PyPI upload +local_scheme = "no-local-version" + +[tool.setuptools] +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +where = ["src"] +include = ["instructlab.schema*"] + +[tool.setuptools.package-data] +"instructlab.schema" = ["py.typed", "v*/*.json"] diff --git a/src/instructlab/schema/__init__.py b/src/instructlab/schema/__init__.py new file mode 100644 index 0000000..c0e36b9 --- 
"""InstructLab Taxonomy Schema"""

import sys
from importlib import resources
from typing import List

# `importlib.resources.abc` only exists on Python 3.11+. This package declares
# support for Python 3.9+, where the same ABC is exposed from `importlib.abc`,
# so pick the import location based on the running interpreter.
if sys.version_info >= (3, 11):
    from importlib.resources.abc import Traversable
else:
    from importlib.abc import Traversable

__all__ = ["schema_versions"]


def schema_versions() -> List[Traversable]:
    """Return the sorted list of schema versions.

    The entries of this package are scanned and those named ``v<N>``, where
    ``<N>`` is one or more decimal digits, are kept. They are ordered by the
    integer value of ``<N>``, so ``v10`` sorts after ``v2``.

    Returns:
        List[Traversable]: The schema version entries in ascending version
        order. The list is empty when no entry matches.
    """
    schema_base = resources.files(__package__)
    versions = [
        entry
        for entry in schema_base.iterdir()
        # startswith() is safe on an empty name, and isdecimal() only accepts
        # characters that int() can parse.
        if entry.name.startswith("v") and entry.name[1:].isdecimal()
    ]
    # Sort numerically rather than lexically so multi-digit versions order
    # correctly.
    versions.sort(key=lambda entry: int(entry.name[1:]))
    return versions
a/v2/version.json b/src/instructlab/schema/v2/version.json similarity index 100% rename from v2/version.json rename to src/instructlab/schema/v2/version.json