diff --git a/.github/workflows/actionlint.yml b/.github/workflows/actionlint.yml
index 2e8415b..4436824 100644
--- a/.github/workflows/actionlint.yml
+++ b/.github/workflows/actionlint.yml
@@ -38,7 +38,6 @@ jobs:
         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
         with:
           fetch-depth: 0
-          submodules: true
 
       - name: "Download actionlint"
         run: |
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 39d8ea7..5dee377 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -40,7 +40,6 @@ jobs:
         uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
         with:
           fetch-depth: 0
-          submodules: true
       - name: "Check Markdown documents"
         uses: DavidAnson/markdownlint-cli2-action@b4c9feab76d8025d1e83c653fa3990936df0e6c8 # v16.0.0
         with:
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 34b951d..2327d1b 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -8,7 +8,7 @@ on:
     branches:
       - main
     paths:
-      - 'v*/**/*.json'
+      - 'src/instructlab/schema/v*/**/*.json'
       - '.github/workflows/lint.yml' # This workflow
       - '.github/scripts/**' # Scripts used by this workflow
 
@@ -16,7 +16,7 @@ on:
     branches:
       - main
     paths:
-      - 'v*/**/*.json'
+      - 'src/instructlab/schema/v*/**/*.json'
       - '.github/workflows/lint.yml' # This workflow
       - '.github/scripts/**' # Scripts used by this workflow
 
@@ -31,7 +31,7 @@ permissions:
   contents: read
 
 jobs:
-  lint:
+  schemalint:
     runs-on: ubuntu-latest
     steps:
       - name: "Harden Runner"
@@ -58,15 +58,15 @@ jobs:
         uses: tj-actions/changed-files@d6babd6899969df1a11d14c368283ea4436bca78 # v44.5.2
         with:
           files: |
-            v*/**/*.json
+            src/instructlab/schema/v*/**/*.json
 
       - name: "Check changed schema file contents"
-        if: steps.changed-files.outputs.any_changed == 'true'
+        if: ${{ fromJSON(steps.changed-files.outputs.any_changed) }}
         run: |
           check-jsonschema --verbose --schemafile https://json-schema.org/draft/2020-12/schema ${{ steps.changed-files.outputs.all_changed_files }}
 
       - name: "Check all schema file contents"
-        if: steps.changed-files.outputs.any_changed != 'true'
+        if: ${{ !fromJSON(steps.changed-files.outputs.any_changed) }}
         run: |
           # shellcheck disable=SC2046
-          check-jsonschema --verbose --schemafile https://json-schema.org/draft/2020-12/schema $(find v* -name "*.json")
+          check-jsonschema --verbose --schemafile https://json-schema.org/draft/2020-12/schema $(find src/instructlab/schema/v* -name "*.json")
diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
new file mode 100644
index 0000000..5129a06
--- /dev/null
+++ b/.github/workflows/pypi.yml
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: Apache-2.0
+
+name: Build, test, and upload PyPI package
+
+on:
+  push:
+    branches:
+      - "main"
+    tags:
+      - "v*"
+  pull_request:
+    branches:
+      - "main"
+  release:
+    types:
+      - published
+
+env:
+  LC_ALL: en_US.UTF-8
+
+defaults:
+  run:
+    shell: bash
+
+permissions:
+  contents: read
+
+jobs:
+  # Create and verify release artifacts
+  # - build source dist (tar ball) and wheel
+  # - validate artifacts with various tools
+  # - upload artifacts to GHA
+  build-package:
+    name: Build and check packages
+    runs-on: ubuntu-latest
+    steps:
+      - name: "Harden Runner"
+        uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0
+        with:
+          egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
+
+
+      - name: "Checkout"
+        uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
+        with:
+          # for setuptools-scm
+          fetch-depth: 0
+
+      - name: "Build and Inspect"
+        uses: hynek/build-and-inspect-python-package@b4fc3f6ba2b3da04f09659be99e2a29fb6146a61 # v2.6.0
+
+  # push to Test PyPI on
+  # - a new GitHub release is published
+  # - a PR is merged into main branch
+  publish-test-pypi:
+    name: Publish packages to test.pypi.org
+    # environment: publish-test-pypi
+    # An unset PYPI_PUBLISH variable is ''; default to 'false' since fromJSON('') is an error
+    if: ${{ fromJSON(vars.PYPI_PUBLISH || 'false') && (github.repository_owner == 'instructlab') && ((github.event.action == 'published') || ((github.event_name == 'push') && (github.ref == 'refs/heads/main'))) }}
+    permissions:
+      contents: read
+      # see https://docs.pypi.org/trusted-publishers/
+      id-token: write
+    runs-on: ubuntu-latest
+    needs: build-package
+
+    steps:
+      - name: "Harden Runner"
+        uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0
+        with:
+          egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
+
+      - name: "Download build artifacts"
+        uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
+        with:
+          name: Packages
+          path: dist
+
+      - name: "Upload to Test PyPI"
+        uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # v1.8.14
+        with:
+          repository-url: https://test.pypi.org/legacy/
+
+  # push to Production PyPI on
+  # - a new GitHub release is published
+  publish-pypi:
+    name: Publish release to pypi.org
+    # environment: publish-pypi
+    # An unset PYPI_PUBLISH variable is ''; default to 'false' since fromJSON('') is an error
+    if: ${{ fromJSON(vars.PYPI_PUBLISH || 'false') && (github.repository_owner == 'instructlab') && (github.event.action == 'published') }}
+    permissions:
+      # see https://docs.pypi.org/trusted-publishers/
+      id-token: write
+      # allow gh release upload
+      contents: write
+
+    runs-on: ubuntu-latest
+    needs: build-package
+
+    steps:
+      - name: "Harden Runner"
+        uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0
+        with:
+          egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs
+
+      - name: "Download build artifacts"
+        uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
+        with:
+          name: Packages
+          path: dist
+
+      - name: "Sigstore sign package"
+        uses: sigstore/gh-action-sigstore-python@61f6a500bbfdd9a2a339cf033e5421951fbc1cd2 # v2.1.1
+        with:
+          inputs: |
+            ./dist/*.tar.gz
+            ./dist/*.whl
+
+      - name: "Upload artifacts and signatures to GitHub release"
+        run: |
+          gh release upload '${{ github.ref_name }}' dist/* --repo '${{ github.repository }}'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+      # PyPI does not accept .sigstore artifacts and
+      # gh-action-pypi-publish has no option to ignore them.
+      - name: "Remove sigstore signatures before uploading to PyPI"
+        run: |
+          rm ./dist/*.sigstore
+
+      - name: "Upload to PyPI"
+        uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # v1.8.14
diff --git a/.gitignore b/.gitignore
index 701ff28..086d9b7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,32 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# generated by setuptools_scm
+/src/instructlab/schema/_version.py
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
 # Environments
 .env
 .venv
diff --git a/README.md b/README.md
index 7756104..2999477 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
 # Taxonomy Schema
 
-This repository defines the JSON schema for the [Taxonomy](https://github.com/instructlab/taxonomy) YAML.
+This Python package defines the JSON schema for the InstructLab [Taxonomy](https://github.com/instructlab/taxonomy) YAML.
+
+Consumers of this schema can `pip install` this package and access the schema using `importlib.resources` on the `instructlab.schema` package.
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..49efd30
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: Apache-2.0
+
+[build-system]
+requires = ["setuptools>=64", "setuptools_scm>=8"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "instructlab-schema"
+authors = [
+    { name="InstructLab", email="dev@instructlab.ai" },
+]
+description = "InstructLab Taxonomy Schema"
+readme = "README.md"
+license = {text = "Apache-2.0"}
+requires-python = ">=3.9"
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "License :: OSI Approved :: Apache Software License",
+    "Operating System :: OS Independent",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dynamic = ["dependencies", "optional-dependencies", "version"]
+
+[project.urls]
+homepage = "https://instructlab.io"
+source = "https://github.com/instructlab/schema"
+issues = "https://github.com/instructlab/schema/issues"
+
+[tool.setuptools_scm]
+version_file = "src/instructlab/schema/_version.py"
+# do not include +gREV local version, required for Test PyPI upload
+local_scheme = "no-local-version"
+
+[tool.setuptools]
+package-dir = {"" = "src"}
+
+[tool.setuptools.packages.find]
+where = ["src"]
+include = ["instructlab.schema*"]
+
+[tool.setuptools.package-data]
+"instructlab.schema" = ["py.typed", "v*/*.json"]
diff --git a/src/instructlab/schema/__init__.py b/src/instructlab/schema/__init__.py
new file mode 100644
index 0000000..ebf7616
--- /dev/null
+++ b/src/instructlab/schema/__init__.py
@@ -0,0 +1,34 @@
+"""InstructLab Taxonomy Schema"""
+
+from importlib import resources
+
+try:
+    from importlib.resources.abc import Traversable
+except ImportError:
+    from importlib.abc import Traversable  # Python 3.10
+
+from typing import List
+
+__all__ = ["schema_versions"]
+
+
+def schema_versions() -> List[Traversable]:
+    """Return the schema version directories, sorted by version number.
+
+    A schema version is a directory of this package named ``v<N>``, where
+    ``<N>`` consists of decimal digits only.
+
+    Returns:
+        List[Traversable]: The version directories in ascending numeric order.
+    """
+    schema_base = resources.files(__package__)
+    versions = sorted(
+        (
+            v
+            for v in schema_base.iterdir()
+            # isdecimal(), not isdigit(): only decimal digits are parseable by int()
+            if v.is_dir() and v.name.startswith("v") and v.name[1:].isdecimal()
+        ),
+        key=lambda k: int(k.name[1:]),
+    )
+    return versions
diff --git a/src/instructlab/schema/py.typed b/src/instructlab/schema/py.typed
new file mode 100644
index 0000000..e69de29
diff --git a/src/instructlab/schema/v1/__init__.py b/src/instructlab/schema/v1/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/v1/compositional_skills.json b/src/instructlab/schema/v1/compositional_skills.json
similarity index 100%
rename from v1/compositional_skills.json
rename to src/instructlab/schema/v1/compositional_skills.json
diff --git a/v1/knowledge.json b/src/instructlab/schema/v1/knowledge.json
similarity index 100%
rename from v1/knowledge.json
rename to src/instructlab/schema/v1/knowledge.json
diff --git a/v1/version.json b/src/instructlab/schema/v1/version.json
similarity index 100%
rename from v1/version.json
rename to src/instructlab/schema/v1/version.json
diff --git a/src/instructlab/schema/v2/__init__.py b/src/instructlab/schema/v2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/v2/compositional_skills.json b/src/instructlab/schema/v2/compositional_skills.json
similarity index 100%
rename from v2/compositional_skills.json
rename to src/instructlab/schema/v2/compositional_skills.json
diff --git a/v2/knowledge.json b/src/instructlab/schema/v2/knowledge.json
similarity index 100%
rename from v2/knowledge.json
rename to src/instructlab/schema/v2/knowledge.json
diff --git a/v2/version.json b/src/instructlab/schema/v2/version.json
similarity index 100%
rename from v2/version.json
rename to src/instructlab/schema/v2/version.json