Skip to content

Commit

Permalink
schema package: Build a python package for the schema
Browse files Browse the repository at this point in the history
This will allow schema users to pip install the schema instead of
using something like git submodules to place the schema files at some
local path.

Signed-off-by: BJ Hargrave <[email protected]>
  • Loading branch information
bjhargrave committed Jun 6, 2024
1 parent 5b4ce11 commit b717790
Show file tree
Hide file tree
Showing 17 changed files with 242 additions and 10 deletions.
1 change: 0 additions & 1 deletion .github/workflows/actionlint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ jobs:
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
fetch-depth: 0
submodules: true

- name: "Download actionlint"
run: |
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ jobs:
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
fetch-depth: 0
submodules: true
- name: "Check Markdown documents"
uses: DavidAnson/markdownlint-cli2-action@b4c9feab76d8025d1e83c653fa3990936df0e6c8 # v16.0.0
with:
Expand Down
14 changes: 7 additions & 7 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ on:
branches:
- main
paths:
- 'v*/**/*.json'
- 'src/instructlab/schema/v*/**/*.json'
- '.github/workflows/lint.yml' # This workflow
- '.github/scripts/**' # Scripts used by this workflow

pull_request:
branches:
- main
paths:
- 'v*/**/*.json'
- 'src/instructlab/schema/v*/**/*.json'
- '.github/workflows/lint.yml' # This workflow
- '.github/scripts/**' # Scripts used by this workflow

Expand All @@ -31,7 +31,7 @@ permissions:
contents: read

jobs:
lint:
schemalint:
runs-on: ubuntu-latest
steps:
- name: "Harden Runner"
Expand All @@ -58,15 +58,15 @@ jobs:
uses: tj-actions/changed-files@d6babd6899969df1a11d14c368283ea4436bca78 # v44.5.2
with:
files: |
v*/**/*.json
src/instructlab/schema/v*/**/*.json
- name: "Check changed schema file contents"
if: steps.changed-files.outputs.any_changed == 'true'
if: ${{ fromJSON(steps.changed-files.outputs.any_changed) }}
run: |
check-jsonschema --verbose --schemafile https://json-schema.org/draft/2020-12/schema ${{ steps.changed-files.outputs.all_changed_files }}
- name: "Check all schema file contents"
if: steps.changed-files.outputs.any_changed != 'true'
if: ${{ !fromJSON(steps.changed-files.outputs.any_changed) }}
run: |
# shellcheck disable=SC2046
check-jsonschema --verbose --schemafile https://json-schema.org/draft/2020-12/schema $(find v* -name "*.json")
check-jsonschema --verbose --schemafile https://json-schema.org/draft/2020-12/schema $(find src/instructlab/schema/v* -name "*.json")
130 changes: 130 additions & 0 deletions .github/workflows/pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# SPDX-License-Identifier: Apache-2.0

name: Build, test, and upload PyPI package

on:
push:
branches:
- "main"
tags:
- "v*"
pull_request:
branches:
- "main"
release:
types:
- published

env:
LC_ALL: en_US.UTF-8

defaults:
run:
shell: bash

permissions:
contents: read

jobs:
# Create and verify release artifacts
# - build source dist (tar ball) and wheel
# - validate artifacts with various tools
# - upload artifacts to GHA
build-package:
name: Build and check packages
runs-on: ubuntu-latest
steps:
- name: "Harden Runner"
uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0
with:
egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs


- name: "Checkout"
uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
# for setuptools-scm
fetch-depth: 0

- name: "Build and Inspect"
uses: hynek/build-and-inspect-python-package@b4fc3f6ba2b3da04f09659be99e2a29fb6146a61 # v2.6.0

# push to Test PyPI on
# - a new GitHub release is published
# - a PR is merged into main branch
publish-test-pypi:
name: Publish packages to test.pypi.org
# environment: publish-test-pypi
if: ${{ fromJSON(vars.PYPI_PUBLISH) && (github.repository_owner == 'instructlab') && ((github.event.action == 'published') || ((github.event_name == 'push') && (github.ref == 'refs/heads/main'))) }}
permissions:
contents: read
# see https://docs.pypi.org/trusted-publishers/
id-token: write
runs-on: ubuntu-latest
needs: build-package

steps:
- name: "Harden Runner"
uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0
with:
egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs

- name: "Download build artifacts"
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
with:
name: Packages
path: dist

- name: "Upload to Test PyPI"
uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # v1.8.14
with:
repository-url: https://test.pypi.org/legacy/

# push to Production PyPI on
# - a new GitHub release is published
publish-pypi:
name: Publish release to pypi.org
# environment: publish-pypi
if: ${{ fromJSON(vars.PYPI_PUBLISH) && (github.repository_owner == 'instructlab') && (github.event.action == 'published') }}
permissions:
# see https://docs.pypi.org/trusted-publishers/
id-token: write
# allow gh release upload
contents: write

runs-on: ubuntu-latest
needs: build-package

steps:
- name: "Harden Runner"
uses: step-security/harden-runner@f086349bfa2bd1361f7909c78558e816508cdc10 # v2.8.0
with:
egress-policy: audit # TODO: change to 'egress-policy: block' after couple of runs

- name: "Download build artifacts"
uses: actions/download-artifact@65a9edc5881444af0b9093a5e628f2fe47ea3b2e # v4.1.7
with:
name: Packages
path: dist

- name: "Sigstore sign package"
uses: sigstore/gh-action-sigstore-python@61f6a500bbfdd9a2a339cf033e5421951fbc1cd2 # v2.1.1
with:
inputs: |
./dist/*.tar.gz
./dist/*.whl
- name: "Upload artifacts and signatures to GitHub release"
run: |
gh release upload '${{ github.ref_name }}' dist/* --repo '${{ github.repository }}'
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# PyPI does not accept .sigstore artifacts and
# gh-action-pypi-publish has no option to ignore them.
- name: "Remove sigstore signatures before uploading to PyPI"
run: |
rm ./dist/*.sigstore
- name: "Upload to PyPI"
uses: pypa/gh-action-pypi-publish@81e9d935c883d0b210363ab89cf05f3894778450 # v1.8.14
29 changes: 29 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# generated by setuptools_scm
/src/instructlab/schema/_version.py

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Environments
.env
.venv
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Taxonomy Schema

This repository defines the JSON schema for the [Taxonomy](https://github.com/instructlab/taxonomy) YAML.
This Python package defines the JSON schema for the InstructLab [Taxonomy](https://github.com/instructlab/taxonomy) YAML.

Consumers of this schema can `pip install` this package and access the schema using `importlib.resources` on the `instructlab.schema` package.
47 changes: 47 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# SPDX-License-Identifier: Apache-2.0

[build-system]
requires = ["setuptools>=64", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta"

[project]
name = "instructlab-schema"
authors = [
{ name="InstructLab", email="[email protected]" },
]
description = "InstructLab Taxonomy Schema"
readme = "README.md"
license = {text = "Apache-2.0"}
requires-python = ">=3.9"
classifiers = [
"Development Status :: 4 - Beta",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
]
dynamic = ["dependencies", "optional-dependencies", "version"]

[project.urls]
homepage = "https://instructlab.io"
source = "https://github.com/instructlab/schema"
issues = "https://github.com/instructlab/schema/issues"

[tool.setuptools_scm]
version_file = "src/instructlab/schema/_version.py"
# do not include +gREV local version, required for Test PyPI upload
local_scheme = "no-local-version"

[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.packages.find]
where = ["src"]
include = ["instructlab.schema*"]

[tool.setuptools.package-data]
"instructlab.schema" = ["py.typed", "v*/*.json"]
26 changes: 26 additions & 0 deletions src/instructlab/schema/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""InstructLab Taxonomy Schema"""

from importlib import resources

try:
from importlib.resources.abc import Traversable
except ImportError:
from importlib.abc import Traversable # Python 3.10

from typing import List

__all__ = ["schema_versions"]


def schema_versions() -> List[Traversable]:
"""Return the sorted list of schema versions.
Returns:
List[Traversable]: A sorted list of schema versions.
"""
schema_base = resources.files(__package__)
versions = sorted(
(v for v in schema_base.iterdir() if v.name[0] == "v" and v.name[1:].isdigit()),
key=lambda k: int(k.name[1:]),
)
return versions
Empty file added src/instructlab/schema/py.typed
Empty file.
Empty file.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file.
File renamed without changes.
File renamed without changes.
File renamed without changes.

0 comments on commit b717790

Please sign in to comment.