From e9d7b027ed1cff881a33304d09c89df5fb0a5f2a Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Tue, 4 Jun 2024 15:26:50 -0400 Subject: [PATCH 1/4] ci: add action for python linting and formatting Signed-off-by: Nathan Weinberg --- .github/workflows/lint.yml | 65 ++++++++++++++++++++++++++ .github/workflows/matchers/pylint.json | 33 +++++++++++++ isort.cfg => .isort.cfg | 0 requirements-dev.txt | 2 +- requirements.txt | 7 +++ src/instructlab_sdg/__init__.py | 1 + src/instructlab_sdg/generate_data.py | 27 ++++++----- src/instructlab_sdg/utils.py | 6 +-- 8 files changed, 125 insertions(+), 16 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 .github/workflows/matchers/pylint.json rename isort.cfg => .isort.cfg (100%) create mode 100644 requirements.txt diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..fd1c0f70 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: Apache-2.0 + +name: Lint and Format + +on: + push: + branches: + - "main" + - "release-**" + paths: + - '**.py' + - 'pyproject.toml' + - 'requirements*.txt' + - 'tox.ini' + - 'scripts/*.sh' + - '.github/**' + pull_request: + branches: + - "main" + - "release-**" + paths: + - '**.py' + - 'pyproject.toml' + - 'requirements*.txt' + - 'tox.ini' + - 'scripts/*.sh' + - '.github/**' + +env: + PYTHON_VERSION: 3.11 + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + # https://github.com/actions/checkout/issues/249 + fetch-depth: 0 + submodules: true + + - name: Setup Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: 3.11 + cache: pip + cache-dependency-path: | + **/pyproject.toml + **/requirements*.txt + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install tox + + - name: Run Ruff check + run: | + tox -e ruff -- check + + - name: Run linting + if: success() || failure() + run: | + echo "::add-matcher::.github/workflows/matchers/pylint.json" + tox -e lint diff --git a/.github/workflows/matchers/pylint.json b/.github/workflows/matchers/pylint.json new file mode 100644 index 00000000..2b62078c --- /dev/null +++ b/.github/workflows/matchers/pylint.json @@ -0,0 +1,33 @@ +{ + "problemMatcher": [ + { + "owner": "pylint-error", + "severity": "error", + "pattern": [ + { + "regexp": "^(.+):(\\d+):(\\d+):\\s(([EF]\\d{4}):\\s.+)$", + "file": 1, + "line": 2, + "column": 3, + "message": 4, + "code": 5 + } + ] + }, + { + "owner": "pylint-warning", + "severity": "warning", + "pattern": [ + { + "regexp": "^(.+):(\\d+):(\\d+):\\s(([CRW]\\d{4}):\\s.+)$", + "file": 1, + "line": 2, + "column": 3, + "message": 4, + "code": 5 + } + ] + } + ] + } + \ No newline at end of file diff --git a/isort.cfg b/.isort.cfg similarity index 100% rename from isort.cfg rename to .isort.cfg diff --git a/requirements-dev.txt b/requirements-dev.txt index b0e0f77e..6513dc6b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # TODO: Uncomment below line once requirements.txt is created -# -r requirements.txt +-r requirements.txt pre-commit>=3.0.4,<4.0 pylint>=2.16.2,<4.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..f7be0dc6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: Apache-2.0 +click>=8.1.7,<9.0.0 +httpx>=0.25.0,<1.0.0 +jinja2 +openai>=1.13.3,<2.0.0 +rouge_score +tqdm>=4.66.2,<5.0.0 \ No newline at end of file diff --git a/src/instructlab_sdg/__init__.py b/src/instructlab_sdg/__init__.py index 3ca8d1f0..8eb177f1 100644 --- a/src/instructlab_sdg/__init__.py +++ b/src/instructlab_sdg/__init__.py @@ -1 +1,2 @@ +# First Party from instructlab_sdg.generate_data import generate_data diff --git a/src/instructlab_sdg/generate_data.py b/src/instructlab_sdg/generate_data.py index d9a3a8b3..4206d4c6 100644 --- a/src/instructlab_sdg/generate_data.py +++ b/src/instructlab_sdg/generate_data.py @@ -1,6 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 # Standard +from datetime import datetime +from functools import partial +from pathlib import Path +from typing import Optional import json import multiprocessing import os @@ -8,23 +12,22 @@ import re import string import time -from datetime import datetime -from functools import partial -from pathlib import Path -from typing import Optional -import click -import tqdm -# instructlab - All of these need to go away - issue #6 -from instructlab.config import (DEFAULT_MULTIPROCESSING_START_METHOD, - get_model_family) -from instructlab.utils import (chunk_document, max_seed_example_tokens, - num_chars_from_tokens, read_taxonomy) # Third Party +# instructlab - All of these need to go away - issue #6 +from instructlab.config import DEFAULT_MULTIPROCESSING_START_METHOD, get_model_family +from instructlab.utils import ( + chunk_document, + max_seed_example_tokens, + num_chars_from_tokens, + read_taxonomy, +) from jinja2 import Template from rouge_score import rouge_scorer +import click +import tqdm -# Local +# First Party from instructlab_sdg import utils DEFAULT_PROMPT_TEMPLATE_MERLINITE = """\ diff --git a/src/instructlab_sdg/utils.py b/src/instructlab_sdg/utils.py index ab5e9041..75528b6d 100644 --- a/src/instructlab_sdg/utils.py +++ b/src/instructlab_sdg/utils.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # Standard +from typing import Optional, Sequence, Union import copy import dataclasses import io @@ -9,14 +10,13 @@ import math import os import sys -from typing import Optional, Sequence, Union -import httpx +# Third Party # instructlab - TODO these need to go away, issue #6 from instructlab.config import DEFAULT_API_KEY, DEFAULT_MODEL_OLD from instructlab.utils import get_sysprompt -# Third Party from openai import OpenAI, OpenAIError +import httpx StrOrOpenAIObject = Union[str, object] From eb59e4d2a2c7a7f0c1bb0e698d337890e062e81e Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Wed, 5 Jun 2024 10:15:28 -0400 Subject: [PATCH 2/4] Add instructlab as temporary dependency to resolve lint error Signed-off-by: Nathan Weinberg --- requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f7be0dc6..6cef0cd5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,6 @@ httpx>=0.25.0,<1.0.0 jinja2 openai>=1.13.3,<2.0.0 rouge_score -tqdm>=4.66.2,<5.0.0 \ No newline at end of file +tqdm>=4.66.2,<5.0.0 +# TODO: remove 'instructlab' once https://github.com/instructlab/sdg/issues/6 is resolved +instructlab From 5bfc0bbf384f0dd18c080c6660343a9b761d1c8f Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Wed, 5 Jun 2024 10:41:44 -0400 Subject: [PATCH 3/4] Add mypy to new lint action Signed-off-by: Nathan Weinberg --- .github/workflows/lint.yml | 10 ++++++++-- pyproject.toml | 11 ++++++++++- tox.ini | 12 +++++++++++- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index fd1c0f70..72af9aff 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 -name: Lint and Format +name: Lint, Format, and MyPy on: push: @@ -50,6 +50,7 @@ jobs: **/requirements*.txt - name: Install dependencies + id: deps run: | python -m pip install --upgrade pip python -m pip install tox @@ -59,7 +60,12 @@ jobs: tox -e ruff -- check - name: Run linting - if: success() || failure() + if: ${{ !cancelled() && (steps.deps.outcome == 'success') }} run: | echo "::add-matcher::.github/workflows/matchers/pylint.json" tox -e lint + + - name: Run mypy type checks + if: ${{ !cancelled() && (steps.deps.outcome == 'success') }} + run: | + tox -e mypy diff --git a/pyproject.toml b/pyproject.toml index 933291bb..8588d2be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ source = "https://github.com/instructlab/sdg" issues = "https://github.com/instructlab/sdg/issues" [tool.setuptools_scm] -version_file = "src/sdg/_version.py" +version_file = "src/instructlab_sdg/_version.py" # do not include +gREV local version, required for Test PyPI upload local_scheme = "no-local-version" @@ -103,3 +103,12 @@ from-first = true # import-heading-firstparty=First Party # import-heading-localfolder=Local known-local-folder = ["tuning"] + +[tool.mypy] +disable_error_code = ["import-not-found", "import-untyped"] +exclude = [ + "^src/instructlab_sdg/generate_data\\.py$", + "^src/instructlab_sdg/utils\\.py$", +] +# honor excludes by not following there through imports +follow_imports = "silent" diff --git a/tox.ini b/tox.ini index 3f44fe83..c722f0e4 100644 --- a/tox.ini +++ b/tox.ini @@ -3,7 +3,7 @@ [tox] # py3-unit runs unit tests with 'python3' # py311-unit runs the same tests with 'python3.11' -envlist = ruff, lint, spellcheck +envlist = ruff, lint, mypy, spellcheck minversion = 4.4 # format, check, and linting targets don't build and install the project to @@ -49,3 +49,13 @@ commands = sh -c 'command -v aspell || (echo "aspell is not installed. Please install it." && exit 1)' {envpython} -m pyspelling --config {toxinidir}/.spellcheck.yml --spellchecker aspell allowlist_externals = sh + +[testenv:mypy] +description = Python type checking with mypy +deps = + mypy>=1.10.0,<2.0 + types-tqdm + types-PyYAML + pytest +commands = + mypy src From e9a289882be61fee86556fc4f6f92882b762b94b Mon Sep 17 00:00:00 2001 From: Nathan Weinberg Date: Wed, 5 Jun 2024 10:44:51 -0400 Subject: [PATCH 4/4] Updated Makefile to have `make verify` be single command for all Python checks This is a slight reorg I just think makes more sense - anyone touching Python code can run `make verify` as a simple sanity check for their code. Spellcheck is more Markdown-specific and already has its own Makefile command so replaced 'spellcheck' with the new 'mypy` tox env Signed-off-by: Nathan Weinberg --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 38b98b26..ca481f69 100644 --- a/Makefile +++ b/Makefile @@ -63,5 +63,5 @@ spellcheck-sort: .spellcheck-en-custom.txt ## Sort spellcheck directory sort -d -f -o $< $< .PHONY: verify -verify: check-tox ## Run linting and formatting checks via tox - tox p -e ruff,fastlint,spellcheck +verify: check-tox ## Run linting, typing, and formatting checks via tox + tox p -e fastlint,mypy,ruff