From a29792d0e2dd67da2c0d4246d85028b8be9a426c Mon Sep 17 00:00:00 2001
From: crasm
Date: Fri, 22 Dec 2023 01:23:46 -0500
Subject: [PATCH] python: Add check-requirements.sh and GitHub workflow

This script and workflow forces package versions to remain compatible
across all convert*.py scripts, while allowing secondary convert scripts
to import dependencies not wanted in convert.py.
---
Note: check-requirements.sh was edited by hand after this patch was
generated, so the blob id on its "index" line no longer matches the content.

 .../workflows/python-check-requirements.yml   |  27 +++
 check-requirements.sh                         | 170 ++++++++++++++++++
 convert-persimmon-to-gguf.py                  |   1 +
 requirements-convert-hf-to-gguf.txt           |   3 +
 requirements-convert-llama-ggml-to-gguf.txt   |   1 +
 requirements-convert-lora-to-ggml.txt         |   2 +
 requirements-convert-persimmon-to-gguf.txt    |   2 +
 requirements-convert.txt                      |   5 +
 requirements.txt                              |  16 +-
 9 files changed, 222 insertions(+), 5 deletions(-)
 create mode 100644 .github/workflows/python-check-requirements.yml
 create mode 100755 check-requirements.sh
 mode change 100644 => 100755 convert-persimmon-to-gguf.py
 create mode 100644 requirements-convert-hf-to-gguf.txt
 create mode 100644 requirements-convert-llama-ggml-to-gguf.txt
 create mode 100644 requirements-convert-lora-to-ggml.txt
 create mode 100644 requirements-convert-persimmon-to-gguf.txt
 create mode 100644 requirements-convert.txt

diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml
new file mode 100644
index 0000000000000..cc97ee8100566
--- /dev/null
+++ b/.github/workflows/python-check-requirements.yml
@@ -0,0 +1,27 @@
+name: Python check requirements.txt
+
+on:
+  push:
+    paths:
+      - 'check-requirements.sh'
+      - 'convert*.py'
+      - 'requirements*.txt'
+  pull_request:
+    paths:
+      - 'check-requirements.sh'
+      - 'convert*.py'
+      - 'requirements*.txt'
+
+jobs:
+  python-check-requirements:
+    runs-on: ubuntu-latest
+    name: check-requirements
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v3
+      - name: Set up Python environment
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+      - name: Run check-requirements.sh script
+        run: bash check-requirements.sh nocleanup
diff --git a/check-requirements.sh b/check-requirements.sh
new file mode 100755
index 0000000000000..a38e6293f1701
--- /dev/null
+++ b/check-requirements.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+#
+# check-requirements.sh checks all requirements files for each top-level
+# convert*.py script.
+#
+# WARNING: This is quite IO intensive, because a fresh venv is set up for every
+# python script. As of 2023-12-22, this writes ~2.7GB of data. An adequately
+# sized tmpfs /tmp or ramdisk is recommended if running this frequently.
+#
+# usage:    ./check-requirements.sh [<working_dir>]
+#           ./check-requirements.sh 'nocleanup' [<working_dir>]
+#
+# where:
+#           - <working_dir> is a directory that can be used as the base for
+#               setting up the venvs. Defaults to `/tmp`.
+#           - 'nocleanup' as the first argument will disable automatic cleanup
+#               of the files created by this script.
+#
+# requires:
+#           - bash >= 3.2.57
+#           - shellcheck
+#
+# For each script, it creates a fresh venv, `pip install -r` the
+# requirements, and finally executes the python script with no arguments to
+# check for a `ModuleNotFoundError`.
+#
+
+# log LEVEL FORMAT [ARGS...]: print "LEVEL: <formatted message>" to stderr.
+# FORMAT is a printf format string; pass variable data as ARGS.
+log() {
+    local level="$1"; shift
+    local format="$1"; shift
+    # shellcheck disable=SC2059
+    >&2 printf "$level: $format\n" "$@"
+}
+
+# info FORMAT [ARGS...]: log an informational message.
+info() {
+    log 'INFO' "$@"
+}
+
+# fatal FORMAT [ARGS...]: log an error message and exit with status 1.
+fatal() {
+    log 'FATAL' "$@"
+    exit 1
+}
+
+# cleanup: remove $workdir if it is set, a directory and writable, printing a
+# progress dot for roughly every 750 lines of `rm -v` output.
+cleanup() {
+    if [[ -n ${workdir+x} && -d $workdir && -w $workdir ]]; then
+        info 'Removing %s' "$workdir"
+        (
+            count=0
+            rm -rfv "$workdir" | while read -r; do
+                if (( count++ > 750 )); then
+                    printf '.'
+                    count=0
+                fi
+            done
+            printf '\n'
+        )&
+        wait $!
+        info "Removed '%s'" "$workdir"
+    fi
+}
+
+# abort: handler for the signals trapped below; clean up and exit with status 1.
+abort() {
+    cleanup
+    exit 1
+}
+
+if [[ $1 == nocleanup ]]; then
+    shift # discard nocleanup arg
+else
+    trap abort SIGINT SIGTERM SIGQUIT SIGABRT
+    trap cleanup EXIT
+fi
+
+set -eu -o pipefail
+this="$(realpath "$0")"
+readonly this
+cd "$(dirname "$this")"
+
+shellcheck "$this"
+
+workdir=
+if [[ -n ${1+x} ]]; then
+    arg_dir="$(realpath "$1")"
+    if [[ ! ( -d $arg_dir && -w $arg_dir ) ]]; then
+        fatal '%s is not a valid directory' "$arg_dir"
+    fi
+    workdir="$(mktemp -d "$arg_dir/check-requirements.XXXX")"
+else
+    workdir="$(mktemp -d "/tmp/check-requirements.XXXX")"
+fi
+readonly workdir
+
+info 'Working directory: %s' "$workdir"
+
+# assert_arg_count N ARGS...: exit via fatal unless exactly N ARGS were passed.
+assert_arg_count() {
+    local argcount="$1"; shift
+    if (( $# != argcount )); then
+        fatal '%s: incorrect number of args' "${FUNCNAME[1]}"
+    fi
+}
+
+# check_requirements VENV REQS: pip-install requirements file REQS into the
+# existing venv at VENV. A failed install aborts the script (set -e).
+check_requirements() {
+    assert_arg_count 2 "$@"
+    local venv="$1"
+    local reqs="$2"
+
+    info '%s: beginning check' "$reqs"
+    (
+        # shellcheck source=/dev/null
+        source "$venv/bin/activate"
+        pip --disable-pip-version-check install -q -r "$reqs"
+    )
+    info '%s: OK' "$reqs"
+}
+
+# check_convert_script PY: create a fresh venv for convert script PY, install
+# requirements-<PY without .py>.txt into it, then run PY with no arguments and
+# fail if its stderr reports a ModuleNotFoundError.
+check_convert_script() {
+    assert_arg_count 1 "$@"
+    local py="$1"
+    local pyname="${py%.py}"
+
+    info '%s: beginning check' "$py"
+
+    local reqs="requirements-$pyname.txt"
+    if [[ ! -r "$reqs" ]]; then
+        fatal '%s missing requirements. Expected: %s' "$py" "$reqs"
+    fi
+
+    local venv="$workdir/$pyname-venv"
+    python3 -m venv "$venv"
+
+    check_requirements "$venv" "$reqs"
+    local py_err="$workdir/$pyname.out"
+    (
+        # shellcheck source=/dev/null
+        source "$venv/bin/activate"
+        # Run with no arguments; the exit status is ignored, only stderr matters.
+        python "$py" 2> "$py_err" || true
+    )
+    >&2 cat "$py_err"
+    # grep exits 0 when the pattern is found, i.e. when an import is missing.
+    if grep -q -e 'ModuleNotFoundError' "$py_err"; then
+        fatal '%s: some imports not declared in %s' "$py" "$reqs"
+    fi
+    info '%s: imports OK' "$py"
+}
+
+# Check requirements.txt
+all_venv="$workdir/all-venv"
+python3 -m venv "$all_venv"
+check_requirements "$all_venv" 'requirements.txt'
+
+check_convert_script 'convert.py'
+for py in convert-*.py; do
+    check_convert_script "$py"
+done
+
+info "Done! No issues found."
diff --git a/convert-persimmon-to-gguf.py b/convert-persimmon-to-gguf.py
old mode 100644
new mode 100755
index 206b7d5ff9e31..1ba5864dc25ec
--- a/convert-persimmon-to-gguf.py
+++ b/convert-persimmon-to-gguf.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python3
 import torch
 import os
 from pprint import pprint
diff --git a/requirements-convert-hf-to-gguf.txt b/requirements-convert-hf-to-gguf.txt
new file mode 100644
index 0000000000000..4d00b19666108
--- /dev/null
+++ b/requirements-convert-hf-to-gguf.txt
@@ -0,0 +1,3 @@
+-r requirements-convert.txt
+torch==2.1.1
+transformers==4.35.2
diff --git a/requirements-convert-llama-ggml-to-gguf.txt b/requirements-convert-llama-ggml-to-gguf.txt
new file mode 100644
index 0000000000000..8a5377762c1fa
--- /dev/null
+++ b/requirements-convert-llama-ggml-to-gguf.txt
@@ -0,0 +1 @@
+-r requirements-convert.txt
diff --git a/requirements-convert-lora-to-ggml.txt b/requirements-convert-lora-to-ggml.txt
new file mode 100644
index 0000000000000..30827c8964d3e
--- /dev/null
+++ b/requirements-convert-lora-to-ggml.txt
@@ -0,0 +1,2 @@
+-r requirements-convert.txt
+torch==2.1.1
diff --git a/requirements-convert-persimmon-to-gguf.txt b/requirements-convert-persimmon-to-gguf.txt
new file mode 100644
index 0000000000000..30827c8964d3e
--- /dev/null
+++ b/requirements-convert-persimmon-to-gguf.txt
@@ -0,0 +1,2 @@
+-r requirements-convert.txt
+torch==2.1.1
diff --git a/requirements-convert.txt b/requirements-convert.txt
new file mode 100644
index 0000000000000..1a116256671e5
--- /dev/null
+++ b/requirements-convert.txt
@@ -0,0 +1,5 @@
+numpy==1.24.4
+sentencepiece==0.1.98
+transformers>=4.34.0
+gguf>=0.1.0
+protobuf>=4.21.0
diff --git a/requirements.txt b/requirements.txt
index 1a116256671e5..da4f3f9a874e3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,11 @@
-numpy==1.24.4
-sentencepiece==0.1.98
-transformers>=4.34.0
-gguf>=0.1.0
-protobuf>=4.21.0
+# These requirements include all dependencies for all top-level python scripts
+# for llama.cpp. Avoid adding packages here directly.
+#
+# Package versions must stay compatible across all top-level python scripts.
+#
+
+-r requirements-convert.txt
+
+-r requirements-convert-hf-to-gguf.txt
+-r requirements-convert-lora-to-ggml.txt
+-r requirements-convert-persimmon-to-gguf.txt