Skip to content

Commit

Permalink
py : type-check all Python scripts with Pyright (ggerganov#8341)
Browse files Browse the repository at this point in the history
* py : type-check all Python scripts with Pyright

* server-tests : use trailing slash in openai base_url

* server-tests : add more type annotations

* server-tests : strip "chat" from base_url in oai_chat_completions

* server-tests : model metadata is a dict

* ci : disable pip cache in type-check workflow

The cache is not shared between branches, and it's 250MB in size,
so it would become quite a big part of the 10GB cache limit of the repo.

* py : fix new type errors from master branch

* tests : fix test-tokenizer-random.py

Apparently, gcc applies optimisations even when pre-processing,
which confuses pycparser.

* ci : only show warnings and errors in python type-check

The "information" level otherwise has entries
from 'examples/pydantic_models_to_grammar.py',
which could be confusing for someone trying to figure out what failed,
considering that these messages can safely be ignored
even though they look like errors.
  • Loading branch information
compilade authored and Neo Zhang committed Jul 13, 2024
1 parent 3bc2dde commit da394c9
Show file tree
Hide file tree
Showing 33 changed files with 297 additions and 173 deletions.
16 changes: 16 additions & 0 deletions .devops/nix/package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,22 @@ let
ps.tiktoken
ps.torchWithoutCuda
ps.transformers

# server bench
ps.matplotlib

# server tests
ps.openai
ps.behave
ps.prometheus-client

# for examples/pydantic-models-to-grammar-examples.py
ps.docstring-parser
ps.pydantic

# for scripts/compare-llama-bench.py
ps.gitpython
ps.tabulate
]
);

Expand Down
38 changes: 38 additions & 0 deletions .github/workflows/python-type-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
name: Python Type-Check

on:
push:
paths:
- '.github/workflows/python-type-check.yml'
- '**.py'
- '**/requirements*.txt'
pull_request:
paths:
- '.github/workflows/python-type-check.yml'
- '**.py'
- '**/requirements*.txt'

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true

jobs:
python-type-check:
runs-on: ubuntu-latest
name: pyright type-check
steps:
- name: Check out source repository
uses: actions/checkout@v4
- name: Set up Python environment
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Python dependencies
# TODO: use a venv
run: pip install -r requirements/requirements-all.txt
- name: Type-check with Pyright
uses: jakebailey/pyright-action@v2
with:
version: 1.1.370
level: warning
warnings: true
20 changes: 8 additions & 12 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ def write_tensors(self):
break

for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)):
data: np.ndarray = data # type hint
data: np.ndarray # type hint
n_dims = len(data.shape)
data_dtype = data.dtype
data_qtype: gguf.GGMLQuantizationType | None = None
Expand Down Expand Up @@ -599,10 +599,6 @@ def _create_vocab_sentencepiece(self):

tokenizer_path = self.dir_model / 'tokenizer.model'

tokens: list[bytes] = []
scores: list[float] = []
toktypes: list[int] = []

if not tokenizer_path.is_file():
raise FileNotFoundError(f"File not found: {tokenizer_path}")

Expand Down Expand Up @@ -2119,7 +2115,7 @@ def set_vocab(self):
logger.error(f'Error: Missing {tokenizer_path}')
sys.exit(1)

sentencepiece_model = model.ModelProto()
sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue]
sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix

Expand Down Expand Up @@ -2971,16 +2967,16 @@ def set_vocab(self):
if not tokenizer_path.is_file():
raise FileNotFoundError(f"File not found: {tokenizer_path}")

sentencepiece_model = model.ModelProto()
sentencepiece_model = model.ModelProto() # pyright: ignore[reportAttributeAccessIssue]
sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())

# some models like Pile-T5 family use BPE tokenizer instead of Unigram
if sentencepiece_model.trainer_spec.model_type == 2: # BPE
if sentencepiece_model.trainer_spec.model_type == 2: # BPE
# assure the tokenizer model file name is correct
assert tokenizer_path.name == 'tokenizer.model'
return self._set_vocab_sentencepiece()
else:
assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM
assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM

add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
remove_whitespaces = sentencepiece_model.normalizer_spec.remove_extra_whitespaces
Expand Down Expand Up @@ -3151,7 +3147,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
# but Jais's PyTorch model simply precalculates the slope values and places them
# in relative_pes.slopes
n_head_closest_log2 = 2 ** math.floor(math.log2(self.hparams["n_head"]))
first_val = float(data_torch._data[0])
first_val = float(data_torch[0].item())
self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)

return tensors
Expand Down Expand Up @@ -3185,7 +3181,7 @@ class ChatGLMModel(Model):
def set_vocab_chatglm3(self):
dir_model = self.dir_model
hparams = self.hparams
tokens: list[bytearray] = []
tokens: list[bytes] = []
toktypes: list[int] = []
scores: list[float] = []

Expand Down Expand Up @@ -3334,7 +3330,7 @@ def set_vocab(self):
special_vocab.add_to_gguf(self.gguf_writer)

def set_gguf_parameters(self):
self.gguf_writer.add_name(self.hparams.get("_name_or_path").split("/")[1]) # THUDM/glm4-9b-chat or THUDM/chatglm3-6b
self.gguf_writer.add_name(self.hparams["_name_or_path"].split("/")[1]) # THUDM/glm4-9b-chat or THUDM/chatglm3-6b
n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
n_head_kv = self.hparams.get("multi_query_group_num", n_head)
Expand Down
3 changes: 2 additions & 1 deletion convert_llama_ggml_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ def add_tensors(self, gguf_writer):


def handle_metadata(cfg, hp):
import convert
import examples.convert_legacy_llama as convert

assert cfg.model_metadata_dir.is_dir(), 'Metadata dir is not a directory'
hf_config_path = cfg.model_metadata_dir / "config.json"
orig_config_path = cfg.model_metadata_dir / "params.json"
Expand Down
21 changes: 12 additions & 9 deletions examples/convert_legacy_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ class Metadata:
version: Optional[str] = None
url: Optional[str] = None
description: Optional[str] = None
licence: Optional[str] = None
license: Optional[str] = None
source_url: Optional[str] = None
source_hf_repo: Optional[str] = None

Expand Down Expand Up @@ -492,12 +492,13 @@ def validate_conversion_to(self, data_type: DataType) -> None:

LazyModel: TypeAlias = 'dict[str, LazyTensor]'

ModelFormat: TypeAlias = Literal['ggml', 'torch', 'safetensors', 'none']

@dataclass
class ModelPlus:
model: LazyModel
paths: list[Path] # Where this was read from.
format: Literal['ggml', 'torch', 'safetensors', 'none']
format: ModelFormat
vocab: BaseVocab | None # For GGML models (which have vocab built in), the vocab.


Expand Down Expand Up @@ -536,7 +537,7 @@ def load() -> UnquantizedTensor:


def merge_multifile_models(models_plus: list[ModelPlus]) -> ModelPlus:
formats = set(mp.format for mp in models_plus)
formats: set[ModelFormat] = set(mp.format for mp in models_plus)
assert len(formats) == 1, "different formats?"
format = formats.pop()
paths = [path for mp in models_plus for path in mp.paths]
Expand All @@ -555,7 +556,7 @@ def merge_multifile_models(models_plus: list[ModelPlus]) -> ModelPlus:
else:
model = merge_sharded([mp.model for mp in models_plus])

return ModelPlus(model, paths, format, vocab) # pytype: disable=wrong-arg-types
return ModelPlus(model, paths, format, vocab)


def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_head_kv: int) -> LazyTensor:
Expand Down Expand Up @@ -805,7 +806,7 @@ class OutputFile:
def __init__(self, fname_out: Path, endianess:gguf.GGUFEndian = gguf.GGUFEndian.LITTLE):
self.gguf = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH], endianess=endianess)

def add_meta_model(self, params: Params, metadata: Metadata) -> None:
def add_meta_model(self, params: Params, metadata: Metadata | None) -> None:
# Metadata About The Model And Its Provenence
name = "LLaMA"
if metadata is not None and metadata.name is not None:
Expand All @@ -827,8 +828,8 @@ def add_meta_model(self, params: Params, metadata: Metadata) -> None:
self.gguf.add_url(metadata.url)
if metadata.description is not None:
self.gguf.add_description(metadata.description)
if metadata.licence is not None:
self.gguf.add_licence(metadata.licence)
if metadata.license is not None:
self.gguf.add_licence(metadata.license)
if metadata.source_url is not None:
self.gguf.add_source_url(metadata.source_url)
if metadata.source_hf_repo is not None:
Expand Down Expand Up @@ -943,7 +944,7 @@ def close(self) -> None:
@staticmethod
def write_vocab_only(
fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata = None,
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False, metadata: Metadata | None = None,
) -> None:
check_vocab_size(params, vocab, pad_vocab=pad_vocab)

Expand Down Expand Up @@ -977,7 +978,7 @@ def write_all(
fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: BaseVocab, svocab: gguf.SpecialVocab,
concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE,
pad_vocab: bool = False,
metadata: Metadata = None,
metadata: Metadata | None = None,
) -> None:
check_vocab_size(params, vocab, pad_vocab=pad_vocab)

Expand Down Expand Up @@ -1396,6 +1397,8 @@ def main(args_in: list[str] | None = None) -> None:
if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
vocab = model_plus.vocab

assert params is not None

logger.info(f"Vocab info: {vocab}")
logger.info(f"Special vocab info: {special_vocab}")
model = model_plus.model
Expand Down
2 changes: 1 addition & 1 deletion examples/finetune/convert_finetune_checkpoint_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def __init__(self, dtype='f', ne=None):
if len(self.ne) == 0:
self.nbytes = 0
else:
self.nbytes = int(np.product(self.ne)) * 4
self.nbytes = int(np.prod(self.ne)) * 4
else:
raise ValueError(f"Unhandled data type '{self.dtype}'")

Expand Down
5 changes: 4 additions & 1 deletion examples/json_schema_pydantic_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#! pip install pydantic
#! python json_schema_pydantic_example.py

from pydantic import BaseModel, Extra, TypeAdapter
from pydantic import BaseModel, Field, TypeAdapter
from annotated_types import MinLen
from typing import Annotated, List, Optional
import json, requests
Expand All @@ -17,6 +17,9 @@ def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1
The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
'''
response_format = None
type_adapter = None

if response_model:
type_adapter = TypeAdapter(response_model)
schema = type_adapter.json_schema()
Expand Down
12 changes: 7 additions & 5 deletions examples/json_schema_to_grammar.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import itertools
import json
Expand Down Expand Up @@ -188,7 +190,7 @@ def uniform_range(from_str: str, to_str: str):
raise RuntimeError("At least one of min_value or max_value must be set")

class BuiltinRule:
def __init__(self, content: str, deps: list = None):
def __init__(self, content: str, deps: list | None = None):
self.content = content
self.deps = deps or []

Expand Down Expand Up @@ -248,7 +250,7 @@ def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):

def _format_literal(self, literal):
escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(
lambda m: GRAMMAR_LITERAL_ESCAPES.get(m.group(0)), literal
lambda m: GRAMMAR_LITERAL_ESCAPES.get(m.group(0)) or m.group(0), literal
)
return f'"{escaped}"'

Expand Down Expand Up @@ -403,11 +405,11 @@ def _visit_pattern(self, pattern, name):
i = 0
length = len(pattern)

def to_rule(s: Tuple[str, bool]) -> str:
def to_rule(s: tuple[str, bool]) -> str:
(txt, is_literal) = s
return "\"" + txt + "\"" if is_literal else txt

def transform() -> Tuple[str, bool]:
def transform() -> tuple[str, bool]:
'''
Parse a unit at index i (advancing it), and return its string representation + whether it's a literal.
'''
Expand All @@ -420,7 +422,7 @@ def transform() -> Tuple[str, bool]:
# We only need a flat structure here to apply repetition operators to the last item, and
# to merge literals at the and (we're parsing grouped ( sequences ) recursively and don't treat '|' specially
# (GBNF's syntax is luckily very close to regular expressions!)
seq: list[Tuple[str, bool]] = []
seq: list[tuple[str, bool]] = []

def get_dot():
if self._dotall:
Expand Down
10 changes: 6 additions & 4 deletions examples/llava/convert_image_encoder_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ def bytes_to_unicode():
fout.add_description("two-tower CLIP model")

if has_text_encoder:
assert t_hparams is not None
assert tokens is not None
# text_model hparams
fout.add_uint32(k(KEY_CONTEXT_LENGTH, TEXT), t_hparams["max_position_embeddings"])
fout.add_uint32(k(KEY_EMBEDDING_LENGTH, TEXT), t_hparams["hidden_size"])
Expand Down Expand Up @@ -259,8 +261,8 @@ def bytes_to_unicode():


if processor is not None:
image_mean = processor.image_processor.image_mean if args.image_mean is None or args.image_mean == default_image_mean else args.image_mean
image_std = processor.image_processor.image_std if args.image_std is None or args.image_std == default_image_std else args.image_std
image_mean = processor.image_processor.image_mean if args.image_mean is None or args.image_mean == default_image_mean else args.image_mean # pyright: ignore[reportAttributeAccessIssue]
image_std = processor.image_processor.image_std if args.image_std is None or args.image_std == default_image_std else args.image_std # pyright: ignore[reportAttributeAccessIssue]
else:
image_mean = args.image_mean if args.image_mean is not None else default_image_mean
image_std = args.image_std if args.image_std is not None else default_image_std
Expand All @@ -272,7 +274,7 @@ def bytes_to_unicode():


if has_llava_projector:
model.vision_model.encoder.layers.pop(-1)
model.vision_model.encoder.layers.pop(-1) # pyright: ignore[reportAttributeAccessIssue]
projector = torch.load(args.llava_projector)
for name, data in projector.items():
name = get_tensor_name(name)
Expand All @@ -286,7 +288,7 @@ def bytes_to_unicode():

print("Projector tensors added\n")

state_dict = model.state_dict()
state_dict = model.state_dict() # pyright: ignore[reportAttributeAccessIssue]
for name, data in state_dict.items():
if should_skip_tensor(name, has_text_encoder, has_vision_encoder, has_llava_projector):
# we don't need this
Expand Down
10 changes: 7 additions & 3 deletions examples/llava/llava_surgery_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
import glob
import os
import torch
from safetensors.torch import load as safe_load, save as safe_save, safe_open, save_file
from safetensors import safe_open
from safetensors.torch import save_file
from typing import Any, ContextManager, cast

# Function to determine if file is a SafeTensor file
def is_safetensor_file(file_path):
Expand All @@ -13,7 +15,7 @@ def is_safetensor_file(file_path):
def load_model(file_path):
if is_safetensor_file(file_path):
tensors = {}
with safe_open(file_path, framework="pt", device="cpu") as f:
with cast(ContextManager[Any], safe_open(file_path, framework="pt", device="cpu")) as f:
for key in f.keys():
tensors[key] = f.get_tensor(key).clone()
# output shape
Expand Down Expand Up @@ -134,7 +136,7 @@ def proj_criteria(checkpoint):
if last_checkpoint is not None:
for k, v in last_checkpoint.items():
print(k)
print(f"Found {len(mm_tensors)} tensors to extract out of {len(last_checkpoint)} tensors.")
print(f"Found {len(mm_tensors)} tensors to extract out of {len(last_checkpoint) if last_checkpoint is not None else 0} tensors.")
print("No tensors found. Is this a LLaVA model?")
exit()

Expand All @@ -143,8 +145,10 @@ def proj_criteria(checkpoint):
# projector = {name: checkpoint.[name].float() for name in mm_tensors}
projector = {}
for name in mm_tensors:
assert last_checkpoint is not None
projector[name] = last_checkpoint[name].float()
for name in first_mm_tensors:
assert first_checkpoint is not None
projector[name] = first_checkpoint[name].float()

if len(projector) > 0:
Expand Down
Loading

0 comments on commit da394c9

Please sign in to comment.