From 443dcbe5afc7ce2df87cd691eab1478da81d7360 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Thu, 21 Mar 2024 07:58:12 -0700
Subject: [PATCH] [Misc] Bump up transformers to v4.39.0 & Remove
 StarCoder2Config (#3551)

Co-authored-by: Roy
Co-authored-by: Roger Meier
---
 requirements-rocm.txt                         |  2 +-
 requirements.txt                              |  2 +-
 vllm/model_executor/models/starcoder2.py      |  8 +--
 vllm/transformers_utils/config.py             | 10 ----
 vllm/transformers_utils/configs/__init__.py   |  2 -
 vllm/transformers_utils/configs/starcoder2.py | 55 ------------------
 6 files changed, 3 insertions(+), 76 deletions(-)
 delete mode 100644 vllm/transformers_utils/configs/starcoder2.py

diff --git a/requirements-rocm.txt b/requirements-rocm.txt
index c30479e40..07d94cd94 100644
--- a/requirements-rocm.txt
+++ b/requirements-rocm.txt
@@ -7,7 +7,7 @@ ray >= 2.9
 sentencepiece  # Required for LLaMA tokenizer.
 numpy
 tokenizers>=0.15.0
-transformers >= 4.38.0  # Required for Gemma.
+transformers >= 4.39.0  # Required for StarCoder2.
 fastapi
 uvicorn[standard]
 pydantic >= 2.0  # Required for OpenAI server.
diff --git a/requirements.txt b/requirements.txt
index c9a5bd661..e136defad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ ray >= 2.9
 sentencepiece  # Required for LLaMA tokenizer.
 numpy
 torch == 2.1.2
-transformers >= 4.38.0  # Required for Gemma.
+transformers >= 4.39.0  # Required for StarCoder2.
 xformers == 0.0.23.post1  # Required for CUDA 12.1.
 fastapi
 uvicorn[standard]
diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py
index e418951a6..e5003361b 100644
--- a/vllm/model_executor/models/starcoder2.py
+++ b/vllm/model_executor/models/starcoder2.py
@@ -22,6 +22,7 @@
 
 import torch
 from torch import nn
+from transformers import Starcoder2Config
 
 from vllm.model_executor.input_metadata import InputMetadata
 from vllm.model_executor.sampling_metadata import SamplingMetadata
@@ -42,13 +43,6 @@
                                               hf_model_weights_iterator)
 from vllm.sequence import SamplerOutput
 
-try:
-    from transformers import Starcoder2Config
-except ImportError:
-    # fallback to PretrainedConfig
-    # NOTE: Please install transformers from source or use transformers>=4.39.0
-    from transformers import PretrainedConfig as Starcoder2Config
-
 KVCache = Tuple[torch.Tensor, torch.Tensor]
 
 
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 081e81768..dc2262489 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -9,7 +9,6 @@
     "mpt": MPTConfig,
     "RefinedWeb": RWConfig,  # For tiiuae/falcon-40b(-instruct)
     "RefinedWebModel": RWConfig,  # For tiiuae/falcon-7b(-instruct)
-    "starcoder2": Starcoder2Config,
     "jais": JAISConfig,
 }
 
@@ -18,15 +17,6 @@ def get_config(model: str,
                trust_remote_code: bool,
                revision: Optional[str] = None,
                code_revision: Optional[str] = None) -> PretrainedConfig:
-    # FIXME(woosuk): This is a temporary fix for StarCoder2.
-    # Remove this when the model is supported by HuggingFace transformers.
-    if "bigcode" in model and "starcoder2" in model:
-        config_class = _CONFIG_REGISTRY["starcoder2"]
-        config = config_class.from_pretrained(model,
-                                              revision=revision,
-                                              code_revision=code_revision)
-        return config
-
     try:
         config = AutoConfig.from_pretrained(
             model,
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
index 150ee2ce9..6fed2fab8 100644
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -4,13 +4,11 @@
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
-from vllm.transformers_utils.configs.starcoder2 import Starcoder2Config
 from vllm.transformers_utils.configs.jais import JAISConfig
 
 __all__ = [
     "ChatGLMConfig",
     "MPTConfig",
     "RWConfig",
-    "Starcoder2Config",
     "JAISConfig",
 ]
diff --git a/vllm/transformers_utils/configs/starcoder2.py b/vllm/transformers_utils/configs/starcoder2.py
deleted file mode 100644
index 2879cd044..000000000
--- a/vllm/transformers_utils/configs/starcoder2.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from transformers import PretrainedConfig
-
-
-class Starcoder2Config(PretrainedConfig):
-    model_type = "starcoder2"
-    keys_to_ignore_at_inference = ["past_key_values"]
-
-    def __init__(
-        self,
-        vocab_size=49152,
-        hidden_size=3072,
-        intermediate_size=12288,
-        num_hidden_layers=30,
-        num_attention_heads=24,
-        num_key_value_heads=2,
-        hidden_act="gelu_pytorch_tanh",
-        max_position_embeddings=4096,
-        initializer_range=0.018042,
-        norm_epsilon=1e-5,
-        use_cache=True,
-        bos_token_id=50256,
-        eos_token_id=50256,
-        rope_theta=10000.0,
-        sliding_window=None,
-        attention_dropout=0.0,
-        residual_dropout=0.0,
-        embedding_dropout=0.0,
-        use_bias=True,
-        **kwargs,
-    ):
-        self.vocab_size = vocab_size
-        self.max_position_embeddings = max_position_embeddings
-        self.hidden_size = hidden_size
-        self.intermediate_size = intermediate_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.sliding_window = sliding_window
-        self.use_bias = use_bias
-        self.num_key_value_heads = num_key_value_heads
-        self.hidden_act = hidden_act
-        self.initializer_range = initializer_range
-        self.norm_epsilon = norm_epsilon
-        self.use_cache = use_cache
-        self.rope_theta = rope_theta
-        self.attention_dropout = attention_dropout
-        self.residual_dropout = residual_dropout
-        self.embedding_dropout = embedding_dropout
-
-        super().__init__(
-            bos_token_id=bos_token_id,
-            eos_token_id=eos_token_id,
-            **kwargs,
-        )
-        if self.architectures is None:
-            self.architectures = ['Starcoder2ForCausalLM']
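
Note: with the vendored config and the get_config() special case removed, StarCoder2
checkpoints must resolve entirely through the upstream transformers classes. A minimal
sketch of the load path this patch now relies on; the checkpoint name below is
illustrative and not part of the diff:

    # transformers >= 4.39.0 registers model_type "starcoder2" with AutoConfig,
    # so vLLM no longer needs its own Starcoder2Config or the
    # "bigcode"/"starcoder2" branch that bypassed AutoConfig in get_config().
    from transformers import AutoConfig, Starcoder2Config

    # AutoConfig dispatches on the model_type stored in the checkpoint's
    # config.json and returns the upstream Starcoder2Config directly.
    config = AutoConfig.from_pretrained("bigcode/starcoder2-3b")
    assert isinstance(config, Starcoder2Config)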