Commit

misc
IlyasMoutawwakil committed Jul 30, 2024
1 parent 13a02e8 commit c551f03
Showing 6 changed files with 47 additions and 59 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test_cli_cpu_llama_cpp.yaml
@@ -1,4 +1,4 @@
name: CLI CPU Llama.Cpp Tests
name: CLI CPU LlamaCpp Tests

on:
  workflow_dispatch:
12 changes: 9 additions & 3 deletions optimum_benchmark/backends/base.py
@@ -63,25 +63,31 @@ def __init__(self, config: BackendConfigT):

        elif self.config.library == "timm":
            self.logger.info("\t+ Benchmarking a Timm model")
            self.pretrained_config = get_timm_pretrained_config(self.config.model)
            self.model_shapes = extract_timm_shapes_from_config(self.pretrained_config)
            self.pretrained_config = get_timm_pretrained_config(self.config.model)
            self.automodel_loader = get_timm_automodel_loader()
            self.pretrained_processor = None
            self.generation_config = None

        elif self.config.library == "llama_cpp":
            self.logger.info("\t+ Benchmarking a Llama.cpp model")
            self.logger.info("\t+ Benchmarking a LlamaCpp model")
            self.pretrained_processor = None
            self.generation_config = None
            self.pretrained_config = None
            self.automodel_loader = None
            self.model_shapes = {}

        else:
            self.logger.info("\t+ Benchmarking a Transformers model")
            self.generation_config = get_transformers_generation_config(self.config.model, **self.config.model_kwargs)
            self.pretrained_config = get_transformers_pretrained_config(self.config.model, **self.config.model_kwargs)
            self.automodel_loader = get_transformers_automodel_loader_for_task(self.config.task)
            self.pretrained_processor = get_transformers_pretrained_processor(
                self.config.processor, **self.config.processor_kwargs
            )
            self.model_shapes = extract_transformers_shapes_from_artifacts(
                self.pretrained_config, self.pretrained_processor
            )
            self.automodel_loader = get_transformers_automodel_loader_for_task(self.config.task)

    def seed(self) -> None:
        set_seed(self.config.seed)
17 changes: 12 additions & 5 deletions optimum_benchmark/backends/config.py
@@ -52,17 +52,24 @@ def __post_init__(self):
        self.processor = self.model

        # TODO: add cache_dir, token, etc. to these methods
        if self.library is None:
            self.library = infer_library_from_model_name_or_path(
                self.model,
                self.model_kwargs.get("revision", None),
            )

        if self.task is None:
            self.task = infer_task_from_model_name_or_path(
                self.model, self.model_kwargs.get("revision", None), self.library
                self.model,
                self.model_kwargs.get("revision", None),
                self.library,
            )

        if self.library is None:
            self.library = infer_library_from_model_name_or_path(self.model, self.model_kwargs.get("revision", None))

        if self.model_type is None:
            self.model_type = infer_model_type_from_model_name_or_path(
                self.model, self.model_kwargs.get("revision", None), self.library
                self.model,
                self.model_kwargs.get("revision", None),
                self.library,
            )

        if self.device is None:
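For context, the reordering above matters because task inference now receives the already-inferred library as its third argument. A minimal sketch of that dependency, reusing the helper names from the hunk (the module path and model id are assumptions for illustration):

from optimum_benchmark.task_utils import (  # assumed module path, not taken from this commit
    infer_library_from_model_name_or_path,
    infer_task_from_model_name_or_path,
)

model = "openai-community/gpt2"  # placeholder model id

# the library must be known first, because the task lookup branches on it
library = infer_library_from_model_name_or_path(model, None)
task = infer_task_from_model_name_or_path(model, None, library)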
62 changes: 17 additions & 45 deletions optimum_benchmark/backends/llama_cpp/backend.py
@@ -1,5 +1,5 @@
from tempfile import TemporaryDirectory
from typing import Any, Dict, Tuple
from typing import Any, Dict

from llama_cpp import Llama

@@ -10,13 +10,11 @@
class LlamaCppBackend(Backend[LlamaCppConfig]):
    NAME: str = "llama_cpp"

    pretrained_model: Llama

    def __init__(self, config: LlamaCppConfig) -> None:
        super().__init__(config)

        if self.config.no_weights:
            self.logger.info("\t+ Loading no weights model")
            raise NotImplementedError("No weights model is not yet implemented")

    def load(self) -> None:
        self.logger.info("\t+ Creating backend temporary directory")
        self.tmpdir = TemporaryDirectory()
@@ -35,10 +33,6 @@ def load_model_from_pretrained(self) -> None:
            **self.llama_cpp_kwargs,
        )

    def validate_task(self) -> None:
        if self.config.task not in ["text-generation"]:
            raise ValueError(f"Task {self.config.task} not supported by {self.NAME}")

    @property
    def llama_cpp_kwargs(self) -> Dict[str, Any]:
        return {
@@ -47,51 +41,29 @@ def llama_cpp_kwargs(self) -> Dict[str, Any]:
"echo": False,
}

def prepare_inputs(self, inputs: Dict[str, Any]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
def prepare_input_shapes(self, input_shapes: Dict[str, Any]) -> Dict[str, Any]:
if self.config.task == "text-generation":
if inputs["input_ids"].shape[0] != 1:
raise ValueError("Batch size must be 1 for Llama.cpp text generation")
if input_shapes["batch_size"] != 1:
raise ValueError("Batch size must be 1 for LlamaCpp text generation")

inputs = super().prepare_inputs(inputs)
inputs["tokens"] = inputs["input_ids"].squeeze()
return input_shapes

return inputs
elif self.config.task == "feature-extraction":
detokenized_batch = list(map(self.pretrained_model.detokenize, inputs["input_ids"]))
decoded_batch = [x.decode("utf-8") for x in detokenized_batch]
def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
if self.config.task == "text-generation":
return {"tokens": inputs["input_ids"].squeeze(0).tolist()}

inputs["input_str"] = decoded_batch
return inputs
elif self.config.task == "feature-extraction":
return {"input": [self.pretrained_model.detokenize(x).decode("utf-8") for x in inputs["input_ids"]]}

raise ValueError(f"Task {self.config.task} not supported by {self.NAME}")

    def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> Any:
        """
        Forward pass of the model\
        Get the embeddings of the input tokens
        """

        return self.pretrained_model.embed(inputs["input_str"])
        self.pretrained_model.embed(**inputs)

    def prefill(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> list[int]:
        """
        Prefill the model with the input tokens
        We consider prefill as the time to first token, thus we evaluate the time it takes for the model to generate the first token
        """

        next(self.pretrained_model.generate(tokens=inputs["tokens"]))
        return inputs
        next(self.pretrained_model.generate(**inputs))

    def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> list[int]:
        """
        Generate new tokens from the pretrained model
        """

        output = []

        for token in self.pretrained_model.generate(tokens=inputs["tokens"]):
            output.append(token)
            if len(output) >= kwargs["max_new_tokens"]:
                break

        return output
        generator = self.pretrained_model.generate(**inputs)
        for _ in range(kwargs["max_new_tokens"]):
            next(generator)
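For context, a standalone sketch (not part of this commit) of the generation pattern the backend now uses: prefill is measured as the time to the first token pulled from llama-cpp-python's token-level generator, and decoding then draws max_new_tokens tokens from a fresh generator. The GGUF path, prompt, and token budget below are placeholder assumptions.

from llama_cpp import Llama

llm = Llama(model_path="model.gguf", verbose=False)  # placeholder GGUF path

tokens = llm.tokenize(b"Hello, world")  # plays the role of inputs["tokens"] above
max_new_tokens = 16  # placeholder decoding budget

# "prefill": time to first token, measured by pulling a single token
first_token = next(llm.generate(tokens))

# "generate": draw max_new_tokens tokens from a fresh generator
generator = llm.generate(tokens)
output = [next(generator) for _ in range(max_new_tokens)]

print(llm.detokenize(output).decode("utf-8", errors="ignore"))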
12 changes: 7 additions & 5 deletions optimum_benchmark/backends/llama_cpp/config.py
@@ -12,14 +12,16 @@ class LlamaCppConfig(BackendConfig):
    _target_: str = "optimum_benchmark.backends.llama_cpp.backend.LlamaCppBackend"

    no_weights: bool = False
    library: str = "llama_cpp"
    filename: Optional[str] = None

    def __post_init__(self):
        self.library = "llama_cpp"
        self.model_type = "llama_cpp"

        super().__post_init__()

        self.device = self.device.lower()  # type: ignore
        self.library = "llama_cpp"
        if self.task not in ["feature-extraction", "text-generation"]:
            raise NotImplementedError(f"Task {self.task} is not supported by LlamaCpp backend.")

        if self.device not in ["cuda", "mps", "cpu"]:
            raise ValueError(f"Llama.cpp Backend only supports 'cpu', 'mps' and 'cuda' devices, got {self.device}")
        if self.no_weights:
            raise NotImplementedError("`no_weights` benchmarking is not supported by LlamaCpp backend.")
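For context, a rough usage sketch under the constraints enforced above (a cpu, mps or cuda device, a feature-extraction or text-generation task, and no no_weights benchmarking). The top-level Benchmark API and the GGUF repository/filename are assumptions based on the project README, not part of this commit.

from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig
from optimum_benchmark.backends.llama_cpp.config import LlamaCppConfig

backend_config = LlamaCppConfig(
    device="cpu",
    task="text-generation",
    model="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",  # placeholder GGUF repository
    filename="tinyllama-1.1b-chat-v1.0.Q4_0.gguf",  # placeholder GGUF file
)

benchmark_config = BenchmarkConfig(
    name="llama_cpp_text_generation",
    launcher=ProcessConfig(),
    scenario=InferenceConfig(latency=True, memory=True),
    backend=backend_config,
)

benchmark_report = Benchmark.launch(benchmark_config)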
1 change: 1 addition & 0 deletions optimum_benchmark/backends/pytorch/backend.py
@@ -441,6 +441,7 @@ def train(
        training_data_collator: Callable[[List[Dict[str, Any]]], Dict[str, Any]],
    ) -> TrainerState:
        self.logger.info(f"\t+ Wrapping training arguments with {TrainingArguments.__name__}")
        training_arguments["use_cpu"] = self.config.device == "cpu"
        training_arguments = TrainingArguments(**training_arguments)
        self.logger.info(f"\t+ Wrapping model with {Trainer.__name__}")
        trainer = Trainer(
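For context, the added use_cpu line pins training to the configured device before the dict is wrapped into TrainingArguments. A small sketch of that wrapping step (output_dir and the other arguments are placeholder assumptions):

from transformers import TrainingArguments

device = "cpu"  # stands in for self.config.device
training_arguments = {"output_dir": "./trainer_output", "per_device_train_batch_size": 1}

# without this, recent transformers releases may move training onto an available
# accelerator even when the benchmark is meant to run on CPU
training_arguments["use_cpu"] = device == "cpu"
training_arguments = TrainingArguments(**training_arguments)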
