Skip to content

Commit

Permalink
ci/test energy star
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyasMoutawwakil committed Nov 22, 2024
1 parent 1b79eac commit 5d267c0
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 16 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/test_energy_star.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Workflow that runs the Energy Star CLI test module (tests/test_energy_star.py)
# inside the optimum-benchmark CUDA container image.
name: CLI CUDA Energy Star Tests

# Triggers: manual dispatch, pushes to main, and pull requests targeting main.
# The labeled/unlabeled pull request event types are listed alongside the usual
# ones; presumably so that label changes re-evaluate the label-based job
# condition further down (confirm with the workflow owner).
on:
workflow_dispatch:
push:
branches:
- main
pull_request:
branches:
- main
types:
- opened
- reopened
- synchronize
- labeled
- unlabeled

# One concurrency group per workflow and per pull request number (or git ref
# when there is no pull request); a newer run in the same group cancels the
# run that is still in progress.
concurrency:
cancel-in-progress: true
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}

jobs:
run_energy_star_single_gpu_tests:
# Always run on push and manual dispatch; for other events, run only when the
# pull request carries one of the labels checked in the expression.
if: ${{
(github.event_name == 'push') ||
(github.event_name == 'workflow_dispatch') ||
contains( github.event.pull_request.labels.*.name, 'single_gpu') ||
contains( github.event.pull_request.labels.*.name, 'energy_star') ||
contains( github.event.pull_request.labels.*.name, 'cli_cuda_pytorch_single_gpu')
}}

# Runner group the job is scheduled on.
runs-on:
group: aws-g5-4xlarge-plus

# The job runs inside this container, with host IPC and all GPUs passed through.
container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
options: --ipc host --gpus all

steps:
- name: Checkout
uses: actions/checkout@v4

# Editable install of the checked-out repository with the extras listed in brackets.
- name: Install dependencies
run: |
pip install -e .[testing,diffusers,timm,codecarbon]
- name: Run tests
run: |
pytest tests/test_energy_star.py -x -s
2 changes: 1 addition & 1 deletion examples/energy_star/t5_question_answering.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ scenario:
context_column_name: context
dataset_prefix1: "question: "
dataset_prefix2: " context: "
t5_task: question-answering
t5_task: question_answering
num_samples: 1000

input_shapes:
Expand Down
2 changes: 1 addition & 1 deletion examples/energy_star/t5_text_classification.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ backend:
scenario:
dataset_name: EnergyStarAI/text_classification
dataset_prefix1: "sst2 sentence: "
t5_task: text-classification
t5_task: text_classification
text_column_name: text

num_samples: 1000
Expand Down
11 changes: 7 additions & 4 deletions optimum_benchmark/backends/transformers_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,13 @@ def extract_transformers_shapes_from_artifacts(
return shapes


def get_transformers_automodel_loader_for_task(task: str):
    """Return the ``transformers`` attribute registered as the loader for ``task``.

    The attribute name is read from ``TASKS_TO_MODEL_LOADERS[task]``; a task that
    is missing from that mapping raises ``KeyError``.
    """
    loader_name = TASKS_TO_MODEL_LOADERS[task]
    return getattr(transformers, loader_name)
def get_transformers_automodel_loader_for_task(task: str, model_type: Optional[str] = None):
    """Return the ``transformers`` attribute registered as the loader for ``task``.

    Args:
        task: Key into the task-level mappings.
        model_type: Optional second-level key. When given, the loader name is
            read from ``TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task][model_type]``;
            otherwise it is read from ``TASKS_TO_MODEL_LOADERS[task]``.

    Returns:
        The attribute of the ``transformers`` module with the resolved name.

    Raises:
        KeyError: If ``task`` (or ``model_type``) is missing from the mapping used.
        AttributeError: If ``transformers`` has no attribute with the resolved name.
    """
    loader_name = (
        TASKS_TO_MODEL_LOADERS[task]
        if model_type is None
        else TASKS_TO_MODEL_TYPES_TO_MODEL_CLASSES[task][model_type]
    )
    return getattr(transformers, loader_name)


TORCH_INIT_FUNCTIONS = {
Expand Down
22 changes: 12 additions & 10 deletions optimum_benchmark/scenarios/energy_star/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,15 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:

if self.task in TEXT_GENERATION_TASKS:
self.logger.info("\t+ Updating Text Generation kwargs with default values")
self.generate_kwargs = {**TEXT_GENERATION_DEFAULT_KWARGS, **self.config.generate_kwargs}
self.config.generate_kwargs = {**TEXT_GENERATION_DEFAULT_KWARGS, **self.config.generate_kwargs}
self.prefill_kwargs = {**self.config.generate_kwargs, **TEXT_GENERATION_PREFILL_OVERRIDES}
self.logger.info("\t+ Initializing Text Generation report")
self.report = BenchmarkReport.from_list(
targets=["load_dataset", "preprocess_dataset", "load_model", "prefill", "decode", "per_token"]
)
elif self.task in IMAGE_DIFFUSION_TASKS:
self.logger.info("\t+ Updating Image Diffusion kwargs with default values")
self.call_kwargs = {**IMAGE_DIFFUSION_DEFAULT_KWARGS, **self.config.call_kwargs}
self.config.call_kwargs = {**IMAGE_DIFFUSION_DEFAULT_KWARGS, **self.config.call_kwargs}
self.logger.info("\t+ Initializing Image Diffusion report")
self.report = BenchmarkReport.from_list(
targets=["load_dataset", "preprocess_dataset", "load_model", "call"]
Expand Down Expand Up @@ -146,9 +146,11 @@ def run_model_loading_energy_tracking(self, backend: Backend[BackendConfigT]):
# Text Generation warmup
def warmup_text_generation(self, backend: Backend[BackendConfigT]):
self.logger.info("\t+ Warming up backend for Text Generation")
backend.generate(self.prepared_sample_inputs, self.generate_kwargs)
backend.generate(self.prepared_sample_inputs, self.config.generate_kwargs)
for _ in range(self.config.warmup_runs):
backend.generate(self.prepared_sample_inputs, {**self.generate_kwargs, **TEXT_GENERATION_WARMUP_OVERRIDES})
backend.generate(
self.prepared_sample_inputs, {**self.config.generate_kwargs, **TEXT_GENERATION_WARMUP_OVERRIDES}
)

# Image Diffusion warmup
def warmup_image_diffusion(self, backend: Backend[BackendConfigT]):
Expand Down Expand Up @@ -183,7 +185,7 @@ def run_text_generation_energy_tracking(self, backend: Backend[BackendConfigT]):
with self.energy_tracker.track(file_prefix="generate"):
for i in tqdm(range(0, self.config.num_samples, self.config.input_shapes["batch_size"])):
inputs = backend.prepare_inputs(self.dataset[i : i + self.config.input_shapes["batch_size"]])
backend.generate(inputs, self.generate_kwargs)
backend.generate(inputs, self.config.generate_kwargs)

generate_energy = self.energy_tracker.get_energy()
decode_energy = generate_energy - prefill_energy
Expand Down Expand Up @@ -239,7 +241,7 @@ def dataset_forward_volume(self) -> int: # in samples
@property
def dataset_call_volume(self) -> int: # in images
if self.task == "text-to-image":
return self.config.num_samples * self.call_kwargs["num_images_per_prompt"]
return self.config.num_samples * self.config.call_kwargs["num_images_per_prompt"]
else:
return self.config.num_samples

Expand All @@ -249,7 +251,7 @@ def dataset_prefill_volume(self) -> int: # in tokens

for sample in self.dataset:
if "input_ids" in sample.keys():
# text/image-text conditioned generation (sequence_length tokens)
# text/image-text conditioned generation
prefill_volume += self.raw_sample_inputs["input_ids"].numel()
else:
# image/audio/other conditioned generation (1 bos token)
Expand All @@ -261,13 +263,13 @@ def dataset_prefill_volume(self) -> int: # in tokens
def dataset_per_token_volume(self) -> int: # in tokens
return (
self.config.num_samples
* self.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated
* self.config.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated
)

@property
def dataset_decode_volume(self) -> int: # in tokens
return (
self.config.num_samples
* self.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated
* (self.generate_kwargs["max_new_tokens"] - 1) # 1 token is generated during prefill
* self.config.generate_kwargs["num_beams"] # at each beam stage there are num_beams tokens generated
* (self.config.generate_kwargs["max_new_tokens"] - 1) # 1 token is generated during prefill
)
41 changes: 41 additions & 0 deletions tests/test_energy_star.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os
from logging import getLogger
from pathlib import Path

import pytest

from optimum_benchmark.logging_utils import run_subprocess_and_log_stream_output

LOGGER = getLogger("test-cli")


# Directory containing the Energy Star example configs exercised by this module.
TEST_CONFIG_DIR = Path(__file__).parent.parent / "examples/energy_star"

# Names (file stems) of the YAML configs to parametrize the CLI test over.
# - The stem is taken with pathlib so that a name containing dots is not
#   truncated at its first dot.
# - Configs whose stem starts or ends with an underscore are excluded; the
#   check is done on the stem because a full file name ending in ".yaml" can
#   never end with "_".
# - The result is sorted because directory listing order is arbitrary, and a
#   stable order keeps the parametrized test ids reproducible across runs.
TEST_CONFIG_NAMES = sorted(
    path.stem
    for path in TEST_CONFIG_DIR.iterdir()
    if path.suffix == ".yaml" and not (path.stem.startswith("_") or path.stem.endswith("_"))
)

# Device visibility variables read from the environment; None when unset.
ROCR_VISIBLE_DEVICES = os.environ.get("ROCR_VISIBLE_DEVICES")
CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES")


@pytest.mark.parametrize("config_name", TEST_CONFIG_NAMES)
def test_cli_configs(config_name):
    """Run the ``optimum-benchmark`` CLI on one example config and expect exit code 0.

    The run is shrunk to two samples with a batch size of two through command-line
    overrides. When ``ROCR_VISIBLE_DEVICES`` is set it is forwarded as
    ``backend.device_ids``; otherwise ``CUDA_VISIBLE_DEVICES`` is forwarded when set.
    """
    # ROCR takes precedence over CUDA, mirroring an if/elif on the two variables.
    device_ids = ROCR_VISIBLE_DEVICES if ROCR_VISIBLE_DEVICES is not None else CUDA_VISIBLE_DEVICES

    command = ["optimum-benchmark", "--config-dir", TEST_CONFIG_DIR, "--config-name", config_name]
    command.extend(["scenario.num_samples=2", "scenario.input_shapes.batch_size=2"])
    if device_ids is not None:
        command.append(f'backend.device_ids="{device_ids}"')

    process = run_subprocess_and_log_stream_output(LOGGER, command)
    assert process.returncode == 0, f"Failed to run {config_name}"

0 comments on commit 5d267c0

Please sign in to comment.