Commit 0a6df5b: … into main

IlyasMoutawwakil committed Oct 8, 2024
2 parents 92d6230 + 5df5826
Showing 30 changed files with 473 additions and 239 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/test_api_cuda.yaml
@@ -29,7 +29,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'api_cuda')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
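The same runner change repeats across the CUDA workflows below: the hard-coded self-hosted runner labels are replaced by a GitHub Actions runner group. A minimal sketch of the new selection in job context (indentation restored; the job id is a placeholder, the other keys are taken from the diffs):

```yaml
jobs:
  api_cuda_tests:                    # placeholder job id
    runs-on:
      group: aws-g5-4xlarge-plus     # single-GPU jobs; multi-GPU jobs use aws-g5-12xlarge-plus
    container:
      image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
```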
3 changes: 2 additions & 1 deletion .github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -30,7 +30,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_onnxruntime')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
3 changes: 2 additions & 1 deletion .github/workflows/test_cli_cuda_py_txi.yaml
@@ -30,7 +30,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_py_txi')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

steps:
- name: Checkout
6 changes: 4 additions & 2 deletions .github/workflows/test_cli_cuda_pytorch.yaml
@@ -31,7 +31,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_pytorch_single_gpu')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
@@ -60,7 +61,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_pytorch_multi_gpu')
}}

runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]
runs-on:
group: aws-g5-12xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
6 changes: 4 additions & 2 deletions .github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -31,7 +31,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: huggingface/optimum-nvidia:latest
@@ -60,7 +61,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm_multi_gpu')
}}

runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]
runs-on:
group: aws-g5-12xlarge-plus

container:
image: huggingface/optimum-nvidia:latest
6 changes: 4 additions & 2 deletions .github/workflows/test_cli_cuda_torch_ort.yaml
@@ -31,7 +31,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_torch_or_single_gpu')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda-ort
@@ -61,7 +62,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_torch_ort_multi_gpu')
}}

runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]
runs-on:
group: aws-g5-12xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda-ort
6 changes: 4 additions & 2 deletions .github/workflows/test_cli_cuda_vllm.yaml
@@ -31,7 +31,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_vllm_single_gpu')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: vllm/vllm-openai:latest
@@ -60,7 +61,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_vllm_multi_gpu')
}}

runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]
runs-on:
group: aws-g5-12xlarge-plus

container:
image: vllm/vllm-openai:latest
3 changes: 3 additions & 0 deletions examples/_base_.yaml
@@ -1,3 +1,6 @@
log_report: true
print_report: true

# hydra/cli specific settings
hydra:
run:
42 changes: 32 additions & 10 deletions examples/pytorch_bert.py
@@ -1,23 +1,45 @@
import os

from huggingface_hub import whoami

from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

setup_logging(level="INFO", prefix="MAIN-PROCESS")
try:
USERNAME = whoami()["name"]
except Exception as e:
print(f"Failed to get username from Hugging Face Hub: {e}")
USERNAME = None

if __name__ == "__main__":
BENCHMARK_NAME = "pytorch_bert"
REPO_ID = f"IlyasMoutawwakil/{BENCHMARK_NAME}"
BENCHMARK_NAME = "pytorch_bert"


def run_benchmark():
launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model="bert-base-uncased")
scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128})

benchmark_config = BenchmarkConfig(
name=BENCHMARK_NAME, launcher=launcher_config, backend=backend_config, scenario=scenario_config
name=BENCHMARK_NAME,
launcher=launcher_config,
scenario=scenario_config,
backend=backend_config,
print_report=True,
log_report=True,
)
# benchmark_config.push_to_hub(repo_id=REPO_ID)

benchmark_report = Benchmark.launch(benchmark_config)
# benchmark_report.push_to_hub(repo_id=REPO_ID)

return benchmark_config, benchmark_report


if __name__ == "__main__":
level = os.environ.get("LOG_LEVEL", "INFO")
to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")

benchmark_config, benchmark_report = run_benchmark()
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
# benchmark.push_to_hub(repo_id=REPO_ID)

if USERNAME is not None:
benchmark_config.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
benchmark_report.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
benchmark.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
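Because the diff interleaves old and new lines, here is a consolidated sketch of the refactored example as it reads after this commit, assuming only the API shown in the hunks above:

```python
import os

from huggingface_hub import whoami

from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

try:
    USERNAME = whoami()["name"]  # Hub username, used to pick the target repo for results
except Exception as e:
    print(f"Failed to get username from Hugging Face Hub: {e}")
    USERNAME = None

BENCHMARK_NAME = "pytorch_bert"


def run_benchmark():
    # Process launcher with device isolation; CUDA backend on GPU 0; no_weights=True skips
    # loading pretrained weights and benchmarks a randomly initialized model.
    launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model="bert-base-uncased")
    scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128})

    benchmark_config = BenchmarkConfig(
        name=BENCHMARK_NAME,
        launcher=launcher_config,
        scenario=scenario_config,
        backend=backend_config,
        print_report=True,
        log_report=True,
    )
    benchmark_report = Benchmark.launch(benchmark_config)
    return benchmark_config, benchmark_report


if __name__ == "__main__":
    # Logging is now configurable through environment variables.
    level = os.environ.get("LOG_LEVEL", "INFO")
    to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
    setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")

    benchmark_config, benchmark_report = run_benchmark()
    benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

    if USERNAME is not None:
        # Publish config, report, and combined benchmark under the user's "benchmarks" repo.
        benchmark_config.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
        benchmark_report.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
        benchmark.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
```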
49 changes: 32 additions & 17 deletions examples/pytorch_llama.py
@@ -1,8 +1,16 @@
import os

from huggingface_hub import whoami

from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

try:
USERNAME = whoami()["name"]
except Exception as e:
print(f"Failed to get username from Hugging Face Hub: {e}")
USERNAME = None

BENCHMARK_NAME = "pytorch-llama"

WEIGHTS_CONFIGS = {
@@ -11,16 +19,16 @@
"quantization_scheme": None,
"quantization_config": {},
},
# "4bit-awq-gemm": {
# "torch_dtype": "float16",
# "quantization_scheme": "awq",
# "quantization_config": {"bits": 4, "version": "gemm"},
# },
# "4bit-gptq-exllama-v2": {
# "torch_dtype": "float16",
# "quantization_scheme": "gptq",
# "quantization_config": {"bits": 4, "use_exllama ": True, "version": 2, "model_seqlen": 256},
# },
"4bit-awq-gemm": {
"torch_dtype": "float16",
"quantization_scheme": "awq",
"quantization_config": {"bits": 4, "version": "gemm"},
},
"4bit-gptq-exllama-v2": {
"torch_dtype": "float16",
"quantization_scheme": "gptq",
"quantization_config": {"bits": 4, "use_exllama ": True, "version": 2, "model_seqlen": 256},
},
}


@@ -42,16 +50,17 @@ def run_benchmark(weight_config: str):
input_shapes={"batch_size": 1, "sequence_length": 128},
generate_kwargs={"max_new_tokens": 32, "min_new_tokens": 32},
)

benchmark_config = BenchmarkConfig(
name=BENCHMARK_NAME, launcher=launcher_config, scenario=scenario_config, backend=backend_config
name=BENCHMARK_NAME,
launcher=launcher_config,
scenario=scenario_config,
backend=backend_config,
print_report=True,
log_report=True,
)
benchmark_report = Benchmark.launch(benchmark_config)
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

filename = f"{BENCHMARK_NAME}-{backend_config.version}-{weight_config}.json"
benchmark.push_to_hub(repo_id="optimum-benchmark/pytorch-llama", filename=filename)
benchmark.save_json(path=f"benchmarks/{filename}")
return benchmark_config, benchmark_report


if __name__ == "__main__":
@@ -60,4 +69,10 @@ def run_benchmark(weight_config: str):
setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")

for weight_config in WEIGHTS_CONFIGS:
run_benchmark(weight_config)
benchmark_config, benchmark_report = run_benchmark(weight_config)
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

if USERNAME is not None:
benchmark.push_to_hub(
repo_id=f"{USERNAME}/benchmarks", filename=f"{weight_config}.json", subfolder=BENCHMARK_NAME
)
17 changes: 17 additions & 0 deletions optimum_benchmark/backends/diffusers_utils.py
@@ -39,12 +39,18 @@


def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]:
if not is_diffusers_available():
raise ImportError("diffusers is not available. Please, pip install diffusers.")

config = DiffusionPipeline.load_config(model, **kwargs)
pipeline_config = config[0] if isinstance(config, tuple) else config
return pipeline_config


def extract_diffusers_shapes_from_model(model: str, **kwargs) -> Dict[str, int]:
if not is_diffusers_available():
raise ImportError("diffusers is not available. Please, pip install diffusers.")

model_config = get_diffusers_pretrained_config(model, **kwargs)

shapes = {}
@@ -56,6 +62,14 @@ def extract_diffusers_shapes_from_model(model: str, **kwargs) -> Dict[str, int]:
shapes["height"] = vae_config["sample_size"]
shapes["width"] = vae_config["sample_size"]

elif "vae_decoder" in model_config:
vae_import_path = model_config["vae_decoder"]
vae_class = get_class(f"{vae_import_path[0]}.{vae_import_path[1]}")
vae_config = vae_class.load_config(model, subfolder="vae_decoder", **kwargs)
shapes["num_channels"] = vae_config["out_channels"]
shapes["height"] = vae_config["sample_size"]
shapes["width"] = vae_config["sample_size"]

elif "vae_encoder" in model_config:
vae_import_path = model_config["vae_encoder"]
vae_class = get_class(f"{vae_import_path[0]}.{vae_import_path[1]}")
@@ -74,6 +88,9 @@ def extract_diffusers_shapes_from_model(model: str, **kwargs) -> Dict[str, int]:


def get_diffusers_automodel_loader_for_task(task: str):
if not is_diffusers_available():
raise ImportError("diffusers is not available. Please, pip install diffusers.")

model_loader_name = TASKS_TO_MODEL_LOADERS[task]
model_loader_class = getattr(diffusers, model_loader_name)
return model_loader_class
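The pattern added throughout this file (and in the peft and timm utilities below) is an availability guard at the top of each helper, so a missing optional dependency raises an actionable ImportError instead of failing later with a NameError. A minimal standalone sketch of the idea; the importlib-based check and the function name are illustrative, not the library's own code:

```python
from importlib.util import find_spec
from typing import Any, Dict


def is_diffusers_available() -> bool:
    # Illustrative check; optimum_benchmark ships its own is_diffusers_available helper.
    return find_spec("diffusers") is not None


def load_pipeline_config(model: str, **kwargs) -> Dict[str, Any]:
    # Guard before touching the optional dependency.
    if not is_diffusers_available():
        raise ImportError("diffusers is not available. Please, pip install diffusers.")

    from diffusers import DiffusionPipeline

    config = DiffusionPipeline.load_config(model, **kwargs)
    # load_config may return (config, unused_kwargs); keep only the config dict.
    return config[0] if isinstance(config, tuple) else config
```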
8 changes: 3 additions & 5 deletions optimum_benchmark/backends/onnxruntime/backend.py
@@ -297,11 +297,9 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs:
inputs = process_inputs

if self.config.library == "transformers":
for key, value in list(inputs.items()):
if key in ["position_ids", "token_type_ids"]:
if key not in self.pretrained_model.input_names:
inputs.pop(key)
for key in list(inputs.keys()):
if hasattr(self.pretrained_model, "input_names") and key not in self.pretrained_model.input_names:
inputs.pop(key)

for key, value in inputs.items():
if isinstance(value, torch.Tensor):
4 changes: 4 additions & 0 deletions optimum_benchmark/backends/openvino/backend.py
@@ -201,6 +201,10 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs:
inputs = process_inputs

for key in list(inputs.keys()):
if hasattr(self.pretrained_model, "input_names") and key not in self.pretrained_model.input_names:
inputs.pop(key)

return inputs

def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
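Both the ONNX Runtime and OpenVINO backends now prune inputs the same way: any key the exported model does not list in its input_names is dropped, instead of special-casing position_ids and token_type_ids for transformers models. A standalone sketch of that filter, with a hypothetical model argument and helper name:

```python
from typing import Any, Dict


def prune_unsupported_inputs(model: Any, inputs: Dict[str, Any]) -> Dict[str, Any]:
    """Drop inputs the exported model does not accept (hypothetical helper).

    ORTModel/OVModel-like objects expose the accepted input names via `input_names`;
    if the attribute is missing, the inputs are left untouched.
    """
    for key in list(inputs.keys()):
        if hasattr(model, "input_names") and key not in model.input_names:
            inputs.pop(key)
    return inputs
```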
3 changes: 3 additions & 0 deletions optimum_benchmark/backends/peft_utils.py
@@ -9,5 +9,8 @@


def apply_peft(model: PreTrainedModel, peft_type: str, peft_config: Dict[str, Any]) -> PreTrainedModel:
if not is_peft_available():
raise ImportError("peft is not available. Please, pip install peft.")

peft_config = PEFT_TYPE_TO_CONFIG_MAPPING[peft_type](**peft_config)
return get_peft_model(model=model, peft_config=peft_config)
9 changes: 9 additions & 0 deletions optimum_benchmark/backends/timm_utils.py
@@ -10,6 +10,9 @@


def get_timm_pretrained_config(model_name: str) -> PretrainedConfig:
if not is_timm_available():
raise ImportError("timm is not available. Please, pip install timm.")

model_source, model_name = parse_model_name(model_name)
if model_source == "hf-hub":
# For model names specified in the form `hf-hub:path/architecture_name@revision`,
@@ -21,6 +24,9 @@ def get_timm_pretrained_config(model_name: str) -> PretrainedConfig:


def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]:
if not is_timm_available():
raise ImportError("timm is not available. Please, pip install timm.")

artifacts_dict = {}

config_dict = {k: v for k, v in config.to_dict().items() if v is not None}
@@ -74,4 +80,7 @@ def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]:


def get_timm_automodel_loader():
if not is_timm_available():
raise ImportError("timm is not available. Please, pip install timm.")

return create_model