Commit 0a6df5b: … into main

IlyasMoutawwakil committed Oct 8, 2024
2 parents 92d6230 + 5df5826
Showing 30 changed files with 473 additions and 239 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/test_api_cuda.yaml
@@ -29,7 +29,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'api_cuda')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
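The same runner change repeats across the CUDA workflows below: the hard-coded self-hosted runner labels are replaced by a GitHub Actions runner group. A minimal sketch of the new selection in job context (indentation restored; the job id is a placeholder, the other keys are taken from the diffs):

```yaml
jobs:
  api_cuda_tests:                    # placeholder job id
    runs-on:
      group: aws-g5-4xlarge-plus     # single-GPU jobs; multi-GPU jobs use aws-g5-12xlarge-plus
    container:
      image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
```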
3 changes: 2 additions & 1 deletion .github/workflows/test_cli_cuda_onnxruntime.yaml
@@ -30,7 +30,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_onnxruntime')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
3 changes: 2 additions & 1 deletion .github/workflows/test_cli_cuda_py_txi.yaml
@@ -30,7 +30,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_py_txi')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

steps:
- name: Checkout
6 changes: 4 additions & 2 deletions .github/workflows/test_cli_cuda_pytorch.yaml
@@ -31,7 +31,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_pytorch_single_gpu')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
@@ -60,7 +61,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_pytorch_multi_gpu')
}}

runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]
runs-on:
group: aws-g5-12xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda
6 changes: 4 additions & 2 deletions .github/workflows/test_cli_cuda_tensorrt_llm.yaml
@@ -31,7 +31,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: huggingface/optimum-nvidia:latest
@@ -60,7 +61,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_tensorrt_llm_multi_gpu')
}}

runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]
runs-on:
group: aws-g5-12xlarge-plus

container:
image: huggingface/optimum-nvidia:latest
6 changes: 4 additions & 2 deletions .github/workflows/test_cli_cuda_torch_ort.yaml
@@ -31,7 +31,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_torch_or_single_gpu')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda-ort
@@ -61,7 +62,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_torch_ort_multi_gpu')
}}

runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]
runs-on:
group: aws-g5-12xlarge-plus

container:
image: ghcr.io/huggingface/optimum-benchmark:latest-cuda-ort
6 changes: 4 additions & 2 deletions .github/workflows/test_cli_cuda_vllm.yaml
@@ -31,7 +31,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_vllm_single_gpu')
}}

runs-on: [single-gpu, nvidia-gpu, a10, ci]
runs-on:
group: aws-g5-4xlarge-plus

container:
image: vllm/vllm-openai:latest
@@ -60,7 +61,8 @@ jobs:
contains( github.event.pull_request.labels.*.name, 'cli_cuda_vllm_multi_gpu')
}}

runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci]
runs-on:
group: aws-g5-12xlarge-plus

container:
image: vllm/vllm-openai:latest
3 changes: 3 additions & 0 deletions examples/_base_.yaml
@@ -1,3 +1,6 @@
log_report: true
print_report: true

# hydra/cli specific settings
hydra:
run:
42 changes: 32 additions & 10 deletions examples/pytorch_bert.py
@@ -1,23 +1,45 @@
import os

from huggingface_hub import whoami

from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

setup_logging(level="INFO", prefix="MAIN-PROCESS")
try:
USERNAME = whoami()["name"]
except Exception as e:
print(f"Failed to get username from Hugging Face Hub: {e}")
USERNAME = None

if __name__ == "__main__":
BENCHMARK_NAME = "pytorch_bert"
REPO_ID = f"IlyasMoutawwakil/{BENCHMARK_NAME}"
BENCHMARK_NAME = "pytorch_bert"


def run_benchmark():
launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model="bert-base-uncased")
scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128})

benchmark_config = BenchmarkConfig(
name=BENCHMARK_NAME, launcher=launcher_config, backend=backend_config, scenario=scenario_config
name=BENCHMARK_NAME,
launcher=launcher_config,
scenario=scenario_config,
backend=backend_config,
print_report=True,
log_report=True,
)
# benchmark_config.push_to_hub(repo_id=REPO_ID)

benchmark_report = Benchmark.launch(benchmark_config)
# benchmark_report.push_to_hub(repo_id=REPO_ID)

return benchmark_config, benchmark_report


if __name__ == "__main__":
level = os.environ.get("LOG_LEVEL", "INFO")
to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")

benchmark_config, benchmark_report = run_benchmark()
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)
# benchmark.push_to_hub(repo_id=REPO_ID)

if USERNAME is not None:
benchmark_config.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
benchmark_report.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
benchmark.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
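Because the diff interleaves old and new lines, here is a consolidated sketch of the refactored example as it reads after this commit, assuming only the API shown in the hunks above:

```python
import os

from huggingface_hub import whoami

from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

try:
    USERNAME = whoami()["name"]  # Hub username, used to pick the target repo for results
except Exception as e:
    print(f"Failed to get username from Hugging Face Hub: {e}")
    USERNAME = None

BENCHMARK_NAME = "pytorch_bert"


def run_benchmark():
    # Process launcher with device isolation; CUDA backend on GPU 0; no_weights=True skips
    # loading pretrained weights and benchmarks a randomly initialized model.
    launcher_config = ProcessConfig(device_isolation=True, device_isolation_action="warn")
    backend_config = PyTorchConfig(device="cuda", device_ids="0", no_weights=True, model="bert-base-uncased")
    scenario_config = InferenceConfig(memory=True, latency=True, input_shapes={"batch_size": 1, "sequence_length": 128})

    benchmark_config = BenchmarkConfig(
        name=BENCHMARK_NAME,
        launcher=launcher_config,
        scenario=scenario_config,
        backend=backend_config,
        print_report=True,
        log_report=True,
    )
    benchmark_report = Benchmark.launch(benchmark_config)
    return benchmark_config, benchmark_report


if __name__ == "__main__":
    # Logging is now configurable through environment variables.
    level = os.environ.get("LOG_LEVEL", "INFO")
    to_file = os.environ.get("LOG_TO_FILE", "0") == "1"
    setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")

    benchmark_config, benchmark_report = run_benchmark()
    benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

    if USERNAME is not None:
        # Publish config, report, and combined benchmark under the user's "benchmarks" repo.
        benchmark_config.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
        benchmark_report.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
        benchmark.push_to_hub(repo_id=f"{USERNAME}/benchmarks", subfolder=BENCHMARK_NAME)
```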
49 changes: 32 additions & 17 deletions examples/pytorch_llama.py
@@ -1,8 +1,16 @@
import os

from huggingface_hub import whoami

from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig
from optimum_benchmark.logging_utils import setup_logging

try:
USERNAME = whoami()["name"]
except Exception as e:
print(f"Failed to get username from Hugging Face Hub: {e}")
USERNAME = None

BENCHMARK_NAME = "pytorch-llama"

WEIGHTS_CONFIGS = {
@@ -11,16 +19,16 @@
"quantization_scheme": None,
"quantization_config": {},
},
# "4bit-awq-gemm": {
# "torch_dtype": "float16",
# "quantization_scheme": "awq",
# "quantization_config": {"bits": 4, "version": "gemm"},
# },
# "4bit-gptq-exllama-v2": {
# "torch_dtype": "float16",
# "quantization_scheme": "gptq",
# "quantization_config": {"bits": 4, "use_exllama ": True, "version": 2, "model_seqlen": 256},
# },
"4bit-awq-gemm": {
"torch_dtype": "float16",
"quantization_scheme": "awq",
"quantization_config": {"bits": 4, "version": "gemm"},
},
"4bit-gptq-exllama-v2": {
"torch_dtype": "float16",
"quantization_scheme": "gptq",
"quantization_config": {"bits": 4, "use_exllama ": True, "version": 2, "model_seqlen": 256},
},
}


@@ -42,16 +50,17 @@ def run_benchmark(weight_config: str):
input_shapes={"batch_size": 1, "sequence_length": 128},
generate_kwargs={"max_new_tokens": 32, "min_new_tokens": 32},
)

benchmark_config = BenchmarkConfig(
name=BENCHMARK_NAME, launcher=launcher_config, scenario=scenario_config, backend=backend_config
name=BENCHMARK_NAME,
launcher=launcher_config,
scenario=scenario_config,
backend=backend_config,
print_report=True,
log_report=True,
)
benchmark_report = Benchmark.launch(benchmark_config)
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

filename = f"{BENCHMARK_NAME}-{backend_config.version}-{weight_config}.json"
benchmark.push_to_hub(repo_id="optimum-benchmark/pytorch-llama", filename=filename)
benchmark.save_json(path=f"benchmarks/{filename}")
return benchmark_config, benchmark_report


if __name__ == "__main__":
@@ -60,4 +69,10 @@ def run_benchmark(weight_config: str):
setup_logging(level=level, to_file=to_file, prefix="MAIN-PROCESS")

for weight_config in WEIGHTS_CONFIGS:
run_benchmark(weight_config)
benchmark_config, benchmark_report = run_benchmark(weight_config)
benchmark = Benchmark(config=benchmark_config, report=benchmark_report)

if USERNAME is not None:
benchmark.push_to_hub(
repo_id=f"{USERNAME}/benchmarks", filename=f"{weight_config}.json", subfolder=BENCHMARK_NAME
)
17 changes: 17 additions & 0 deletions optimum_benchmark/backends/diffusers_utils.py
@@ -39,12 +39,18 @@


def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]:
if not is_diffusers_available():
raise ImportError("diffusers is not available. Please, pip install diffusers.")

config = DiffusionPipeline.load_config(model, **kwargs)
pipeline_config = config[0] if isinstance(config, tuple) else config
return pipeline_config


def extract_diffusers_shapes_from_model(model: str, **kwargs) -> Dict[str, int]:
if not is_diffusers_available():
raise ImportError("diffusers is not available. Please, pip install diffusers.")

model_config = get_diffusers_pretrained_config(model, **kwargs)

shapes = {}
@@ -56,6 +62,14 @@ def extract_diffusers_shapes_from_model(model: str, **kwargs) -> Dict[str, int]:
shapes["height"] = vae_config["sample_size"]
shapes["width"] = vae_config["sample_size"]

elif "vae_decoder" in model_config:
vae_import_path = model_config["vae_decoder"]
vae_class = get_class(f"{vae_import_path[0]}.{vae_import_path[1]}")
vae_config = vae_class.load_config(model, subfolder="vae_decoder", **kwargs)
shapes["num_channels"] = vae_config["out_channels"]
shapes["height"] = vae_config["sample_size"]
shapes["width"] = vae_config["sample_size"]

elif "vae_encoder" in model_config:
vae_import_path = model_config["vae_encoder"]
vae_class = get_class(f"{vae_import_path[0]}.{vae_import_path[1]}")
@@ -74,6 +88,9 @@ def extract_diffusers_shapes_from_model(model: str, **kwargs) -> Dict[str, int]:


def get_diffusers_automodel_loader_for_task(task: str):
if not is_diffusers_available():
raise ImportError("diffusers is not available. Please, pip install diffusers.")

model_loader_name = TASKS_TO_MODEL_LOADERS[task]
model_loader_class = getattr(diffusers, model_loader_name)
return model_loader_class
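The pattern added throughout this file (and in the peft and timm utilities below) is an availability guard at the top of each helper, so a missing optional dependency raises an actionable ImportError instead of failing later with a NameError. A minimal standalone sketch of the idea; the importlib-based check and the function name are illustrative, not the library's own code:

```python
from importlib.util import find_spec
from typing import Any, Dict


def is_diffusers_available() -> bool:
    # Illustrative check; optimum_benchmark ships its own is_diffusers_available helper.
    return find_spec("diffusers") is not None


def load_pipeline_config(model: str, **kwargs) -> Dict[str, Any]:
    # Guard before touching the optional dependency.
    if not is_diffusers_available():
        raise ImportError("diffusers is not available. Please, pip install diffusers.")

    from diffusers import DiffusionPipeline

    config = DiffusionPipeline.load_config(model, **kwargs)
    # load_config may return (config, unused_kwargs); keep only the config dict.
    return config[0] if isinstance(config, tuple) else config
```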
8 changes: 3 additions & 5 deletions optimum_benchmark/backends/onnxruntime/backend.py
@@ -297,11 +297,9 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs:
inputs = process_inputs

if self.config.library == "transformers":
for key, value in list(inputs.items()):
if key in ["position_ids", "token_type_ids"]:
if key not in self.pretrained_model.input_names:
inputs.pop(key)
for key in list(inputs.keys()):
if hasattr(self.pretrained_model, "input_names") and key not in self.pretrained_model.input_names:
inputs.pop(key)

for key, value in inputs.items():
if isinstance(value, torch.Tensor):
4 changes: 4 additions & 0 deletions optimum_benchmark/backends/openvino/backend.py
@@ -201,6 +201,10 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs:
inputs = process_inputs

for key in list(inputs.keys()):
if hasattr(self.pretrained_model, "input_names") and key not in self.pretrained_model.input_names:
inputs.pop(key)

return inputs

def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
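Both the ONNX Runtime and OpenVINO backends now prune inputs the same way: any key the exported model does not list in its input_names is dropped, instead of special-casing position_ids and token_type_ids for transformers models. A standalone sketch of that filter, with a hypothetical model argument and helper name:

```python
from typing import Any, Dict


def prune_unsupported_inputs(model: Any, inputs: Dict[str, Any]) -> Dict[str, Any]:
    """Drop inputs the exported model does not accept (hypothetical helper).

    ORTModel/OVModel-like objects expose the accepted input names via `input_names`;
    if the attribute is missing, the inputs are left untouched.
    """
    for key in list(inputs.keys()):
        if hasattr(model, "input_names") and key not in model.input_names:
            inputs.pop(key)
    return inputs
```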
3 changes: 3 additions & 0 deletions optimum_benchmark/backends/peft_utils.py
@@ -9,5 +9,8 @@


def apply_peft(model: PreTrainedModel, peft_type: str, peft_config: Dict[str, Any]) -> PreTrainedModel:
if not is_peft_available():
raise ImportError("peft is not available. Please, pip install peft.")

peft_config = PEFT_TYPE_TO_CONFIG_MAPPING[peft_type](**peft_config)
return get_peft_model(model=model, peft_config=peft_config)
9 changes: 9 additions & 0 deletions optimum_benchmark/backends/timm_utils.py
@@ -10,6 +10,9 @@


def get_timm_pretrained_config(model_name: str) -> PretrainedConfig:
if not is_timm_available():
raise ImportError("timm is not available. Please, pip install timm.")

model_source, model_name = parse_model_name(model_name)
if model_source == "hf-hub":
# For model names specified in the form `hf-hub:path/architecture_name@revision`,
@@ -21,6 +24,9 @@ def get_timm_pretrained_config(model_name: str) -> PretrainedConfig:


def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]:
if not is_timm_available():
raise ImportError("timm is not available. Please, pip install timm.")

artifacts_dict = {}

config_dict = {k: v for k, v in config.to_dict().items() if v is not None}
@@ -74,4 +80,7 @@ def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]:


def get_timm_automodel_loader():
if not is_timm_available():
raise ImportError("timm is not available. Please, pip install timm.")

return create_model