Pytorch & ORT Timm support #110

Merged 9 commits on Jan 14, 2024. The diff below shows the changes from all commits.
2 changes: 1 addition & 1 deletion .github/workflows/test_cpu_onnxruntime.yaml
@@ -26,7 +26,7 @@ jobs:
       - name: Install requirements
         run: |
           pip install --upgrade pip
-          pip install -e .[test,onnxruntime,diffusers]
+          pip install -e .[test,onnxruntime,diffusers,timm]
 
       - name: Run tests
         run: |
2 changes: 1 addition & 1 deletion .github/workflows/test_cpu_pytorch.yaml
@@ -26,7 +26,7 @@ jobs:
       - name: Install requirements
         run: |
           pip install --upgrade pip
-          pip install -e .[test,diffusers]
+          pip install -e .[test,diffusers,timm]
 
       - name: Run tests
         run: |
2 changes: 1 addition & 1 deletion .github/workflows/test_cuda_onnxruntime_inference.yaml
@@ -40,4 +40,4 @@ jobs:
           --workdir /workspace/optimum-benchmark
           --gpus '"device=0,1"'
           opt-bench-cuda:11.8.0
-          -c "pip install -e .[test,onnxruntime-gpu,diffusers] && pytest -k 'cuda and onnxruntime and inference' -x"
+          -c "pip install -e .[test,onnxruntime-gpu] && pytest -k 'cuda and onnxruntime and inference' -x"
2 changes: 1 addition & 1 deletion .github/workflows/test_cuda_pytorch.yaml
@@ -50,4 +50,4 @@ jobs:
           --gpus '"device=0,1"'
           --entrypoint /bin/bash
           opt-bench-cuda:${{ matrix.image.cuda_version }}
-          -c "pip install -e .[test,peft,diffusers,deepspeed] && pytest -k 'cuda and pytorch' -x"
+          -c "pip install -e .[test,peft,deepspeed] && pytest -k 'cuda and pytorch' -x"
2 changes: 1 addition & 1 deletion .github/workflows/test_rocm_pytorch.yaml
@@ -53,4 +53,4 @@ jobs:
           --device /dev/dri/renderD129
           --entrypoint /bin/bash
           opt-bench-rocm:${{ matrix.image.rocm_version }}
-          -c "pip install -e .[test,peft,diffusers,deepspeed] && pytest -k 'cuda and pytorch' -x"
+          -c "pip install -e .[test,peft,deepspeed] && pytest -k 'cuda and pytorch' -x"
2 changes: 1 addition & 1 deletion .github/workflows/test_tensorrt_onnxruntime_inference.yaml
@@ -40,4 +40,4 @@ jobs:
           --gpus '"device=0,1"'
           --entrypoint /bin/bash
           opt-bench-tensorrt:22.12
-          -c "pip install -e .[test,onnxruntime-gpu,diffusers] && pytest -k 'tensorrt and onnxruntime and inference' -x"
+          -c "pip install -e .[test,onnxruntime-gpu] && pytest -k 'tensorrt and onnxruntime and inference' -x"
31 changes: 31 additions & 0 deletions examples/pytorch_timm.yaml
@@ -0,0 +1,31 @@
defaults:
  - backend: pytorch # default backend
  - launcher: process # default launcher
  - benchmark: inference # default benchmark
  - experiment # inheriting experiment schema
  - _self_ # for hydra 1.1 compatibility
  - override hydra/job_logging: colorlog # colorful logging
  - override hydra/hydra_logging: colorlog # colorful logging

model: timm/mobilenetv3_large_100.ra_in1k
experiment_name: pytorch_timm
device: cuda

launcher:
  device_isolation: true

benchmark:
  input_shapes:
    batch_size: 1

hydra:
  run:
    dir: runs/${experiment_name}
  sweep:
    dir: sweeps/${experiment_name}
  job:
    chdir: true
    env_set:
      OVERRIDE_BENCHMARKS: 1
      CUDA_VISIBLE_DEVICES: 0
      CUDA_DEVICE_ORDER: PCI_BUS_ID
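This example follows the repository's Hydra-based experiment layout (backend, launcher, and benchmark defaults plus per-run overrides). Assuming the package's usual Hydra CLI entrypoint, it could be launched with something like `optimum-benchmark --config-dir examples/ --config-name pytorch_timm`; the exact command is an assumption and is not part of this PR.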
57 changes: 23 additions & 34 deletions optimum_benchmark/backends/base.py
@@ -45,52 +45,47 @@ class Backend(Generic[BackendConfigT], ABC):
pretrained_generation_config: Optional[GenerationConfig]
automodel_class: Callable[..., PreTrainedModel]

def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any]):
def __init__(self, model: str, task: str, library: str, device: str, hub_kwargs: Dict[str, Any]):
self.task = task
self.model = model
self.device = device
self.library = library
self.hub_kwargs = hub_kwargs

if self.is_diffusion_pipeline():
self.library = "diffusers"
if self.library == "diffusers":
self.model_type = self.task
self.pretrained_config = None
self.pretrained_processor = None
elif self.library == "timm":
from .timm_utils import get_pretrained_config

self.pretrained_config = get_pretrained_config(self.model)
self.model_type = self.pretrained_config.architecture
self.pretrained_processor = None
else:
self.library = "transformers"
self.pretrained_config = AutoConfig.from_pretrained(
pretrained_model_name_or_path=self.model, **self.hub_kwargs
)
self.pretrained_config = AutoConfig.from_pretrained(self.model, **self.hub_kwargs)
self.model_type = self.pretrained_config.model_type

try:
# sometimes contains information about the model's
# input shapes that are not available in the config
self.pretrained_processor = AutoProcessor.from_pretrained(
pretrained_model_name_or_path=self.model, **self.hub_kwargs
)
# sometimes contains information about the model's input shapes that are not available in the config
self.pretrained_processor = AutoProcessor.from_pretrained(self.model, **self.hub_kwargs)
except ValueError:
# sometimes the processor is not available or can't be determined/detected
LOGGER.warning("Could not find the model's preprocessor")
self.pretrained_processor = None

if self.is_text_generation_model():
try:
self.pretrained_generation_config = GenerationConfig.from_pretrained(
pretrained_model_name=self.model, **self.hub_kwargs
)
except Exception:
LOGGER.warning("Could not find the model's generation config")
self.pretrained_generation_config = None
else:
try:
self.pretrained_generation_config = GenerationConfig.from_pretrained(
pretrained_model_name=self.model, **self.hub_kwargs
)
except Exception:
self.pretrained_generation_config = None

self.automodel_class = get_model_class_for_task(
# TODO: make this configurable to add support for other frameworks
framework="pt",
task=self.task,
library=self.library,
model_type=self.model_type,
library=self.library,
task=self.task,
framework="pt",
)

def is_text_generation_model(self) -> bool:
@@ -115,14 +110,6 @@ def prepare_for_inference(self, **kwargs) -> None:
pass

def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
# TODO: move this to only backends that need it (non cpu backends)
if self.is_diffusion_pipeline():
return inputs # diffusion pipelines takes a list of strings
else:
LOGGER.info(f"\t+ Moving inputs tensors to device {self.device}")
for key, value in inputs.items():
inputs[key] = value.to(self.device)

return inputs

def forward(self, input: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
@@ -136,7 +123,7 @@ def train(self, **kwargs) -> TrainerState:

@property
def model_shapes(self) -> Dict[str, int]:
if self.is_diffusion_pipeline():
if self.library == "diffusers":
model_shapes = extract_shapes_from_diffusion_pipeline(
pipeline=self.pretrained_model,
)
@@ -167,3 +154,5 @@ def clean(self) -> None:

if self.config.delete_cache:
self.delete_hf_model_cache()

gc.collect()
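The `get_pretrained_config` helper imported from `.timm_utils` above is introduced elsewhere in this PR and does not appear in this diff. As a rough sketch of what such a helper could do, assuming timm >= 0.9 and that `timm.get_pretrained_cfg` returns a config dataclass exposing the `architecture` field read above (both are assumptions, not the PR's actual implementation):

# Rough sketch only -- assumes timm >= 0.9; not the PR's timm_utils implementation.
import timm


def get_pretrained_config(model_name: str):
    # Hub-style ids such as "timm/mobilenetv3_large_100.ra_in1k" carry an
    # organization prefix, while timm's registry keys use only the last segment.
    registry_name = model_name.split("/")[-1]
    # get_pretrained_cfg() returns a PretrainedCfg dataclass; its `architecture`
    # field is what Backend.__init__ stores as `self.model_type`.
    return timm.get_pretrained_cfg(registry_name)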
14 changes: 12 additions & 2 deletions optimum_benchmark/backends/neural_compressor/backend.py
@@ -1,3 +1,4 @@
import gc
from logging import getLogger
from tempfile import TemporaryDirectory
from typing import Any, Dict
@@ -20,8 +21,8 @@
class INCBackend(Backend[INCConfig]):
NAME: str = "neural-compressor"

def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, device, hub_kwargs)
def __init__(self, model: str, task: str, library: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, library, device, hub_kwargs)
self.validate_device()
self.validate_task()

@@ -101,7 +102,16 @@ def quantize_automodel(self) -> None:
)
self.model = quantized_model_path

def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
if self.library == "diffusers":
return {"prompt": inputs["prompt"]}

return inputs

def clean(self) -> None:
super().clean()

if hasattr(self, "tmpdir"):
self.tmpdir.cleanup()

gc.collect()
24 changes: 13 additions & 11 deletions optimum_benchmark/backends/onnxruntime/backend.py
@@ -41,8 +41,8 @@
class ORTBackend(Backend[ORTConfig]):
NAME: str = "onnxruntime"

def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, device, hub_kwargs)
def __init__(self, model: str, task: str, library: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, library, device, hub_kwargs)
self.validate_device()
self.validate_task()

@@ -57,7 +57,7 @@ def validate_task(self) -> None:
def configure(self, config: ORTConfig) -> None:
super().configure(config)

if self.is_diffusion_pipeline():
if self.library == "diffusers":
self.ortmodel_class = get_class(TASKS_TO_ORTSD[self.task])
elif self.task in TASKS_TO_ORTMODELS:
self.ortmodel_class = TASKS_TO_ORTMODELS[self.task]
@@ -345,17 +345,19 @@ def inputs_names(self) -> List[str]:
return []

def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
inputs = super().prepare_inputs(inputs)

if self.is_diffusion_pipeline():
return inputs
if self.library == "diffusers":
return {"prompt": inputs["prompt"]}

for key in list(inputs.keys()):
# sometimes optimum onnx exported models don't have inputs
# that their pytorch counterparts have, for instance token_type_ids
if key not in self.inputs_names:
inputs.pop(key)

LOGGER.info(f"\t+ Moving inputs tensors to device {self.device}")
for key, value in inputs.items():
inputs[key] = value.to(self.device)

return inputs

def prepare_for_inference(self, **kwargs) -> None:
@@ -416,12 +418,12 @@ def train(
def clean(self) -> None:
super().clean()

if self.device == "cuda":
LOGGER.info("\t+ Emptying CUDA cache")
torch.cuda.empty_cache()

if hasattr(self, "tmpdir"):
LOGGER.info("\t+ Cleaning temporary directory")
self.tmpdir.cleanup()

if self.device == "cuda":
LOGGER.info("\t+ Emptying CUDA cache")
torch.cuda.empty_cache()

gc.collect()
14 changes: 12 additions & 2 deletions optimum_benchmark/backends/openvino/backend.py
@@ -1,3 +1,4 @@
import gc
import inspect
from logging import getLogger
from tempfile import TemporaryDirectory
@@ -18,8 +19,8 @@
class OVBackend(Backend[OVConfig]):
NAME: str = "openvino"

def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, device, hub_kwargs)
def __init__(self, model: str, task: str, library: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, library, device, hub_kwargs)
self.validate_device()
self.validate_task()

@@ -107,6 +108,12 @@ def quantize_automodel(self) -> None:
)
self.model = quantized_model_path

def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
if self.library == "diffusers":
return {"prompt": inputs["prompt"]}

return inputs

def prepare_for_inference(self, **kwargs) -> None:
if self.config.reshape:
static_shapes = {
@@ -127,5 +134,8 @@ def prepare_for_inference(self, **kwargs) -> None:

def clean(self) -> None:
super().clean()

if hasattr(self, "tmpdir"):
self.tmpdir.cleanup()

gc.collect()