Pytorch & ORT Timm support #110

Merged 9 commits on Jan 14, 2024. The diff below shows the changes from all commits.
2 changes: 1 addition & 1 deletion .github/workflows/test_cpu_onnxruntime.yaml
@@ -26,7 +26,7 @@ jobs:
       - name: Install requirements
         run: |
           pip install --upgrade pip
-          pip install -e .[test,onnxruntime,diffusers]
+          pip install -e .[test,onnxruntime,diffusers,timm]
 
       - name: Run tests
         run: |
2 changes: 1 addition & 1 deletion .github/workflows/test_cpu_pytorch.yaml
@@ -26,7 +26,7 @@ jobs:
       - name: Install requirements
         run: |
           pip install --upgrade pip
-          pip install -e .[test,diffusers]
+          pip install -e .[test,diffusers,timm]
 
       - name: Run tests
         run: |
2 changes: 1 addition & 1 deletion .github/workflows/test_cuda_onnxruntime_inference.yaml
@@ -40,4 +40,4 @@ jobs:
           --workdir /workspace/optimum-benchmark
           --gpus '"device=0,1"'
           opt-bench-cuda:11.8.0
-          -c "pip install -e .[test,onnxruntime-gpu,diffusers] && pytest -k 'cuda and onnxruntime and inference' -x"
+          -c "pip install -e .[test,onnxruntime-gpu] && pytest -k 'cuda and onnxruntime and inference' -x"
2 changes: 1 addition & 1 deletion .github/workflows/test_cuda_pytorch.yaml
@@ -50,4 +50,4 @@ jobs:
           --gpus '"device=0,1"'
           --entrypoint /bin/bash
           opt-bench-cuda:${{ matrix.image.cuda_version }}
-          -c "pip install -e .[test,peft,diffusers,deepspeed] && pytest -k 'cuda and pytorch' -x"
+          -c "pip install -e .[test,peft,deepspeed] && pytest -k 'cuda and pytorch' -x"
2 changes: 1 addition & 1 deletion .github/workflows/test_rocm_pytorch.yaml
@@ -53,4 +53,4 @@ jobs:
           --device /dev/dri/renderD129
           --entrypoint /bin/bash
           opt-bench-rocm:${{ matrix.image.rocm_version }}
-          -c "pip install -e .[test,peft,diffusers,deepspeed] && pytest -k 'cuda and pytorch' -x"
+          -c "pip install -e .[test,peft,deepspeed] && pytest -k 'cuda and pytorch' -x"
2 changes: 1 addition & 1 deletion .github/workflows/test_tensorrt_onnxruntime_inference.yaml
@@ -40,4 +40,4 @@ jobs:
           --gpus '"device=0,1"'
           --entrypoint /bin/bash
           opt-bench-tensorrt:22.12
-          -c "pip install -e .[test,onnxruntime-gpu,diffusers] && pytest -k 'tensorrt and onnxruntime and inference' -x"
+          -c "pip install -e .[test,onnxruntime-gpu] && pytest -k 'tensorrt and onnxruntime and inference' -x"
31 changes: 31 additions & 0 deletions examples/pytorch_timm.yaml
@@ -0,0 +1,31 @@
defaults:
  - backend: pytorch # default backend
  - launcher: process # default launcher
  - benchmark: inference # default benchmark
  - experiment # inheriting experiment schema
  - _self_ # for hydra 1.1 compatibility
  - override hydra/job_logging: colorlog # colorful logging
  - override hydra/hydra_logging: colorlog # colorful logging

model: timm/mobilenetv3_large_100.ra_in1k
experiment_name: pytorch_timm
device: cuda

launcher:
  device_isolation: true

benchmark:
  input_shapes:
    batch_size: 1

hydra:
  run:
    dir: runs/${experiment_name}
  sweep:
    dir: sweeps/${experiment_name}
  job:
    chdir: true
    env_set:
      OVERRIDE_BENCHMARKS: 1
      CUDA_VISIBLE_DEVICES: 0
      CUDA_DEVICE_ORDER: PCI_BUS_ID
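This example follows the repository's Hydra-based experiment layout (backend, launcher, and benchmark defaults plus per-run overrides). Assuming the package's usual Hydra CLI entrypoint, it could be launched with something like `optimum-benchmark --config-dir examples/ --config-name pytorch_timm`; the exact command is an assumption and is not part of this PR.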
57 changes: 23 additions & 34 deletions optimum_benchmark/backends/base.py
@@ -45,52 +45,47 @@ class Backend(Generic[BackendConfigT], ABC):
pretrained_generation_config: Optional[GenerationConfig]
automodel_class: Callable[..., PreTrainedModel]

def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any]):
def __init__(self, model: str, task: str, library: str, device: str, hub_kwargs: Dict[str, Any]):
self.task = task
self.model = model
self.device = device
self.library = library
self.hub_kwargs = hub_kwargs

if self.is_diffusion_pipeline():
self.library = "diffusers"
if self.library == "diffusers":
self.model_type = self.task
self.pretrained_config = None
self.pretrained_processor = None
elif self.library == "timm":
from .timm_utils import get_pretrained_config

self.pretrained_config = get_pretrained_config(self.model)
self.model_type = self.pretrained_config.architecture
self.pretrained_processor = None
else:
self.library = "transformers"
self.pretrained_config = AutoConfig.from_pretrained(
pretrained_model_name_or_path=self.model, **self.hub_kwargs
)
self.pretrained_config = AutoConfig.from_pretrained(self.model, **self.hub_kwargs)
self.model_type = self.pretrained_config.model_type

try:
# sometimes contains information about the model's
# input shapes that are not available in the config
self.pretrained_processor = AutoProcessor.from_pretrained(
pretrained_model_name_or_path=self.model, **self.hub_kwargs
)
# sometimes contains information about the model's input shapes that are not available in the config
self.pretrained_processor = AutoProcessor.from_pretrained(self.model, **self.hub_kwargs)
except ValueError:
# sometimes the processor is not available or can't be determined/detected
LOGGER.warning("Could not find the model's preprocessor")
self.pretrained_processor = None

if self.is_text_generation_model():
try:
self.pretrained_generation_config = GenerationConfig.from_pretrained(
pretrained_model_name=self.model, **self.hub_kwargs
)
except Exception:
LOGGER.warning("Could not find the model's generation config")
self.pretrained_generation_config = None
else:
try:
self.pretrained_generation_config = GenerationConfig.from_pretrained(
pretrained_model_name=self.model, **self.hub_kwargs
)
except Exception:
self.pretrained_generation_config = None

self.automodel_class = get_model_class_for_task(
# TODO: make this configurable to add support for other frameworks
framework="pt",
task=self.task,
library=self.library,
model_type=self.model_type,
library=self.library,
task=self.task,
framework="pt",
)

def is_text_generation_model(self) -> bool:
@@ -115,14 +110,6 @@ def prepare_for_inference(self, **kwargs) -> None:
pass

def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
# TODO: move this to only backends that need it (non cpu backends)
if self.is_diffusion_pipeline():
return inputs # diffusion pipelines takes a list of strings
else:
LOGGER.info(f"\t+ Moving inputs tensors to device {self.device}")
for key, value in inputs.items():
inputs[key] = value.to(self.device)

return inputs

def forward(self, input: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput:
@@ -136,7 +123,7 @@ def train(self, **kwargs) -> TrainerState:

@property
def model_shapes(self) -> Dict[str, int]:
if self.is_diffusion_pipeline():
if self.library == "diffusers":
model_shapes = extract_shapes_from_diffusion_pipeline(
pipeline=self.pretrained_model,
)
@@ -167,3 +154,5 @@ def clean(self) -> None:

if self.config.delete_cache:
self.delete_hf_model_cache()

gc.collect()
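The `get_pretrained_config` helper imported from `.timm_utils` above is introduced elsewhere in this PR and does not appear in this diff. As a rough sketch of what such a helper could do, assuming timm >= 0.9 and that `timm.get_pretrained_cfg` returns a config dataclass exposing the `architecture` field read above (both are assumptions, not the PR's actual implementation):

# Rough sketch only -- assumes timm >= 0.9; not the PR's timm_utils implementation.
import timm


def get_pretrained_config(model_name: str):
    # Hub-style ids such as "timm/mobilenetv3_large_100.ra_in1k" carry an
    # organization prefix, while timm's registry keys use only the last segment.
    registry_name = model_name.split("/")[-1]
    # get_pretrained_cfg() returns a PretrainedCfg dataclass; its `architecture`
    # field is what Backend.__init__ stores as `self.model_type`.
    return timm.get_pretrained_cfg(registry_name)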
14 changes: 12 additions & 2 deletions optimum_benchmark/backends/neural_compressor/backend.py
@@ -1,3 +1,4 @@
import gc
from logging import getLogger
from tempfile import TemporaryDirectory
from typing import Any, Dict
@@ -20,8 +21,8 @@
class INCBackend(Backend[INCConfig]):
NAME: str = "neural-compressor"

def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, device, hub_kwargs)
def __init__(self, model: str, task: str, library: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, library, device, hub_kwargs)
self.validate_device()
self.validate_task()

@@ -101,7 +102,16 @@ def quantize_automodel(self) -> None:
)
self.model = quantized_model_path

def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
if self.library == "diffusers":
return {"prompt": inputs["prompt"]}

return inputs

def clean(self) -> None:
super().clean()

if hasattr(self, "tmpdir"):
self.tmpdir.cleanup()

gc.collect()
24 changes: 13 additions & 11 deletions optimum_benchmark/backends/onnxruntime/backend.py
@@ -41,8 +41,8 @@
class ORTBackend(Backend[ORTConfig]):
NAME: str = "onnxruntime"

def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, device, hub_kwargs)
def __init__(self, model: str, task: str, library: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, library, device, hub_kwargs)
self.validate_device()
self.validate_task()

@@ -57,7 +57,7 @@ def validate_task(self) -> None:
def configure(self, config: ORTConfig) -> None:
super().configure(config)

if self.is_diffusion_pipeline():
if self.library == "diffusers":
self.ortmodel_class = get_class(TASKS_TO_ORTSD[self.task])
elif self.task in TASKS_TO_ORTMODELS:
self.ortmodel_class = TASKS_TO_ORTMODELS[self.task]
@@ -345,17 +345,19 @@ def inputs_names(self) -> List[str]:
return []

def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
inputs = super().prepare_inputs(inputs)

if self.is_diffusion_pipeline():
return inputs
if self.library == "diffusers":
return {"prompt": inputs["prompt"]}

for key in list(inputs.keys()):
# sometimes optimum onnx exported models don't have inputs
# that their pytorch counterparts have, for instance token_type_ids
if key not in self.inputs_names:
inputs.pop(key)

LOGGER.info(f"\t+ Moving inputs tensors to device {self.device}")
for key, value in inputs.items():
inputs[key] = value.to(self.device)

return inputs

def prepare_for_inference(self, **kwargs) -> None:
@@ -416,12 +418,12 @@ def train(
def clean(self) -> None:
super().clean()

if self.device == "cuda":
LOGGER.info("\t+ Emptying CUDA cache")
torch.cuda.empty_cache()

if hasattr(self, "tmpdir"):
LOGGER.info("\t+ Cleaning temporary directory")
self.tmpdir.cleanup()

if self.device == "cuda":
LOGGER.info("\t+ Emptying CUDA cache")
torch.cuda.empty_cache()

gc.collect()
14 changes: 12 additions & 2 deletions optimum_benchmark/backends/openvino/backend.py
@@ -1,3 +1,4 @@
import gc
import inspect
from logging import getLogger
from tempfile import TemporaryDirectory
@@ -18,8 +19,8 @@
class OVBackend(Backend[OVConfig]):
NAME: str = "openvino"

def __init__(self, model: str, task: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, device, hub_kwargs)
def __init__(self, model: str, task: str, library: str, device: str, hub_kwargs: Dict[str, Any]) -> None:
super().__init__(model, task, library, device, hub_kwargs)
self.validate_device()
self.validate_task()

@@ -107,6 +108,12 @@ def quantize_automodel(self) -> None:
)
self.model = quantized_model_path

def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
if self.library == "diffusers":
return {"prompt": inputs["prompt"]}

return inputs

def prepare_for_inference(self, **kwargs) -> None:
if self.config.reshape:
static_shapes = {
@@ -127,5 +134,8 @@ def prepare_for_inference(self, **kwargs) -> None:

def clean(self) -> None:
super().clean()

if hasattr(self, "tmpdir"):
self.tmpdir.cleanup()

gc.collect()