diff --git a/.github/workflows/check_quality.yaml b/.github/workflows/check_quality.yaml index 36b99f99..c2db2f09 100644 --- a/.github/workflows/check_quality.yaml +++ b/.github/workflows/check_quality.yaml @@ -21,7 +21,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v3 with: - python-version: '3.10' + python-version: "3.10" - name: Install quality requirements run: | diff --git a/.github/workflows/test_api_cpu.yaml b/.github/workflows/test_api_cpu.yaml index 752afab7..70b8c02c 100644 --- a/.github/workflows/test_api_cpu.yaml +++ b/.github/workflows/test_api_cpu.yaml @@ -21,7 +21,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v3 with: - python-version: '3.10' + python-version: "3.10" - name: Install dependencies run: | diff --git a/.github/workflows/test_api_misc.yaml b/.github/workflows/test_api_misc.yaml index df72ffb2..99a24217 100644 --- a/.github/workflows/test_api_misc.yaml +++ b/.github/workflows/test_api_misc.yaml @@ -21,7 +21,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v3 with: - python-version: '3.10' + python-version: "3.10" - name: Install requirements run: | diff --git a/.github/workflows/test_cli_cpu_neural_compressor.yaml b/.github/workflows/test_cli_cpu_neural_compressor.yaml index 9150a90f..2e9008bf 100644 --- a/.github/workflows/test_cli_cpu_neural_compressor.yaml +++ b/.github/workflows/test_cli_cpu_neural_compressor.yaml @@ -21,12 +21,12 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v3 with: - python-version: '3.10' + python-version: "3.10" - - name: Install Intel Neural Compressor CPU requirements + - name: Install requirements run: | pip install --upgrade pip pip install -e .[testing,neural-compressor,diffusers,timm] - - name: Run Intel Neural Compressor CPU tests + - name: Run CPU tests run: pytest -k "cli and cpu and neural_compressor" diff --git a/.github/workflows/test_cli_cpu_onnxruntime.yaml b/.github/workflows/test_cli_cpu_onnxruntime.yaml index e7caf218..486429af 100644 --- a/.github/workflows/test_cli_cpu_onnxruntime.yaml +++ b/.github/workflows/test_cli_cpu_onnxruntime.yaml @@ -21,7 +21,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v3 with: - python-version: '3.10' + python-version: "3.10" - name: Install requirements run: | @@ -29,5 +29,4 @@ jobs: pip install -e .[testing,onnxruntime,diffusers,timm] - name: Run tests - run: | - pytest -k "cli and cpu and onnxruntime" + run: pytest -k "cli and cpu and onnxruntime" diff --git a/.github/workflows/test_cli_cpu_openvino.yaml b/.github/workflows/test_cli_cpu_openvino.yaml index 00b40aef..bbfa29a8 100644 --- a/.github/workflows/test_cli_cpu_openvino.yaml +++ b/.github/workflows/test_cli_cpu_openvino.yaml @@ -21,7 +21,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v3 with: - python-version: '3.10' + python-version: "3.10" - name: Install requirements run: | @@ -29,5 +29,4 @@ jobs: pip install -e .[testing,openvino,diffusers,timm] - name: Run tests - run: | - pytest -k "cli and cpu and openvino" + run: pytest -k "cli and cpu and openvino" diff --git a/.github/workflows/test_cli_cpu_py_tgi.yaml b/.github/workflows/test_cli_cpu_py_tgi.yaml new file mode 100644 index 00000000..1ec01c84 --- /dev/null +++ b/.github/workflows/test_cli_cpu_py_tgi.yaml @@ -0,0 +1,35 @@ +name: CLI CPU Py-TGI Tests + +on: + workflow_dispatch: + push: + branches: [main] + pull_request: + types: [opened, reopened, synchronize] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + 
cancel-in-progress: true + +jobs: + run_cli_cpu_py_tgi_tests: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + + - name: Install requirements + run: | + pip install --upgrade pip + pip install -e .[testing,py-tgi] + + - name: Pull TGI docker image + run: docker pull ghcr.io/huggingface/text-generation-inference:latest + + - name: Run tests + run: pytest -k "cli and cpu and py_tgi" diff --git a/.github/workflows/test_cli_cpu_pytorch.yaml b/.github/workflows/test_cli_cpu_pytorch.yaml index 3df5368b..332229b7 100644 --- a/.github/workflows/test_cli_cpu_pytorch.yaml +++ b/.github/workflows/test_cli_cpu_pytorch.yaml @@ -21,7 +21,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v3 with: - python-version: '3.10' + python-version: "3.10" - name: Install requirements run: | @@ -29,5 +29,4 @@ jobs: pip install -e .[testing,diffusers,timm,peft] - name: Run tests - run: | - pytest -k "cli and cpu and pytorch" + run: pytest -k "cli and cpu and pytorch" diff --git a/.github/workflows/test_cli_misc.yaml b/.github/workflows/test_cli_misc.yaml index 5b55c0a7..1ca9d54f 100644 --- a/.github/workflows/test_cli_misc.yaml +++ b/.github/workflows/test_cli_misc.yaml @@ -21,7 +21,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v3 with: - python-version: '3.10' + python-version: "3.10" - name: Install requirements run: | diff --git a/.github/workflows/test_cli_tensorrt_onnxruntime.yaml b/.github/workflows/test_cli_tensorrt_onnxruntime.yaml index a98bfc15..1d52ee33 100644 --- a/.github/workflows/test_cli_tensorrt_onnxruntime.yaml +++ b/.github/workflows/test_cli_tensorrt_onnxruntime.yaml @@ -26,15 +26,20 @@ jobs: --tag opt-bench-tensorrt:latest . 
+ - name: Get GPUs with most free memory + id: get_devices + run: | + echo "::set-output name=devices::$(nvidia-smi --query-gpu=memory.free,index --format=csv,noheader,nounits | sort -n -k1 | tail -n 2 | awk -F', ' '{print $2}' | xargs echo -n | sed 's/ /,/g' | awk '{print $0}')" + - name: Run tests run: docker run --rm --pid host --shm-size 64G --env USE_CUDA="1" + --gpus '"device=${{ steps.get_devices.outputs.devices }}"' --volume $(pwd):/workspace/optimum-benchmark --workdir /workspace/optimum-benchmark - --gpus '"device=0,1"' --entrypoint /bin/bash opt-bench-tensorrt:latest -c "pip install -e .[testing,onnxruntime-gpu,diffusers,timm] && pytest -k 'cli and tensorrt and onnxruntime' -x" diff --git a/Makefile b/Makefile index 60493c16..61c77458 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,28 @@ # List of targets that are not associated with files .PHONY: quality style install build_docker_cpu build_docker_cuda build_docker_rocm build_docker_tensorrt test_api_misc test_api_cpu test_api_cuda test_api_rocm test_api_tensorrt test_cli_misc test_cli_cpu_pytorch test_cli_cpu_neural_compressor test_cli_cpu_onnxruntime test_cli_cpu_openvino test_cli_cuda_pytorch test_cli_rocm_pytorch test_cli_tensorrt_onnxruntime test_cli_tensorrt_llm +, := , PWD := $(shell pwd) USER_ID := $(shell id -u) GROUP_ID := $(shell id -g) +API_MISC_REQS := testing +API_CPU_REQS := testing,timm,diffusers +API_CUDA_REQS := testing,timm,diffusers +API_ROCM_REQS := testing,timm,diffusers + +CLI_MISC_REQS := testing + +CLI_CUDA_ONNXRUNTIME_REQS := testing,timm,diffusers +CLI_ROCM_ONNXRUNTIME_REQS := testing,timm,diffusers +CLI_CUDA_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft +CLI_ROCM_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft + +CLI_CPU_OPENVINO_REQS := testing,openvino,timm,diffusers +CLI_CPU_PYTORCH_REQS := testing,timm,diffusers,deepspeed,peft +CLI_CPU_ONNXRUNTIME_REQS := testing,onnxruntime,timm,diffusers +CLI_CPU_NEURAL_COMPRESSOR_REQS := testing,neural-compressor,timm,diffusers + quality: ruff check . ruff format --check . @@ -18,49 +36,44 @@ install: ## Docker builds +define build_docker + docker build -f docker/$(1).dockerfile --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t opt-bench-$(1):local . +endef + build_docker_cpu: - docker build -f docker/cpu.dockerfile --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t opt-bench-cpu:local . + $(call build_docker,cpu) build_docker_cuda: - docker build -f docker/cuda.dockerfile --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t opt-bench-cuda:local . + $(call build_docker,cuda) build_docker_rocm: - docker build -f docker/rocm.dockerfile --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t opt-bench-rocm:local . - -build_docker_tensorrt: - docker build -f docker/tensorrt.dockerfile --build-arg USER_ID=$(USER_ID) --build-arg GROUP_ID=$(GROUP_ID) -t opt-bench-tensorrt:local . 
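For readability, here is a rough Python equivalent of what the `Get GPUs with most free memory` shell pipeline above computes (a sketch only; the workflow step itself stays as shell, and the helper name below is illustrative):

```python
import subprocess


def devices_with_most_free_memory(count: int = 2) -> str:
    """Return the indices of the `count` GPUs with the most free memory, e.g. "0,1"."""
    output = subprocess.check_output(
        ["nvidia-smi", "--query-gpu=memory.free,index", "--format=csv,noheader,nounits"],
        text=True,
    )
    # each line looks like "<free MiB>, <index>"
    gpus = [line.split(", ") for line in output.strip().splitlines()]
    # sort by free memory (descending) and keep the indices of the top `count` devices
    best = sorted(gpus, key=lambda pair: int(pair[0]), reverse=True)[:count]
    return ",".join(index for _, index in best)


if __name__ == "__main__":
    print(devices_with_most_free_memory())
```

The workflow then feeds the resulting comma-separated indices into `--gpus '"device=..."'`, so tests on a shared runner land on the least busy GPUs instead of a hard-coded `device=0,1`.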
- -## API tests + $(call build_docker,rocm) -test_api_misc: - docker run \ - --rm \ - --pid host \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-cpu:local -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and not (cpu or cuda or rocm or tensorrt)' -x" +## Tests -test_api_cpu: +define test_ubuntu docker run \ --rm \ --pid host \ --entrypoint /bin/bash \ --volume $(PWD):/workspace \ --workdir /workspace \ - opt-bench-cpu:local -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cpu' -x" + opt-bench-$(1):local -c "pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x" +endef -test_api_cuda: +define test_nvidia docker run \ --rm \ --pid host \ + --shm-size 64G \ --gpus '"device=0,1"' \ --entrypoint /bin/bash \ --volume $(PWD):/workspace \ --workdir /workspace \ - opt-bench-cuda:local -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x" + opt-bench-$(1):local -c "pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x" +endef -test_api_rocm: +define test_amdgpu docker run \ --rm \ --pid host \ @@ -71,101 +84,44 @@ test_api_rocm: --entrypoint /bin/bash \ --volume $(PWD):/workspace \ --workdir /workspace \ - opt-bench-rocm:local -c "pip install -e .[testing,timm,diffusers] && pytest tests/ -k 'api and cuda' -x" + opt-bench-$(1):local -c "pip install -e .[$(2)] && pytest tests/ -k '$(3)' -x" +endef -## CLI tests +# group the extra +test_api_cpu: + $(call test_ubuntu,cpu,$(API_CPU_REQS),api and cpu) -### CLI CPU tests +test_api_cuda: + $(call test_nvidia,cuda,$(API_CUDA_REQS),api and cuda) -test_cli_misc: - docker run \ - --rm \ - --pid host \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-cpu:local -c "pip install -e .[testing,diffusers,timm] && pytest tests/ -k 'cli and not (cpu or cuda or rocm or tensorrt)' -x" +test_api_rocm: + $(call test_amdgpu,rocm,$(API_ROCM_REQS),api and rocm) -test_cli_cpu_pytorch: - docker run \ - --rm \ - --pid host \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-cpu:local -c "pip install -e .[testing,diffusers,timm] && pytest tests/ -k 'cli and cpu and pytorch' -x" +test_api_misc: + $(call test_ubuntu,cpu,$(API_MISC_REQS),api and not (cpu or cuda or rocm or tensorrt)) -test_cli_cpu_neural_compressor: - docker run \ - --rm \ - --pid host \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-cpu:local -c "pip install -e .[testing,neural-compressor,diffusers,timm] && pytest tests/ -k 'cli and cpu and neural_compressor' -x" +## CLI tests -test_cli_cpu_onnxruntime: - docker run \ - --rm \ - --pid host \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-cpu:local -c "pip install -e .[testing,onnxruntime,diffusers,timm] && pytest tests/ -k 'cli and cpu and onnxruntime' -x" +test_cli_cuda_pytorch: + $(call test_nvidia,cuda,$(CLI_CUDA_PYTORCH_REQS),cli and cuda and pytorch) -test_cli_cpu_openvino: - docker run \ - --rm \ - --pid host \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-cpu:local -c "pip install -e .[testing,openvino,diffusers,timm] && pytest tests/ -k 'cli and cpu and openvino' -x" +test_cli_rocm_pytorch: + $(call test_amdgpu,rocm,$(CLI_ROCM_PYTORCH_REQS),cli and cuda and pytorch and peft) -### CLI GPU tests +test_cli_cuda_onnxruntime: + $(call test_nvidia,cuda,$(CLI_CUDA_ONNXRUNTIME_REQS),cli and cuda and onnxruntime) 
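All of these targets, like the workflows above, select tests with pytest `-k` keyword expressions rather than separate test files. As a reminder of how that matching works, here is a small, purely hypothetical test module (not part of the repository): the expression is matched against each test id, so `pytest -k "cli and cpu and onnxruntime"` selects exactly the matching parametrization.

```python
# hypothetical_test_cli.py -- illustration only, not a file in this repository
import pytest


@pytest.mark.parametrize("device", ["cpu", "cuda"])
@pytest.mark.parametrize("backend", ["pytorch", "onnxruntime", "openvino"])
def test_cli_configs(device, backend):
    # pytest builds ids such as "test_cli_configs[onnxruntime-cpu]";
    # `-k "cli and cpu and onnxruntime"` matches that id and nothing else here
    assert isinstance(device, str) and isinstance(backend, str)
```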
-test_cli_cuda_pytorch: - docker run \ - --rm \ - --pid host \ - --shm-size 64G \ - --gpus '"device=0,1"' \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-cuda:local -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x" +test_cli_rocm_onnxruntime: + $(call test_amdgpu,rocm,$(CLI_ROCM_ONNXRUNTIME_REQS),cli and rocm and onnxruntime) -test_cli_rocm_pytorch: - docker run \ - --rm \ - --pid host \ - --shm-size 64G \ - --device /dev/kfd \ - --device /dev/dri/renderD128 \ - --device /dev/dri/renderD129 \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-rocm:local -c "pip install -e .[testing,diffusers,timm,deepspeed,peft] && pytest tests/ -k 'cli and cuda and pytorch' -x" +test_cli_cpu_pytorch: + $(call test_ubuntu,cpu,$(CLI_CPU_PYTORCH_REQS),cli and cpu and pytorch) -test_cli_tensorrt_onnxruntime: - docker run \ - --rm \ - --pid host \ - --shm-size 64G \ - --gpus '"device=0,1"' \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-tensorrt:local -c "pip install -e .[testing,onnxruntime-gpu,diffusers,timm] && pytest tests/ -k 'cli and tensorrt and onnxruntime' -x" +test_cli_cpu_openvino: + $(call test_ubuntu,cpu,$(CLI_CPU_OPENVINO_REQS),cli and cpu and openvino) -test_cli_tensorrt_llm: - docker run \ - --rm \ - --pid host \ - --shm-size 64G \ - --gpus '"device=0,1"' \ - --entrypoint /bin/bash \ - --volume $(PWD):/workspace \ - --workdir /workspace \ - opt-bench-tensorrt-llm:local -c "pip install -e .[testing] && pip uninstall -y nvidia-ml-py && pytest tests/ -k 'cli and tensorrt and llm' -x" +test_cli_cpu_onnxruntime: + $(call test_ubuntu,cpu,$(CLI_CPU_ONNXRUNTIME_REQS),cli and cpu and onnxruntime) + +test_cli_cpu_neural_compressor: + $(call test_ubuntu,cpu,$(CLI_CPU_NEURAL_COMPRESSOR_REQS),cli and cpu and neural-compressor) diff --git a/README.md b/README.md index 8b5e7368..fa34294b 100644 --- a/README.md +++ b/README.md @@ -58,14 +58,14 @@ pip install -e . 
Depending on the backends you want to use, you might need to install some extra dependencies: -- Pytorch (default): `pip install optimum-benchmark` +- PyTorch (default): `pip install optimum-benchmark` - OpenVINO: `pip install optimum-benchmark[openvino]` - Torch-ORT: `pip install optimum-benchmark[torch-ort]` - OnnxRuntime: `pip install optimum-benchmark[onnxruntime]` - TensorRT-LLM: `pip install optimum-benchmark[tensorrt-llm]` - OnnxRuntime-GPU: `pip install optimum-benchmark[onnxruntime-gpu]` - Intel Neural Compressor: `pip install optimum-benchmark[neural-compressor]` -- Text Generation Inference: `pip install optimum-benchmark[text-generation-inference]` +- Py-TGI: `pip install optimum-benchmark[py-tgi]` ### Running benchmarks from Python API 🧪 diff --git a/optimum_benchmark/backends/base.py b/optimum_benchmark/backends/base.py index 8a6430d8..d9eaa5f3 100644 --- a/optimum_benchmark/backends/base.py +++ b/optimum_benchmark/backends/base.py @@ -5,7 +5,8 @@ from logging import getLogger from typing import Any, ClassVar, Dict, Generic, Optional -import numpy as np +import datasets.utils.logging as datasets_logging +import transformers.utils.logging as transformers_logging from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel, TrainerState from ..task_utils import get_automodel_class_for_task @@ -20,6 +21,9 @@ get_transformers_pretrained_processor, ) +datasets_logging.set_verbosity_error() +transformers_logging.set_verbosity_error() + LOGGER = getLogger("backend") @@ -71,7 +75,6 @@ def __init__(self, config: BackendConfigT): def seed(self) -> None: LOGGER.info(f"\t+ Setting random seed to {self.config.seed}") random.seed(self.config.seed) - np.random.seed(self.config.seed) def prepare_for_inference(self, **kwargs) -> None: """ diff --git a/optimum_benchmark/backends/config.py b/optimum_benchmark/backends/config.py index e8c9c231..623455b9 100644 --- a/optimum_benchmark/backends/config.py +++ b/optimum_benchmark/backends/config.py @@ -11,7 +11,12 @@ LOGGER = getLogger("backend") -HUB_KWARGS = {"revision": "main", "force_download": False, "local_files_only": False, "trust_remote_code": False} +HUB_KWARGS = { + "revision": "main", + "force_download": False, + "local_files_only": False, + "trust_remote_code": False, +} @dataclass @@ -20,18 +25,18 @@ class BackendConfig(ABC): version: str _target_: str - seed: int = 42 - model: Optional[str] = None + + task: Optional[str] = None + library: Optional[str] = None + device: Optional[str] = None device_ids: Optional[str] = None # yes we use a string here instead of a list # because it's easier to pass in a yaml or from cli # and it's consistent with GPU environment variables - task: Optional[str] = None - library: Optional[str] = None - + seed: int = 42 inter_op_num_threads: Optional[int] = None intra_op_num_threads: Optional[int] = None @@ -42,23 +47,28 @@ def __post_init__(self): raise ValueError("`model` must be specified.") if self.task is None: - self.task = infer_task_from_model_name_or_path(self.model) + self.task = infer_task_from_model_name_or_path(self.model, self.hub_kwargs.get("revision", None)) + + if self.library is None: + self.library = infer_library_from_model_name_or_path(self.model, self.hub_kwargs.get("revision", None)) if self.device is None: self.device = "cuda" if is_nvidia_system() or is_rocm_system() else "cpu" - LOGGER.warning(f"`device` is not specified, defaulting to {self.device} based on system configuration.") - - if self.device not in ["cuda", "cpu", "mps", "xla"]: - raise 
ValueError(f"`device` must be either `cuda`, `cpu`, `mps` or `xla`, but got {self.device}") if ":" in self.device: - # support pytorch device index notation + LOGGER.warning("`device` was specified using PyTorch format (e.g. `cuda:0`) which is not recommended.") self.device = self.device.split(":")[0] self.device_ids = self.device.split(":")[1] + LOGGER.warning(f"`device` and `device_ids` are now set to `{self.device}` and `{self.device_ids}`.") + + if self.device not in ["cuda", "cpu", "mps", "xla"]: + raise ValueError(f"`device` must be either `cuda`, `cpu`, `mps` or `xla`, but got {self.device}") if self.device == "cuda": if self.device_ids is None: + LOGGER.warning("`device_ids` was not specified, using all available GPUs.") self.device_ids = get_gpu_device_ids() + LOGGER.warning(f"`device_ids` is now set to `{self.device_ids}` based on system configuration.") os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = self.device_ids @@ -69,21 +79,14 @@ def __post_init__(self): os.environ["HIP_VISIBLE_DEVICES"] = self.device_ids os.environ["ROCR_VISIBLE_DEVICES"] = self.device_ids - if self.library is None: - self.library = infer_library_from_model_name_or_path(self.model) - if self.library not in ["transformers", "diffusers", "timm"]: raise ValueError(f"`library` must be either `transformers`, `diffusers` or `timm`, but got {self.library}") if self.inter_op_num_threads is not None: - if not isinstance(self.inter_op_num_threads, int): - raise ValueError(f"`inter_op_num_threads` must be an integer, but got {self.inter_op_num_threads}") if self.inter_op_num_threads == -1: self.inter_op_num_threads = cpu_count() if self.intra_op_num_threads is not None: - if not isinstance(self.intra_op_num_threads, int): - raise ValueError(f"`intra_op_num_threads` must be an integer, but got {self.intra_op_num_threads}") if self.intra_op_num_threads == -1: self.intra_op_num_threads = cpu_count() diff --git a/optimum_benchmark/backends/diffusers_utils.py b/optimum_benchmark/backends/diffusers_utils.py index 042ea3d0..a9b5b5a7 100644 --- a/optimum_benchmark/backends/diffusers_utils.py +++ b/optimum_benchmark/backends/diffusers_utils.py @@ -13,23 +13,25 @@ def get_diffusers_pretrained_config(model: str, **kwargs) -> Dict[str, int]: def extract_diffusers_shapes_from_model(model: str, **kwargs) -> Dict[str, int]: - config = diffusers.DiffusionPipeline.load_config(model, **kwargs) + model_config = get_diffusers_pretrained_config(model, **kwargs) shapes = {} - if "vae" in config: - vae_import_path = config["vae"] + if "vae" in model_config: + vae_import_path = model_config["vae"] vae_class = get_class(f"{vae_import_path[0]}.{vae_import_path[1]}") vae_config = vae_class.load_config(model, subfolder="vae", **kwargs) shapes["num_channels"] = vae_config["out_channels"] shapes["height"] = vae_config["sample_size"] shapes["width"] = vae_config["sample_size"] - elif "vae_encoder" in config: - vae_import_path = config["vae_encoder"] + + elif "vae_encoder" in model_config: + vae_import_path = model_config["vae_encoder"] vae_class = get_class(f"{vae_import_path[0]}.{vae_import_path[1]}") - vae_config = vae_class.load_config(model, subfolder="vae", **kwargs) + vae_config = vae_class.load_config(model, subfolder="vae_encoder", **kwargs) shapes["num_channels"] = vae_config["out_channels"] shapes["height"] = vae_config["sample_size"] shapes["width"] = vae_config["sample_size"] + else: shapes["num_channels"] = -1 shapes["height"] = -1 diff --git 
a/optimum_benchmark/backends/neural_compressor/backend.py b/optimum_benchmark/backends/neural_compressor/backend.py index cb70fdfc..3fecd3d9 100644 --- a/optimum_benchmark/backends/neural_compressor/backend.py +++ b/optimum_benchmark/backends/neural_compressor/backend.py @@ -8,19 +8,14 @@ from hydra.utils import get_class from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig, TuningCriterion from optimum.intel.neural_compressor.quantization import INCQuantizer -from transformers.modeling_utils import no_init_weights from transformers.utils import ModelOutput -from transformers.utils.logging import set_verbosity_error from ...generators.dataset_generator import DatasetGenerator from ..base import Backend -from ..transformers_utils import randomize_weights +from ..transformers_utils import random_init_weights from .config import INCConfig from .utils import TASKS_TO_INCMODELS -# disable transformers logging -set_verbosity_error() - LOGGER = getLogger("neural-compressor") @@ -36,17 +31,30 @@ def __init__(self, config: INCConfig): if self.config.ptq_quantization: if self.config.no_weights: + LOGGER.info("\t+ Loading no weights AutoModel") self.load_automodel_with_no_weights() else: + LOGGER.info("\t+ Loading pretrained AutoModel") self.load_automodel_from_pretrained() + + LOGGER.info("\t+ Applying post-training quantization") self.quantize_automodel() - self.delete_pretrained_model() + + LOGGER.info("\t+ Loading quantized INCModel") + original_model, self.config.model = self.config.model, self.quantized_model self.load_incmodel_from_pretrained() + self.config.model = original_model + elif self.config.no_weights: + LOGGER.info("\t+ Loading no weights INCModel") self.load_incmodel_with_no_weights() + else: + LOGGER.info("\t+ Loading pretrained INCModel") self.load_incmodel_from_pretrained() + self.tmpdir.cleanup() + def validate_task(self) -> None: if self.config.task not in TASKS_TO_INCMODELS: raise NotImplementedError(f"INCBackend does not support task {self.config.task}") @@ -55,60 +63,52 @@ def validate_task(self) -> None: LOGGER.info(f"Using INCModel class {self.incmodel_class.__name__}") def load_automodel_from_pretrained(self) -> None: - LOGGER.info("\t+ Loading AutoModel from pretrained") self.pretrained_model = self.automodel_class.from_pretrained(self.config.model, **self.config.hub_kwargs) def create_no_weights_model(self) -> None: - LOGGER.info("\t+ Creating no weights model state_dict") - state_dict = torch.nn.Linear(1, 1).state_dict() - + self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights_model") LOGGER.info("\t+ Creating no weights model directory") - self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights") os.makedirs(self.no_weights_model, exist_ok=True) - - LOGGER.info("\t+ Saving no weights model pretrained config") - self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) - - LOGGER.info("\t+ Saving no weights model state_dict") + LOGGER.info("\t+ Creating no weights model state dict") + state_dict = torch.nn.Linear(1, 1).state_dict() + LOGGER.info("\t+ Saving no weights model pytorch_model.bin") torch.save(state_dict, os.path.join(self.no_weights_model, "pytorch_model.bin")) + if self.config.library == "transformers": + LOGGER.info("\t+ Saving no weights model pretrained config") + self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) + def load_automodel_with_no_weights(self) -> None: + LOGGER.info("\t+ Creating no weights model") self.create_no_weights_model() - with 
no_init_weights(): - original_model = self.config.model - self.config.model = self.no_weights_model - LOGGER.info("\t+ Loading no weights model") + with random_init_weights(): + original_model, self.config.model = self.config.model, self.no_weights_model + LOGGER.info("\t+ Loading no weights AutoModel") self.load_automodel_from_pretrained() self.config.model = original_model - LOGGER.info("\t+ Randomizing model weights") - randomize_weights(self.pretrained_model) LOGGER.info("\t+ Tying model weights") self.pretrained_model.tie_weights() def load_incmodel_from_pretrained(self) -> None: - LOGGER.info("\t+ Loading INCModel from pretrained") self.pretrained_model = self.incmodel_class.from_pretrained(self.config.model, **self.config.hub_kwargs) def load_incmodel_with_no_weights(self) -> None: + LOGGER.info("\t+ Creating no weights model") self.create_no_weights_model() - with no_init_weights(): - original_model = self.config.model - self.config.model = self.no_weights_model - LOGGER.info("\t+ Loading no weights model") + with random_init_weights(): + original_model, self.config.model = self.config.model, self.no_weights_model + LOGGER.info("\t+ Loading no weights INCModel") self.load_incmodel_from_pretrained() self.config.model = original_model - LOGGER.info("\t+ Randomizing model weights") - randomize_weights(self.pretrained_model.model) LOGGER.info("\t+ Tying model weights") self.pretrained_model.model.tie_weights() def quantize_automodel(self) -> None: - LOGGER.info("\t+ Attempting to quantize model") - quantized_model_path = f"{self.tmpdir.name}/quantized" + self.quantized_model = f"{self.tmpdir.name}/quantized_model" LOGGER.info("\t+ Processing quantization config") ptq_quantization_config = self.config.ptq_quantization_config.copy() ptq_quantization_config["accuracy_criterion"] = AccuracyCriterion( @@ -139,7 +139,7 @@ def quantize_automodel(self) -> None: LOGGER.info("\t+ Quantizing model") quantizer.quantize( - save_directory=quantized_model_path, + save_directory=self.quantized_model, calibration_dataset=calibration_dataset, quantization_config=ptq_quantization_config, # TODO: add support for these @@ -148,17 +148,20 @@ def quantize_automodel(self) -> None: file_name=None, batch_size=1, ) - self.config.model = quantized_model_path def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + inputs = super().prepare_inputs(inputs) + if self.config.library == "diffusers": return {"prompt": inputs["prompt"]} return inputs + @torch.inference_mode() def forward(self, input: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput: return self.pretrained_model(**input, **kwargs) + @torch.inference_mode() def generate(self, input: Dict[str, Any], kwargs: Dict[str, Any]) -> ModelOutput: return self.pretrained_model.generate(**input, **kwargs) diff --git a/optimum_benchmark/backends/onnxruntime/backend.py b/optimum_benchmark/backends/onnxruntime/backend.py index 33e2694a..64e51d8c 100644 --- a/optimum_benchmark/backends/onnxruntime/backend.py +++ b/optimum_benchmark/backends/onnxruntime/backend.py @@ -3,20 +3,12 @@ from collections import OrderedDict from logging import getLogger from tempfile import TemporaryDirectory -from typing import Any, Callable, Dict, List +from typing import Any, Dict, List import torch -from datasets import Dataset from hydra.utils import get_class from onnxruntime import SessionOptions -from optimum.onnxruntime import ( - ONNX_DECODER_NAME, - ONNX_DECODER_WITH_PAST_NAME, - ORTOptimizer, - ORTQuantizer, - ORTTrainer, - ORTTrainingArguments, -) +from 
optimum.onnxruntime import ONNX_DECODER_NAME, ONNX_DECODER_WITH_PAST_NAME, ORTOptimizer, ORTQuantizer from optimum.onnxruntime.configuration import ( AutoCalibrationConfig, AutoOptimizationConfig, @@ -26,19 +18,14 @@ QuantizationConfig, ) from safetensors.torch import save_file -from transformers import TrainerCallback -from transformers.modeling_utils import no_init_weights -from transformers.utils.logging import set_verbosity_error from ...generators.dataset_generator import DatasetGenerator from ...task_utils import TEXT_GENERATION_TASKS from ..base import Backend +from ..transformers_utils import random_init_weights from .config import ORTConfig from .utils import TASKS_TO_ORTMODELS, TASKS_TO_ORTSD, format_calibration_config, format_quantization_config -# disable transformers logging -set_verbosity_error() - LOGGER = getLogger("onnxruntime") @@ -49,82 +36,81 @@ def __init__(self, config: ORTConfig) -> None: super().__init__(config) self.validate_task() - if self.config.library == "diffusers": - self.ortmodel_class = get_class(TASKS_TO_ORTSD[self.config.task]) - LOGGER.info(f"Using ORTDiffusion class {self.ortmodel_class.__name__}") - elif self.config.task in TASKS_TO_ORTMODELS: - self.ortmodel_class = get_class(TASKS_TO_ORTMODELS[self.config.task]) - LOGGER.info(f"Using ORTModel class {self.ortmodel_class.__name__}") - else: - raise NotImplementedError(f"ORTBackend does not support task {self.config.task}") + self.session_options = SessionOptions() + if self.config.session_options: + LOGGER.info("\t+ Processing session options") + for key, value in self.config.session_options.items(): + setattr(self.session_options, key, value) LOGGER.info("\t+ Creating backend temporary directory") self.tmpdir = TemporaryDirectory() - self.session_options = SessionOptions() - for key, value in self.config.session_options.items(): - setattr(self.session_options, key, value) - if self.config.no_weights: + LOGGER.info("\t+ Loading no weights ORTModel") self.load_ortmodel_with_no_weights() else: + LOGGER.info("\t+ Loading pretrained ORTModel") self.load_ortmodel_from_pretrained() - if self.is_trt_text_generation: - return - if self.is_optimized or self.is_quantized: - original_model = self.config.model - self.config.model = self.pretrained_model.model_save_dir + original_model, self.config.model = self.config.model, self.pretrained_model.model_save_dir if self.is_optimized: + LOGGER.info("\t+ Applying ORT optimization") self.optimize_onnx_files() + self.config.model = self.optimized_model if self.is_quantized: + LOGGER.info("\t+ Applying ORT quantization") self.quantize_onnx_files() + self.config.model = self.quantized_model if self.is_optimized or self.is_quantized: - original_export = self.config.export - self.load_ortmodel_from_pretrained() # load optimized/quantized model - self.config.export = original_export - self.config.model = original_model + original_export, self.config.export = self.config.export, False + LOGGER.info("\t+ Loading optimized/quantized ORTModel") + self.load_ortmodel_from_pretrained() + self.config.model, self.config.export = original_model, original_export self.validate_provider() + self.tmpdir.cleanup() def validate_task(self) -> None: - if self.config.task not in {**TASKS_TO_ORTMODELS, **TASKS_TO_ORTSD}: + if self.config.task in TASKS_TO_ORTSD: + self.ortmodel_class = get_class(TASKS_TO_ORTSD[self.config.task]) + LOGGER.info(f"Using ORTStableDiffusion class {self.ortmodel_class.__name__}") + elif self.config.task in TASKS_TO_ORTMODELS: + self.ortmodel_class = 
get_class(TASKS_TO_ORTMODELS[self.config.task]) + LOGGER.info(f"Using ORTModel class {self.ortmodel_class.__name__}") + else: raise NotImplementedError(f"ORTBackend does not support task {self.config.task}") def validate_provider(self) -> None: - assert ( - self.pretrained_model.providers[0] == self.config.provider - ), f"{self.config.provider} is not first in providers list: {self.pretrained_model.providers}" + if not self.pretrained_model.providers[0] == self.config.provider: + raise ValueError( + f"{self.config.provider} is not first in providers list: {self.pretrained_model.providers}" + ) def create_no_weights_model(self) -> None: + self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights_model") LOGGER.info("\t+ Creating no weights model directory") - self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights") os.makedirs(self.no_weights_model, exist_ok=True) - - LOGGER.info("\t+ Saving pretrained config") - self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) - LOGGER.info("\t+ Creating no weights model state dict") state_dict = torch.nn.Linear(1, 1).state_dict() + LOGGER.info("\t+ Saving no weights model safetensors") + safetensors = os.path.join(self.no_weights_model, "model.safetensors") + save_file(tensors=state_dict, filename=safetensors, metadata={"format": "pt"}) - LOGGER.info("\t+ Saving no weights model state dict") - save_file( - filename=os.path.join(self.no_weights_model, "model.safetensors"), - metadata={"format": "pt"}, - tensors=state_dict, - ) + if self.config.library == "transformers": + LOGGER.info("\t+ Saving no weights model pretrained config") + self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) def load_ortmodel_with_no_weights(self) -> None: + LOGGER.info("\t+ Creating no weights model") self.create_no_weights_model() - with no_init_weights(): - original_model = self.config.model - self.config.model = self.no_weights_model - LOGGER.info("\t+ Loading no weights model") + with random_init_weights(): + original_model, self.config.model = self.config.model, self.no_weights_model + LOGGER.info("\t+ Loading no weights ORTModel") self.load_ortmodel_from_pretrained() self.config.model = original_model @@ -140,10 +126,6 @@ def load_ortmodel_from_pretrained(self) -> None: **self.ortmodel_kwargs, ) - @property - def is_trt_text_generation(self) -> bool: - return self.config.provider == "TensorrtExecutionProvider" and self.config.task in TEXT_GENERATION_TASKS - @property def is_optimized(self) -> bool: return (self.config.auto_optimization is not None) or self.config.optimization @@ -189,7 +171,7 @@ def inputs_names(self) -> List[str]: def optimize_onnx_files(self) -> None: LOGGER.info("\t+ Attempting optimization") - optimized_model_path = os.path.join(self.tmpdir.name, "optimized") + self.optimized_model = os.path.join(self.tmpdir.name, "optimized") LOGGER.info("\t+ Processing optimization config") if self.config.auto_optimization is not None: optimization_config = AutoOptimizationConfig.with_optimization_level( @@ -206,24 +188,20 @@ def optimize_onnx_files(self) -> None: LOGGER.info("\t+ Optimizing ORTModel") optimizer.optimize( optimization_config, - save_dir=optimized_model_path, + save_dir=self.optimized_model, # TODO: add support for these use_external_data_format=None, one_external_file=True, file_suffix="", ) - if self.pretrained_processor is not None: - self.pretrained_processor.save_pretrained(optimized_model_path) - + self.pretrained_processor.save_pretrained(self.optimized_model) if 
self.pretrained_config is not None: - self.pretrained_config.save_pretrained(optimized_model_path) - - self.config.model = optimized_model_path + self.pretrained_config.save_pretrained(self.optimized_model) def quantize_onnx_files(self) -> None: LOGGER.info("\t+ Attempting quantization") - quantized_model_path = f"{self.tmpdir.name}/quantized" + self.quantized_model = f"{self.tmpdir.name}/quantized_model" if self.is_calibrated and len(self.onnx_files_names) > 1: raise NotImplementedError( @@ -286,7 +264,7 @@ def quantize_onnx_files(self) -> None: LOGGER.info("\t+ Quantizing model") quantizer.quantize( - save_dir=quantized_model_path, + save_dir=self.quantized_model, quantization_config=quantization_config, calibration_tensors_range=calibration_tensors_range, # TODO: add support for these (maybe) @@ -294,56 +272,25 @@ def quantize_onnx_files(self) -> None: preprocessor=None, file_suffix="", ) - if self.pretrained_processor is not None: - self.pretrained_processor.save_pretrained(quantized_model_path) - + self.pretrained_processor.save_pretrained(self.quantized_model) if self.pretrained_config is not None: - self.pretrained_config.save_pretrained(quantized_model_path) - - self.config.model = quantized_model_path - - def prepare_for_inference(self, **kwargs) -> None: - if self.is_trt_text_generation: - LOGGER.info("\t+ Creating dynamic shapes for Tensorrt engine. Engine creation might take a while.") - batch_size = kwargs["batch_size"] - max_new_tokens = kwargs["max_new_tokens"] - sequence_length = kwargs["sequence_length"] - self.config.provider_options = { - **self.config.provider_options, - "trt_profile_min_shapes": ( - f"input_ids:{batch_size}x{sequence_length}," - f"attention_mask:{batch_size}x{sequence_length}," - f"position_ids:{batch_size}x{sequence_length}" - ), - "trt_profile_max_shapes": ( - f"input_ids:{batch_size}x{sequence_length + max_new_tokens}," - f"attention_mask:{batch_size}x{sequence_length + max_new_tokens}," - f"position_ids:{batch_size}x{sequence_length + max_new_tokens}" - ), - "trt_profile_opt_shapes": ( - f"input_ids:{batch_size}x{sequence_length + max_new_tokens}," - f"attention_mask:{batch_size}x{sequence_length + max_new_tokens}," - f"position_ids:{batch_size}x{sequence_length + max_new_tokens}" - ), - } - self.load_ortmodel_from_pretrained() - self.validate_provider() + self.pretrained_config.save_pretrained(self.quantized_model) def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + inputs = super().prepare_inputs(inputs) + if self.config.library == "diffusers": + return {"prompt": inputs["prompt"]} + else: + for key, value in list(inputs.items()): + if key in self.inputs_names: + inputs[key] = value.to(self.config.device) + else: + LOGGER.warning(f"Input {key} is not in expected inputs names. Removing it.") + inputs.pop(key) return inputs - LOGGER.info(f"\t+ Moving inputs tensors to device {self.config.device}") - for key, value in list(inputs.items()): - if key in self.inputs_names: - inputs[key] = value.to(self.config.device) - else: - LOGGER.warning(f"Input {key} is not in expected inputs names. 
Removing it.") - inputs.pop(key) - - return inputs - def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: return self.pretrained_model.forward(**inputs, **kwargs) @@ -353,29 +300,6 @@ def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDic def call(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: return self.pretrained_model(**inputs, **kwargs) - def train( - self, - training_dataset: Dataset, - training_arguments: Dict[str, Any], - training_callbacks: List[TrainerCallback], - training_data_collator: Callable[[List[Dict[str, Any]]], Dict[str, Any]], - ) -> None: - LOGGER.info("\t+ Setting dataset format to `torch`") - training_dataset.set_format(type="torch", columns=list(training_dataset.features.keys())) - LOGGER.info("\t+ Wrapping training arguments with optimum.onnxruntime.ORTTrainingArguments") - training_arguments = ORTTrainingArguments(**training_arguments) - LOGGER.info("\t+ Wrapping model with optimum.onnxruntime.ORTTrainer") - trainer = ORTTrainer( - model=self.pretrained_model, - args=training_arguments, - callbacks=training_callbacks, - train_dataset=training_dataset, - data_collator=training_data_collator, - ) - LOGGER.info("\t+ Starting training") - trainer.train() - LOGGER.info("\t+ Training finished successfully") - def clean(self) -> None: super().clean() diff --git a/optimum_benchmark/backends/onnxruntime/config.py b/optimum_benchmark/backends/onnxruntime/config.py index 19ad747d..07101f78 100644 --- a/optimum_benchmark/backends/onnxruntime/config.py +++ b/optimum_benchmark/backends/onnxruntime/config.py @@ -1,10 +1,9 @@ -import os from dataclasses import dataclass, field from typing import Any, Dict, Optional from ...import_utils import onnxruntime_version +from ...task_utils import TEXT_GENERATION_TASKS from ..config import BackendConfig -from ..peft_utils import PEFT_CONFIGS, PEFT_TASKS_TYPES QUANTIZATION_CONFIG = { "is_static": False, @@ -22,8 +21,6 @@ # is_static is mandatory } -TRT_PROVIDER_OPTIONS = {"trt_engine_cache_enable": True, "trt_engine_cache_path": "/tmp/trt_cache"} - IO_BINDING_LIBRARIES = ["transformers", "timm"] IO_BINDING_PROVIDERS = ["CPUExecutionProvider", "CUDAExecutionProvider"] DEVICE_PROVIDER_MAP = {"cpu": "CPUExecutionProvider", "cuda": "CUDAExecutionProvider"} @@ -46,7 +43,7 @@ class ORTConfig(BackendConfig): # provider options provider: Optional[str] = None - provider_options: Dict[str, Any] = field(default_factory=lambda: {}) + provider_options: Dict[str, Any] = field(default_factory=dict) # inference options use_io_binding: Optional[bool] = None @@ -76,10 +73,6 @@ class ORTConfig(BackendConfig): calibration: bool = False calibration_config: Dict[str, Any] = field(default_factory=dict) - # peft options - peft_strategy: Optional[str] = None - peft_config: Dict[str, Any] = field(default_factory=dict) - def __post_init__(self): super().__post_init__() @@ -95,9 +88,8 @@ def __post_init__(self): if self.use_io_binding is None: self.use_io_binding = self.provider in IO_BINDING_PROVIDERS and self.library in IO_BINDING_LIBRARIES - if self.provider == "TensorrtExecutionProvider": - self.provider_options = {**TRT_PROVIDER_OPTIONS, **self.provider_options} - os.makedirs(self.provider_options["trt_engine_cache_path"], exist_ok=True) + if self.provider == "TensorrtExecutionProvider" and self.task in TEXT_GENERATION_TASKS: + raise NotImplementedError("we don't support TensorRT for text generation tasks") if self.quantization: self.quantization_config = {**QUANTIZATION_CONFIG, 
**self.quantization_config} @@ -118,14 +110,3 @@ def __post_init__(self): if self.calibration: self.calibration_config = {**CALIBRATION_CONFIG, **self.calibration_config} - - if self.peft_strategy is not None: - if self.peft_strategy not in PEFT_CONFIGS: - raise ValueError( - f"`peft_strategy` must be one of {list(PEFT_CONFIGS.keys())}. Got {self.peft_strategy} instead." - ) - PEFT_CONFIG = PEFT_CONFIGS[self.peft_strategy] - self.peft_config = {**PEFT_CONFIG, **self.peft_config} - - if self.peft_config["task_type"] is None: - raise ValueError(f"`peft_config.task_type` must be set to one of the following {PEFT_TASKS_TYPES}") diff --git a/optimum_benchmark/backends/openvino/backend.py b/optimum_benchmark/backends/openvino/backend.py index e883c3ac..ae2d2918 100644 --- a/optimum_benchmark/backends/openvino/backend.py +++ b/optimum_benchmark/backends/openvino/backend.py @@ -12,13 +12,12 @@ from optimum.intel.openvino import OVConfig as OVQuantizationConfig # naming conflict from optimum.intel.openvino import OVQuantizer from safetensors.torch import save_file -from transformers.modeling_utils import no_init_weights from transformers.utils.logging import set_verbosity_error from ...generators.dataset_generator import DatasetGenerator from ...task_utils import TEXT_GENERATION_TASKS from ..base import Backend -from ..transformers_utils import randomize_weights +from ..transformers_utils import random_init_weights from .config import OVConfig from .utils import TASKS_TO_OVMODEL @@ -35,27 +34,35 @@ def __init__(self, config: OVConfig) -> None: super().__init__(config) self.validate_task() - self.ovmodel_class = get_class(TASKS_TO_OVMODEL[self.config.task]) - LOGGER.info(f"\t+ Using OVModel class {self.ovmodel_class.__name__}") - if self.config.inter_op_num_threads is not None: - self.set_inter_op_num_threads() + LOGGER.info(f"\t+ Setting inter_op_num_threads to {self.config.inter_op_num_threads}") + self.config.openvino_config[properties.inference_num_threads()] = self.config.inter_op_num_threads + LOGGER.info("\t+ Creating backend temporary directory") self.tmpdir = TemporaryDirectory() if self.config.quantization: if self.config.no_weights: + LOGGER.info("\t+ Loading no weights AutoModel") self.load_automodel_with_no_weights() else: + LOGGER.info("\t+ Loading pretrained AutoModel") self.load_automodel_from_pretrained() - original_export = self.config.export - self.config.export = False + + LOGGER.info("\t+ Applying post-training quantization") self.quantize_automodel() + + original_model, self.config.model = self.config.model, self.quantized_model + original_export, self.config.export = self.config.export, False + LOGGER.info("\t+ Loading quantized OVModel") self.load_ovmodel_from_pretrained() - self.config.export = original_export + self.config.model, self.config.export = original_model, original_export + elif self.config.no_weights: + LOGGER.info("\t+ Loading no weights OVModel") self.load_ovmodel_with_no_weights() else: + LOGGER.info("\t+ Loading pretrained OVModel") self.load_ovmodel_from_pretrained() self.tmpdir.cleanup() @@ -64,40 +71,33 @@ def validate_task(self) -> None: if self.config.task not in TASKS_TO_OVMODEL: raise NotImplementedError(f"OVBackend does not support task {self.config.task}") - def set_inter_op_num_threads(self) -> None: - LOGGER.info(f"\t+ Setting inter_op_num_threads to {self.config.inter_op_num_threads}") - self.config.openvino_config[properties.inference_num_threads()] = self.config.inter_op_num_threads + self.ovmodel_class = 
get_class(TASKS_TO_OVMODEL[self.config.task]) + LOGGER.info(f"\t+ Using OVModel class {self.ovmodel_class.__name__}") def create_no_weights_model(self) -> None: + self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights_model") LOGGER.info("\t+ Creating no weights model directory") - self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights") os.makedirs(self.no_weights_model, exist_ok=True) - - LOGGER.info("\t+ Saving pretrained config") - self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) - LOGGER.info("\t+ Creating no weights model state dict") state_dict = torch.nn.Linear(1, 1).state_dict() + LOGGER.info("\t+ Saving no weights model safetensors") + safetensors = os.path.join(self.no_weights_model, "model.safetensors") + save_file(tensors=state_dict, filename=safetensors, metadata={"format": "pt"}) - LOGGER.info("\t+ Saving no weights model state dict") - save_file( - filename=os.path.join(self.no_weights_model, "model.safetensors"), - metadata={"format": "pt"}, - tensors=state_dict, - ) + if self.config.library == "transformers": + LOGGER.info("\t+ Saving no weights model pretrained config") + self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) def load_automodel_with_no_weights(self) -> None: + LOGGER.info("\t+ Creating no weights model") self.create_no_weights_model() - with no_init_weights(): - original_model = self.config.model - self.config.model = self.no_weights_model - LOGGER.info("\t+ Loading no weights model") + with random_init_weights(): + original_model, self.config.model = self.config.model, self.no_weights_model + LOGGER.info("\t+ Loading no weights AutoModel") self.load_automodel_from_pretrained() self.config.model = original_model - LOGGER.info("\t+ Randomizing weights") - randomize_weights(self.pretrained_model) LOGGER.info("\t+ Tying model weights") self.pretrained_model.tie_weights() @@ -105,14 +105,16 @@ def load_automodel_from_pretrained(self) -> None: self.pretrained_model = self.automodel_class.from_pretrained(self.config.model, **self.config.hub_kwargs) def load_ovmodel_with_no_weights(self) -> None: + LOGGER.info("\t+ Creating no weights model") self.create_no_weights_model() - with no_init_weights(): - original_model = self.config.model - self.config.model = self.no_weights_model - LOGGER.info("\t+ Loading OVModel with no weights") + with random_init_weights(): + original_model, self.config.model = self.config.model, self.no_weights_model + original_export, self.config.export = self.config.export, True + LOGGER.info("\t+ Loading no weights OVModel") self.load_ovmodel_from_pretrained() self.config.model = original_model + self.config.export = original_export def load_ovmodel_from_pretrained(self) -> None: self.pretrained_model = self.ovmodel_class.from_pretrained( @@ -135,7 +137,7 @@ def ovmodel_kwargs(self) -> Dict[str, Any]: def quantize_automodel(self) -> None: LOGGER.info("\t+ Attempting quantization") - quantized_model_path = f"{self.tmpdir.name}/quantized" + self.quantized_model = f"{self.tmpdir.name}/quantized_model" LOGGER.info("\t+ Processing quantization config") quantization_config = OVQuantizationConfig(**self.config.quantization_config) LOGGER.info("\t+ Creating quantizer") @@ -154,7 +156,7 @@ def quantize_automodel(self) -> None: LOGGER.info("\t+ Quantizing model") quantizer.quantize( - save_directory=quantized_model_path, + save_directory=self.quantized_model, quantization_config=quantization_config, calibration_dataset=calibration_dataset, # TODO: add support for these 
(maybe) @@ -164,7 +166,6 @@ def quantize_automodel(self) -> None: file_name=None, batch_size=1, ) - self.config.model = quantized_model_path def prepare_for_inference(self, **kwargs) -> None: if self.config.reshape: @@ -188,6 +189,8 @@ def prepare_for_inference(self, **kwargs) -> None: self.pretrained_model.compile() def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + inputs = super().prepare_inputs(inputs) + if self.config.library == "diffusers": return {"prompt": inputs["prompt"]} diff --git a/optimum_benchmark/backends/peft_utils.py b/optimum_benchmark/backends/peft_utils.py index 8ec7d1fa..037d4e87 100644 --- a/optimum_benchmark/backends/peft_utils.py +++ b/optimum_benchmark/backends/peft_utils.py @@ -1,102 +1,13 @@ -from typing import Type +from typing import Any, Dict + +from transformers import PreTrainedModel from ..import_utils import is_peft_available if is_peft_available(): - from peft import ( - AdaLoraConfig, - IA3Config, - LoraConfig, - PeftConfig, - PrefixTuningConfig, - PromptEncoderConfig, - PromptLearningConfig, - ) - -PEFT_TASKS_TYPES = ["SEQ_CLS", "SEQ_2_SEQ_LM", "CAUSAL_LM", "TOKEN_CLS", "QUESTION_ANS", "FEATURE_EXTRACTION"] - -PEFT_CONFIG = { - "base_model_name_or_path": None, - "revision": None, # str - "peft_type": None, # PeftType: can't be changed anyway - "task_type": None, # TaskType: SEQ_CLS, SEQ_2_SEQ_LM, CAUSAL_LM, TOKEN_CLS, QUESTION_ANS, FEATURE_EXTRACTION - "inference_mode": False, -} -LORA_CONFIG = { - **PEFT_CONFIG, # inherits from PEFT_CONFIG - "auto_mapping": None, # dict - "r": 8, # int - "target_modules": None, # List[str] | str - "lora_alpha": 8, # int - "lora_dropout": 0, # float - "fan_in_fan_out": False, # bool - "bias": "none", # str - "modules_to_save": None, # List[str] - "init_lora_weights": True, # bool - "layers_to_transform": None, # List[int] | int - "layers_pattern": None, # str -} -ADA_LORA_CONFIG = { - **LORA_CONFIG, # inherits from LORA_CONFIG - "target_r": None, # int - "init_r": None, # int - "tinit": None, # int - "tfinal": None, # int - "deltaT": None, # int - "beta1": None, # float - "beta2": None, # float - "orth_reg_weight": None, # float - "total_step": None, # Optional[int] - "rank_pattern": None, # Optional[dict] -} -PROMPT_TUNING_CONFIG = { - **PEFT_CONFIG, # inherits from PEFT_CONFIG - "num_virtual_tokens": None, # int - "token_dim": None, # int - "num_transformer_submodules": None, # int - "num_attention_heads": None, # int - "num_layers": None, # int -} -PREFIX_TUNING_CONFIG = { - **PROMPT_TUNING_CONFIG, # inherits from PROMPT_TUNING_CONFIG - "encoder_hidden_size": None, # int - "prefix_projection": False, # bool -} -P_TUNING_CONFIG = { - **PROMPT_TUNING_CONFIG, # inherits from PROMPT_TUNING_CONFIG - "encoder_reparameterization_type": None, # Union[str, PromptEncoderReparameterizationType] - "encoder_hidden_size": None, # int - "encoder_num_layers": None, # int - "encoder_dropout": None, # float -} -IA3_CONFIG = { - **PEFT_CONFIG, # inherits from PEFT_CONFIG - "target_modules": None, # List[str] | str - "feedforward_modules": None, # List[str] | str - "fan_in_fan_out": False, # bool - "modules_to_save": None, # List[str] - "init_ia3_weights": True, # bool -} -PEFT_CONFIGS = { - "lora": LORA_CONFIG, - "prefix_tuning": PREFIX_TUNING_CONFIG, - "prompt_tuning": PROMPT_TUNING_CONFIG, - "p_tuning": P_TUNING_CONFIG, - "ada_lora": ADA_LORA_CONFIG, - "ia3": IA3_CONFIG, -} + from peft import PEFT_TYPE_TO_CONFIG_MAPPING, get_peft_model # type: ignore -def get_peft_config_class(peft_strategy: str) -> 
Type["PeftConfig"]: - if peft_strategy == "lora": - return LoraConfig - elif peft_strategy == "ada_lora": - return AdaLoraConfig - elif peft_strategy == "prompt_tuning": - return PromptLearningConfig - elif peft_strategy == "prefix_tuning": - return PrefixTuningConfig - elif peft_strategy == "p_tuning": - return PromptEncoderConfig - elif peft_strategy == "ia3": - return IA3Config +def apply_peft(model: PreTrainedModel, peft_type: str, peft_config: Dict[str, Any]) -> PreTrainedModel: + peft_config = PEFT_TYPE_TO_CONFIG_MAPPING[peft_type](**peft_config) + return get_peft_model(model=model, peft_config=peft_config) diff --git a/optimum_benchmark/backends/text_generation_inference/__init__.py b/optimum_benchmark/backends/py_tgi/__init__.py similarity index 100% rename from optimum_benchmark/backends/text_generation_inference/__init__.py rename to optimum_benchmark/backends/py_tgi/__init__.py diff --git a/optimum_benchmark/backends/text_generation_inference/backend.py b/optimum_benchmark/backends/py_tgi/backend.py similarity index 53% rename from optimum_benchmark/backends/text_generation_inference/backend.py rename to optimum_benchmark/backends/py_tgi/backend.py index 8132b276..42e1b9e9 100644 --- a/optimum_benchmark/backends/text_generation_inference/backend.py +++ b/optimum_benchmark/backends/py_tgi/backend.py @@ -5,151 +5,133 @@ from typing import Any, Dict, List import torch -from huggingface_hub import snapshot_download from py_tgi import TGI -from safetensors.torch import save_model -from transformers import logging as transformers_logging +from safetensors.torch import save_file +from transformers import GenerationConfig -from ...system_utils import is_nvidia_system, is_rocm_system from ...task_utils import TEXT_GENERATION_TASKS from ..base import Backend -from ..transformers_utils import randomize_weights -from .config import TGIConfig +from ..transformers_utils import random_init_weights +from .config import PyTGIConfig # bachend logger -LOGGER = getLogger("text-generation-inference") +LOGGER = getLogger("py-tgi") -# disable other loggers -transformers_logging.set_verbosity_error() +class PyTGIBackend(Backend[PyTGIConfig]): + NAME: str = "py-tgi" -class TGIBackend(Backend[TGIConfig]): - NAME: str = "text-generation-inference" - - def __init__(self, config: TGIConfig) -> None: + def __init__(self, config: PyTGIConfig) -> None: super().__init__(config) self.validate_task() - if self.config.device == "cuda" and is_nvidia_system(): - self.devices = None - self.gpus = self.config.device_ids - LOGGER.info(f"\t+ CUDA devices: {self.gpus}") - if self.config.device == "cuda" and is_rocm_system(): - self.gpus = None - device_ids = list(map(int, self.config.device_ids.split(","))) - renderDs = [file for file in os.listdir("/dev/dri") if file.startswith("renderD")] - self.devices = ["/dev/kfd"] + [f"/dev/dri/{renderDs[i]}" for i in device_ids] - LOGGER.info(f"\t+ ROCm devices: {self.devices}") - else: - self.gpus = None - self.devices = None - LOGGER.info("\t+ CPU device") + if self.generation_config is None: + self.generation_config = GenerationConfig() LOGGER.info("\t+ Creating backend temporary directory") - self.tmp_dir = TemporaryDirectory() + self.tmpdir = TemporaryDirectory() if self.config.no_weights: + LOGGER.info("\t+ Loading no weights model") self.load_model_with_no_weights() else: + LOGGER.info("\t+ Downloading pretrained model") self.download_pretrained_model() + LOGGER.info("\t+ Preparing generation config") + self.prepare_generation_config() + LOGGER.info("\t+ Loading pretrained 
model") self.load_model_from_pretrained() + self.tmpdir.cleanup() + def validate_task(self) -> None: if self.config.task not in TEXT_GENERATION_TASKS: raise NotImplementedError(f"TGI does not support task {self.config.task}") def download_pretrained_model(self) -> None: LOGGER.info("\t+ Downloading pretrained model") - snapshot_download(self.config.model, **self.config.hub_kwargs) + with torch.device("meta"): + self.automodel_class.from_pretrained(self.config.model, **self.config.hub_kwargs) - def prepare_pretrained_model(self) -> None: - LOGGER.info("\t+ Modifying pretrained generation config") - self.generation_config.eos_token_id = -100 - self.generation_config.pad_token_id = -101 - - LOGGER.info("\t+ Saving new pretrained generation config") + def prepare_generation_config(self) -> None: + LOGGER.info("\t+ Modifying generation config for fixed length generation") + self.generation_config.eos_token_id = None + self.generation_config.pad_token_id = None model_cache_folder = f"models/{self.config.model}".replace("/", "--") model_cache_path = f"{self.config.volume}/{model_cache_folder}" - snapshot_file = f"{model_cache_path}/refs/{self.config.hub_kwargs.get('revision', 'main')}" snapshot_ref = open(snapshot_file, "r").read().strip() - model_snapshot_path = f"{model_cache_path}/snapshots/{snapshot_ref}" + LOGGER.info("\t+ Saving new pretrained generation config") self.generation_config.save_pretrained(save_directory=model_snapshot_path) - def load_model_from_pretrained(self) -> None: - self.prepare_pretrained_model() - self.start_tgi_server() - def create_no_weights_model(self) -> None: + self.no_weights_model = os.path.join(self.tmp_dir.name, "no_weights_model") LOGGER.info("\t+ Creating no weights model directory") - self.no_weights_model = os.path.join(self.config.volume, "no_weights_model") os.makedirs(self.no_weights_model, exist_ok=True) - - LOGGER.info("\t+ Saving pretrained config") - self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) - - LOGGER.info("\t+ Saving pretrained tokenizer") - self.pretrained_processor.save_pretrained(save_directory=self.no_weights_model) - - LOGGER.info("\t+ Saving no weights model") - save_model( - filename=os.path.join(self.no_weights_model, "model.safetensors"), - model=torch.nn.Linear(1, 1), - metadata={"format": "pt"}, - ) - # unlike transformers api, TGI won't accept an empty model.safetensors + LOGGER.info("\t+ Creating no weights model state dict") + state_dict = torch.nn.Linear(1, 1).state_dict() + LOGGER.info("\t+ Saving no weights model safetensors") + safetensor = os.path.join(self.no_weights_model, "model.safetensors") + save_file(tensors=state_dict, filename=safetensor, metadata={"format": "pt"}) + # unlike Transformers api, TGI won't accept any missing tensors # so we need to materialize the model and resave it LOGGER.info(f"\t+ Loading no weights model from {self.no_weights_model}") - self.pretrained_model = self.automodel_class.from_pretrained( - self.no_weights_model, - **self.config.hub_kwargs, - device_map="auto", # for faster/safer loading - ) - - LOGGER.info("\t+ Randomizing weights") - randomize_weights(self.pretrained_model) - + with random_init_weights(): + self.pretrained_model = self.automodel_class.from_pretrained( + self.no_weights_model, **self.config.hub_kwargs, device_map="auto", _fast_init=False + ) LOGGER.info("\t+ Saving no weights model") self.pretrained_model.save_pretrained(save_directory=self.no_weights_model) - self.delete_pretrained_model() - - LOGGER.info("\t+ Saving generation config") - 
self.generation_config.eos_token_id = -100 - self.generation_config.pad_token_id = -101 + LOGGER.info("\t+ Saving no weights model pretrained config") + self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) + LOGGER.info("\t+ Saving no weights model pretrained processor") + self.pretrained_processor.save_pretrained(save_directory=self.no_weights_model) + LOGGER.info("\t+ Modifying generation config for fixed length generation") + self.generation_config.eos_token_id = None + self.generation_config.pad_token_id = None + LOGGER.info("\t+ Saving new pretrained generation config") self.generation_config.save_pretrained(save_directory=self.no_weights_model) def load_model_with_no_weights(self) -> None: + LOGGER.info("\t+ Creating no weights model") self.create_no_weights_model() - original_model = self.config.model - self.config.model = "data/no_weights_model" - self.start_tgi_server() - self.config.model = original_model - def start_tgi_server(self) -> None: + original_volume, self.config.volume = self.config.volume, self.tmpdir.name + original_model, self.config.model = self.config.model, "/data/no_weights_model" + LOGGER.info("\t+ Loading no weights model") + self.load_model_from_pretrained() + self.config.model, self.config.volume = original_model, original_volume + + def load_model_from_pretrained(self) -> None: self.pretrained_model = TGI( + # model model=self.config.model, dtype=self.config.dtype, - image=self.config.image, quantize=self.config.quantize, - port=self.config.port, - volume=self.config.volume, - address=self.config.address, + # docker + image=self.config.image, shm_size=self.config.shm_size, - gpus=self.gpus, - devices=self.devices, + address=self.config.address, + volume=self.config.volume, + port=self.config.port, + # device + gpus=self.config.gpus, + devices=self.config.devices, + # sharding sharded=self.config.sharded, num_shard=self.config.num_shard, + # other disable_custom_kernels=self.config.disable_custom_kernels, - revision=self.config.hub_kwargs.get("revision", "main"), trust_remote_code=self.config.hub_kwargs.get("trust_remote_code", False), + revision=self.config.hub_kwargs.get("revision", "main"), ) def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: - if "input_ids" in inputs: - return {"prompt": self.pretrained_processor.batch_decode(inputs["input_ids"].tolist())} - elif "inputs" in inputs: + if "inputs" in inputs: return {"prompt": self.pretrained_processor.batch_decode(inputs["inputs"].tolist())} + elif "input_ids" in inputs: + return {"prompt": self.pretrained_processor.batch_decode(inputs["input_ids"].tolist())} else: raise ValueError("inputs must contain either input_ids or inputs") @@ -158,16 +140,14 @@ def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> List[str]: def generate(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> List[str]: return self.pretrained_model.generate( - **inputs, - do_sample=kwargs.get("do_sample", False), - max_new_tokens=kwargs.get("max_new_tokens", 1), + **inputs, do_sample=kwargs.get("do_sample", False), max_new_tokens=kwargs.get("max_new_tokens", 1) ) def clean(self) -> None: super().clean() - if hasattr(self, "tmp_dir"): + if hasattr(self, "tmpdir"): LOGGER.info("\t+ Cleaning temporary directory") - self.tmp_dir.cleanup() + self.tmpdir.cleanup() gc.collect() diff --git a/optimum_benchmark/backends/text_generation_inference/config.py b/optimum_benchmark/backends/py_tgi/config.py similarity index 58% rename from
optimum_benchmark/backends/text_generation_inference/config.py rename to optimum_benchmark/backends/py_tgi/config.py index 2e88597d..62e91321 100644 --- a/optimum_benchmark/backends/text_generation_inference/config.py +++ b/optimum_benchmark/backends/py_tgi/config.py @@ -1,27 +1,31 @@ import os from dataclasses import dataclass -from typing import Optional +from typing import List, Optional from ...import_utils import py_tgi_version +from ...system_utils import is_nvidia_system, is_rocm_system from ..config import BackendConfig @dataclass -class TGIConfig(BackendConfig): - name: str = "text-generation-inference" +class PyTGIConfig(BackendConfig): + name: str = "py-tgi" version: Optional[str] = py_tgi_version() - _target_: str = "optimum_benchmark.backends.text_generation_inference.backend.TGIBackend" + _target_: str = "optimum_benchmark.backends.py_tgi.backend.PyTGIBackend" # optimum benchmark specific no_weights: bool = False # docker options image: str = "ghcr.io/huggingface/text-generation-inference:latest" - volume: str = f"{os.path.expanduser('~')}/.cache/huggingface/hub" + volume: str = os.path.expanduser("~/.cache/huggingface/hub") address: str = "127.0.0.1" shm_size: str = "1g" port: int = 1111 + gpus: Optional[str] = None # "0,1,2,3" + devices: Optional[List[str]] = None # ["/dev/dri/renderD128", "/dev/dri/renderD129"] + # sharding options sharded: Optional[bool] = None # None, True, False num_shard: Optional[int] = None # None, 1, 2, 4, 8, 16, 32, 64 @@ -41,3 +45,11 @@ def __post_init__(self): if self.quantize is not None: if self.quantize not in ["bitsandbytes-nf4", "bitsandbytes-fp4", "awq", "gptq"]: raise ValueError(f"Invalid value for quantize: {self.quantize}") + + if self.gpus is None and self.device == "cuda" and is_nvidia_system(): + self.gpus = self.device_ids + + if self.devices is None and self.device == "cuda" and is_rocm_system(): + device_ids = list(map(int, self.device_ids.split(","))) + renderDs = [file for file in os.listdir("/dev/dri") if file.startswith("renderD")] + self.devices = ["/dev/kfd"] + [f"/dev/dri/{renderDs[i]}" for i in device_ids] diff --git a/optimum_benchmark/backends/pytorch/backend.py b/optimum_benchmark/backends/pytorch/backend.py index d76789db..9c377f12 100644 --- a/optimum_benchmark/backends/pytorch/backend.py +++ b/optimum_benchmark/backends/pytorch/backend.py @@ -5,30 +5,30 @@ from tempfile import TemporaryDirectory from typing import Any, Callable, Dict, List -import datasets.utils.logging as datasets_logging import torch -import transformers.utils.logging as transformers_logging from datasets import Dataset from safetensors.torch import save_file -from transformers import Trainer, TrainerCallback, TrainerState, TrainingArguments -from transformers.modeling_utils import no_init_weights - -from ...import_utils import is_deepspeed_available, is_peft_available +from transformers import ( + AwqConfig, + BitsAndBytesConfig, + GPTQConfig, + Trainer, + TrainerCallback, + TrainerState, + TrainingArguments, +) + +from ...import_utils import is_deepspeed_available, is_torch_distributed_available from ..base import Backend -from ..peft_utils import get_peft_config_class -from ..transformers_utils import randomize_weights +from ..peft_utils import apply_peft +from ..transformers_utils import random_init_weights from .config import PyTorchConfig -if is_peft_available(): - from peft import get_peft_model # type: ignore - if is_deepspeed_available(): - from deepspeed import init_inference # type: ignore - + from deepspeed import init_inference -# 
disable other loggers -datasets_logging.set_verbosity_error() -transformers_logging.set_verbosity_error() +if is_torch_distributed_available(): + import torch.distributed # bachend logger LOGGER = getLogger("pytorch") @@ -41,7 +41,7 @@ def __init__(self, config: PyTorchConfig): super().__init__(config) self.validate_library() - # Threads + # Thread settings if self.config.inter_op_num_threads is not None: LOGGER.info(f"\t+ Setting pytorch inter_op_num_threads({self.config.inter_op_num_threads}))") torch.set_num_threads(self.config.inter_op_num_threads) @@ -63,13 +63,17 @@ def __init__(self, config: PyTorchConfig): else: self.quantization_config = None + if self.config.deepspeed_inference: + if self.quantization_config is not None: + raise ValueError("Deepspeed-Inference is not compatible with Transformers quantization") + LOGGER.info("\t+ Creating backend temporary directory") self.tmpdir = TemporaryDirectory() - if self.config.no_weights and self.config.library == "diffusers": - raise ValueError("Diffusion pipelines are not supported with no_weights=True") + if self.config.no_weights and (self.config.library == "diffusers" or self.config.library == "timm"): + raise ValueError("Diffusion pipelines and Timm models don't support no weights") elif self.config.no_weights: - LOGGER.info("\t+ Loading model with no weights") + LOGGER.info("\t+ Loading model with random weights") self.load_model_with_no_weights() else: LOGGER.info("\t+ Loading model with pretrained weights") @@ -103,19 +107,11 @@ def __init__(self, config: PyTorchConfig): self.pretrained_model.forward, **self.config.torch_compile_config ) - if self.config.peft_strategy is not None: - LOGGER.info("\t+ Using PEFT") - peft_config_class = get_peft_config_class(self.config.peft_strategy) - peft_config = peft_config_class(**self.config.peft_config) - self.pretrained_model = get_peft_model(self.pretrained_model, peft_config=peft_config) + if self.config.peft_type is not None: + LOGGER.info("\t+ Applying PEFT") + self.pretrained_model = apply_peft(self.pretrained_model, self.config.peft_type, self.config.peft_config) - if self.config.deepspeed_inference: - LOGGER.info("\t+ Using DeepSpeed-Inference") - self.pretrained_model = init_inference( - self.pretrained_model, - config=self.config.deepspeed_inference_config, - dtype=getattr(self.pretrained_model, "dtype", None), - ) + self.tmpdir.cleanup() def validate_library(self) -> None: if self.config.library == "timm": @@ -130,38 +126,46 @@ def validate_library(self) -> None: def load_model_from_pretrained(self) -> None: if self.config.library == "timm": LOGGER.info("\t+ Loading Timm model") - self.pretrained_model = self.automodel_class(self.config.model) - self.pretrained_model.to(self.config.device) + self.pretrained_model = self.automodel_class(model_name=self.config.model) + if self.config.device != "cpu": + LOGGER.info(f"\t+ Moving model to device: {self.config.device}") + self.pretrained_model.to(self.config.device) elif self.config.library == "diffusers": LOGGER.info("\t+ Loading Diffusion pipeline") self.pretrained_model = self.automodel_class.from_pretrained( - pretrained_model_name_or_path=self.config.model, + pretrained_model_or_path=self.config.model, device_map=self.config.device_map, **self.config.hub_kwargs, **self.automodel_kwargs, ) - if self.config.device_map is None: + if self.config.device_map is None and self.config.device != "cpu": LOGGER.info(f"\t+ Moving pipeline to device: {self.config.device}") self.pretrained_model.to(self.config.device) - elif 
self.is_bnb_quantized: - LOGGER.info("\t+ Loading BnB quantized model") + elif self.config.deepspeed_inference: + with torch.device("cpu"): + LOGGER.info("\t+ Loading DeepSpeed model directly on CPU to avoid OOM") + self.pretrained_model = self.automodel_class.from_pretrained( + pretrained_model_name_or_path=self.config.model, **self.config.hub_kwargs, **self.automodel_kwargs + ) + + torch.distributed.barrier() # better safe than hanging + LOGGER.info("\t+ Initializing DeepSpeed Inference") + self.pretrained_model = init_inference(self.pretrained_model, config=self.config.deepspeed_inference_config) + torch.distributed.barrier() # better safe than hanging + elif self.is_quantized: + # we can't use device context manager since the model is quantized + LOGGER.info("\t+ Loading Quantized model") self.pretrained_model = self.automodel_class.from_pretrained( pretrained_model_name_or_path=self.config.model, device_map=self.config.device_map, **self.config.hub_kwargs, **self.automodel_kwargs, ) - elif self.is_gptq_quantized or self.is_awq_quantized: - LOGGER.info("\t+ Loading quantized model") - self.pretrained_model = self.automodel_class.from_pretrained( - pretrained_model_name_or_path=self.config.model, - # for gptq, we need to specify the device_map to either auto - # or a cuda adevice to avoid any modules being assigned to cpu ¯\_(ツ)_/¯ - device_map=self.config.device_map or torch.device(self.config.device), - **self.config.hub_kwargs, - **self.automodel_kwargs, - ) + if self.config.device_map is None and self.config.device != "cpu": + LOGGER.info(f"\t+ Moving model to device: {self.config.device}") + self.pretrained_model.to(self.config.device) elif self.config.device_map is not None: + # we can't use device context manager since device_map is specified LOGGER.info(f"\t+ Loading model with device map: {self.config.device_map}") self.pretrained_model = self.automodel_class.from_pretrained( pretrained_model_name_or_path=self.config.model, @@ -170,8 +174,6 @@ def load_model_from_pretrained(self) -> None: **self.automodel_kwargs, ) else: - # this is the fastest way to load a model on a specific device - # but not compatible with all quantization methods (and pipelines) LOGGER.info(f"\t+ Loading model directly on device: {self.config.device}") with torch.device(self.config.device): self.pretrained_model = self.automodel_class.from_pretrained( @@ -179,75 +181,68 @@ def load_model_from_pretrained(self) -> None: ) def create_no_weights_model(self) -> None: - LOGGER.info("\t+ Creating no weights model state_dict") + if self.pretrained_config is None: + raise ValueError("Can't create no weights model without a pretrained config") + + self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights_model") + LOGGER.info("\t+ Creating no weights model directory") + os.makedirs(self.no_weights_model, exist_ok=True) + LOGGER.info("\t+ Creating no weights model state dict") state_dict = torch.nn.Linear(1, 1).state_dict() if self.is_exllamav2: - # for exllamav2 we need to add g_idx to the state_dict which - # requires some information about linear layers dimensions + LOGGER.info("\t+ Adding g_idx to no weights model state dict") with torch.device("meta"): meta_model = self.automodel_class.from_config(self.pretrained_config) for name, module in meta_model.named_modules(): if hasattr(module, "in_features"): state_dict[name + ".g_idx"] = torch.ones((module.in_features,), dtype=torch.int32) + LOGGER.info("\t+ Saving no weights model safetensors") + safetensors = os.path.join(self.no_weights_model, 
"model.safetensors") + save_file(tensors=state_dict, filename=safetensors, metadata={"format": "pt"}) + if self.is_quantized: - # tricking from_pretrained to load the model as if it was quantized + LOGGER.info("\t+ Adding quantization config to no weights model's pretrained config") self.pretrained_config.quantization_config = self.quantization_config.to_dict() - - LOGGER.info("\t+ Creating no weights model directory") - self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights") - os.makedirs(self.no_weights_model, exist_ok=True) + # tricking from_pretrained to load the model as if it was quantized LOGGER.info("\t+ Saving no weights model pretrained config") - self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) - - LOGGER.info("\t+ Saving no weights model state_dict") - save_file( - filename=os.path.join(self.no_weights_model, "model.safetensors"), - metadata={"format": "pt"}, - tensors=state_dict, - ) + if self.config.library == "transformers": + self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) def load_model_with_no_weights(self) -> None: + LOGGER.info("\t+ Creating no weights model") self.create_no_weights_model() - with no_init_weights(): - original_model = self.config.model - self.config.model = self.no_weights_model - LOGGER.info("\t+ Loading no weights model") + with random_init_weights(): + original_model, self.config.model = self.config.model, self.no_weights_model + LOGGER.info("\t+ Loading no weights AutoModel") self.load_model_from_pretrained() self.config.model = original_model - LOGGER.info("\t+ Randomizing model weights") - randomize_weights(self.pretrained_model) + # dunno how necessary this is LOGGER.info("\t+ Tying model weights") self.pretrained_model.tie_weights() def process_quantization_config(self) -> None: if self.is_gptq_quantized: LOGGER.info("\t+ Processing GPTQ config") - from transformers import GPTQConfig - self.quantization_config = GPTQConfig( **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config) ) elif self.is_awq_quantized: LOGGER.info("\t+ Processing AWQ config") - from transformers import AwqConfig - self.quantization_config = AwqConfig( **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config) ) elif self.is_bnb_quantized: LOGGER.info("\t+ Processing BitsAndBytes config") - from transformers import BitsAndBytesConfig - self.quantization_config = BitsAndBytesConfig( **dict(getattr(self.pretrained_config, "quantization_config", {}), **self.config.quantization_config) ) else: - self.quantization_config = None + raise ValueError(f"Quantization scheme {self.config.quantization_scheme} not recognized") @property def is_quantized(self) -> bool: @@ -256,36 +251,38 @@ def is_quantized(self) -> bool: @property def is_bnb_quantized(self) -> bool: return self.config.quantization_scheme == "bnb" or ( - hasattr(self.pretrained_config, "quantization_config") - and self.pretrained_config.quantization_config.get("quant_method", None) == "bnb" + getattr(self.pretrained_config, "quantization_config", {}).get("quant_method", None) == "bnb" ) @property def is_gptq_quantized(self) -> bool: return self.config.quantization_scheme == "gptq" or ( - hasattr(self.pretrained_config, "quantization_config") - and self.pretrained_config.quantization_config.get("quant_method", None) == "gptq" + getattr(self.pretrained_config, "quantization_config", {}).get("quant_method", None) == "gptq" ) @property def is_awq_quantized(self) -> 
bool: return self.config.quantization_scheme == "awq" or ( - hasattr(self.pretrained_config, "quantization_config") - and self.pretrained_config.quantization_config.get("quant_method", None) == "awq" + getattr(self.pretrained_config, "quantization_config", {}).get("quant_method", None) == "awq" ) @property def is_exllamav2(self) -> bool: - return ( - self.is_gptq_quantized - and hasattr(self.quantization_config, "exllama_config") - and self.quantization_config.exllama_config.get("version", None) == 2 - ) + dummy_exllama = {"exllama_version": None} + return (self.is_gptq_quantized or self.is_awq_quantized) and ( + getattr(self.quantization_config, "exllama_config", dummy_exllama)["exllama_version"] + or getattr(self.pretrained_config, "quantization_config", {}).get("exllama_config", dummy_exllama)[ + "exllama_version" + ] + ) == 2 @property def automodel_kwargs(self) -> Dict[str, Any]: kwargs = {} + if self.is_quantized: + kwargs["quantization_config"] = self.quantization_config + if self.config.torch_dtype is not None: kwargs["torch_dtype"] = getattr(torch, self.config.torch_dtype) @@ -295,24 +292,23 @@ def automodel_kwargs(self) -> Dict[str, Any]: if self.config.low_cpu_mem_usage is not None: kwargs["low_cpu_mem_usage"] = self.config.low_cpu_mem_usage - if self.is_quantized: + if self.config.no_weights: + # we use our own context manager to load the model with random weights kwargs["_fast_init"] = False - kwargs["quantization_config"] = self.quantization_config return kwargs def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + inputs = super().prepare_inputs(inputs) + if self.config.library == "diffusers": return {"prompt": inputs["prompt"]} - - LOGGER.info(f"\t+ Moving inputs tensors to device {self.config.device}") - for key, value in inputs.items(): - inputs[key] = value.to(self.config.device) - - if self.config.library == "timm": - return {"x": inputs["pixel_values"]} - - return inputs + elif self.config.library == "timm": + return {"x": inputs["pixel_values"].to(self.config.device)} + else: + for key, value in inputs.items(): + inputs[key] = value.to(self.config.device) + return inputs @torch.inference_mode() def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict: @@ -335,9 +331,9 @@ def train( training_callbacks: List[TrainerCallback], training_data_collator: Callable[[List[Dict[str, Any]]], Dict[str, Any]], ) -> TrainerState: - LOGGER.info("\t+ Wrapping training arguments with transformers.TrainingArguments") + LOGGER.info(f"\t+ Wrapping training arguments with {TrainingArguments.__name__}") training_arguments = TrainingArguments(**training_arguments) - LOGGER.info("\t+ Wrapping model with transformers.Trainer") + LOGGER.info(f"\t+ Wrapping model with {Trainer.__name__}") trainer = Trainer( args=training_arguments, model=self.pretrained_model, @@ -347,7 +343,7 @@ def train( ) LOGGER.info("\t+ Starting training") trainer.train() - LOGGER.info("\t+ Training finished successfully") + LOGGER.info("\t+ Finished training") def seed(self): super().seed() diff --git a/optimum_benchmark/backends/pytorch/config.py b/optimum_benchmark/backends/pytorch/config.py index 7902719d..3efeb63c 100644 --- a/optimum_benchmark/backends/pytorch/config.py +++ b/optimum_benchmark/backends/pytorch/config.py @@ -4,7 +4,6 @@ from ...import_utils import torch_version from ...system_utils import is_rocm_system from ..config import BackendConfig -from ..peft_utils import PEFT_CONFIGS, PEFT_TASKS_TYPES DEVICE_MAPS = ["auto", "sequential"] AMP_DTYPES = ["bfloat16", 
"float16"] @@ -56,7 +55,7 @@ class PyTorchConfig(BackendConfig): deepspeed_inference_config: Dict[str, Any] = field(default_factory=dict) # peft options - peft_strategy: Optional[str] = None + peft_type: Optional[str] = None peft_config: Dict[str, Any] = field(default_factory=dict) def __post_init__(self): @@ -86,14 +85,3 @@ def __post_init__(self): if self.quantization_config: QUANTIZATION_CONFIG = QUANTIZATION_CONFIGS[self.quantization_scheme] self.quantization_config = {**QUANTIZATION_CONFIG, **self.quantization_config} - - if self.peft_strategy is not None: - if self.peft_strategy not in PEFT_CONFIGS: - raise ValueError( - f"`peft_strategy` must be one of {list(PEFT_CONFIGS.keys())}. Got {self.peft_strategy} instead." - ) - PEFT_CONFIG = PEFT_CONFIGS[self.peft_strategy] - self.peft_config = {**PEFT_CONFIG, **self.peft_config} - - if self.peft_config["task_type"] is None: - raise ValueError(f"`peft_config.task_type` must be set to one of the following {PEFT_TASKS_TYPES}") diff --git a/optimum_benchmark/backends/tensorrt_llm/backend.py b/optimum_benchmark/backends/tensorrt_llm/backend.py index 7a3b1984..302141f5 100644 --- a/optimum_benchmark/backends/tensorrt_llm/backend.py +++ b/optimum_benchmark/backends/tensorrt_llm/backend.py @@ -1,7 +1,10 @@ +import os from logging import getLogger from typing import Any, Dict +import torch from hydra.utils import get_class +from safetensors.torch import save_file from transformers.utils import ModelOutput from ..base import Backend @@ -18,6 +21,7 @@ def __init__(self, config: TRTLLMConfig): super().__init__(config) self.validate_model_type() + LOGGER.info("\t+ Loading pretrained TRTLLMModel") self.load_trtmodel_from_pretrained() def validate_model_type(self) -> None: @@ -27,6 +31,18 @@ def validate_model_type(self) -> None: self.trtmodel_class = get_class(MODEL_TYPE_TO_TRTLLMMODEL[self.model_type]) LOGGER.info(f"\t+ Using TRTLLMModel class {self.trtmodel_class.__name__}") + def create_no_weights_model(self) -> None: + self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights_model") + LOGGER.info("\t+ Creating no weights model state dict") + state_dict = torch.nn.Linear(1, 1).state_dict() + LOGGER.info("\t+ Saving no weights model safetensors") + safetensors = os.path.join(self.no_weights_model, "model.safetensors") + save_file(tensors=state_dict, filename=safetensors, metadata={"format": "pt"}) + + if self.config.library == "transformers": + LOGGER.info("\t+ Saving no weights model pretrained config") + self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) + def load_trtmodel_from_pretrained(self) -> None: self.pretrained_model = self.trtmodel_class.from_pretrained( self.config.model, diff --git a/optimum_benchmark/backends/timm_utils.py b/optimum_benchmark/backends/timm_utils.py index 07105003..22a017f9 100644 --- a/optimum_benchmark/backends/timm_utils.py +++ b/optimum_benchmark/backends/timm_utils.py @@ -1,18 +1,14 @@ -from typing import Any, Dict, Optional +from typing import Any, Dict -from ..import_utils import is_timm_available, is_torch_available, is_transformers_available +from transformers import PretrainedConfig -if is_torch_available(): - import torch +from ..import_utils import is_timm_available if is_timm_available(): - import timm + import timm # type: ignore -if is_transformers_available(): - from transformers import PretrainedConfig - -def get_timm_pretrained_config(model_name: str) -> "PretrainedConfig": +def get_timm_pretrained_config(model_name: str) -> PretrainedConfig: model_source, model_name 
= timm.models.parse_model_name(model_name) if model_source == "hf-hub": # For model names specified in the form `hf-hub:path/architecture_name@revision`, @@ -23,15 +19,7 @@ def get_timm_pretrained_config(model_name: str) -> "PretrainedConfig": return timm.get_pretrained_cfg(model_name) -def get_timm_pre_processor(model: str) -> Optional["torch.nn.Module"]: - try: - pretrained_config = get_timm_pretrained_config(model) - return timm.data.create_transform(**timm.data.resolve_data_config(pretrained_config)) - except Exception: - return None - - -def extract_timm_shapes_from_config(config: "PretrainedConfig") -> Dict[str, Any]: +def extract_timm_shapes_from_config(config: PretrainedConfig) -> Dict[str, Any]: artifacts_dict = {} config_dict = {k: v for k, v in config.to_dict().items() if v is not None} diff --git a/optimum_benchmark/backends/torch_ort/backend.py b/optimum_benchmark/backends/torch_ort/backend.py index 52bede74..7f8863f3 100644 --- a/optimum_benchmark/backends/torch_ort/backend.py +++ b/optimum_benchmark/backends/torch_ort/backend.py @@ -8,22 +8,13 @@ from datasets import Dataset from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments from safetensors.torch import save_file -from transformers import TrainerCallback, TrainerState -from transformers.modeling_utils import no_init_weights -from transformers.utils.logging import set_verbosity_error +from transformers import TrainerCallback -from ...import_utils import is_peft_available from ..base import Backend -from ..peft_utils import get_peft_config_class -from ..transformers_utils import randomize_weights +from ..peft_utils import apply_peft +from ..transformers_utils import random_init_weights from .config import TorchORTConfig -if is_peft_available(): - from peft import get_peft_model # type: ignore - -# disable transformers logging -set_verbosity_error() - LOGGER = getLogger("torch-ort") @@ -38,52 +29,46 @@ def __init__(self, config: TorchORTConfig): self.tmpdir = TemporaryDirectory() if self.config.no_weights: + LOGGER.info("\t+ Loading no weights AutoModel") self.load_automodel_with_no_weights() else: + LOGGER.info("\t+ Loading pretrained AutoModel") self.load_automodel_from_pretrained() - if self.config.peft_strategy is not None: - LOGGER.info("\t+ Using PEFT") - peft_config_class = get_peft_config_class(self.config.peft_strategy) - peft_config = peft_config_class(**self.config.peft_config) - self.pretrained_model = get_peft_model(self.pretrained_model, peft_config=peft_config) + if self.config.peft_type is not None: + LOGGER.info("\t+ Applying PEFT") + self.pretrained_model = apply_peft(self.pretrained_model, self.config.peft_type, self.config.peft_config) + + self.tmpdir.cleanup() def validate_library(self) -> None: if self.config.library == "transformers": - LOGGER.info(f"Using AutoModel: {self.automodel_class.__name__}") + LOGGER.info(f"Using AutoModel class {self.automodel_class.__name__}") else: raise NotImplementedError(f"TorchORTBackend does not support {self.config.library} library") def create_no_weights_model(self) -> None: - LOGGER.info("\t+ Creating no weights model directory") - self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights") - os.makedirs(self.no_weights_model, exist_ok=True) - - LOGGER.info("\t+ Saving pretrained config") - self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) - - LOGGER.info("\t+ Creating no weights model state_dict") + self.no_weights_model = os.path.join(self.tmpdir.name, "no_weights_model") + LOGGER.info("\t+ Creating no weights model 
state dict") state_dict = torch.nn.Linear(1, 1).state_dict() + LOGGER.info("\t+ Saving no weights model safetensors") + safetensors = os.path.join(self.no_weights_model, "model.safetensors") + save_file(tensors=state_dict, filename=safetensors, metadata={"format": "pt"}) - LOGGER.info("\t+ Saving no weights model state_dict") - save_file( - filename=os.path.join(self.no_weights_model, "model.safetensors"), - metadata={"format": "pt"}, - tensors=state_dict, - ) + if self.config.library == "transformers": + LOGGER.info("\t+ Saving no weights model pretrained config") + self.pretrained_config.save_pretrained(save_directory=self.no_weights_model) def load_automodel_with_no_weights(self) -> None: + LOGGER.info("\t+ Creating no weights model") self.create_no_weights_model() - with no_init_weights(): - original_model = self.config.model - self.config.model = self.no_weights_model - LOGGER.info("\t+ Loading no weights model") + with random_init_weights(): + original_model, self.config.model = self.config.model, self.no_weights_model + LOGGER.info("\t+ Loading no weights AutoModel") self.load_automodel_from_pretrained() self.config.model = original_model - LOGGER.info("\t+ Randomizing model weights") - randomize_weights(self.pretrained_model) LOGGER.info("\t+ Tying model weights") self.pretrained_model.tie_weights() @@ -107,9 +92,7 @@ def train( training_arguments: Dict[str, Any], training_callbacks: List[TrainerCallback], training_data_collator: Callable[[List[Dict[str, Any]]], Dict[str, Any]], - ) -> TrainerState: - LOGGER.info("\t+ Setting dataset format to `torch`") - training_dataset.set_format(type="torch", columns=list(training_dataset.features.keys())) + ): LOGGER.info(f"\t+ Wrapping training arguments with {ORTTrainingArguments.__name__}") training_arguments = ORTTrainingArguments(**training_arguments) LOGGER.info(f"\t+ Wrapping model with {ORTTrainer.__name__}") @@ -122,9 +105,7 @@ def train( ) LOGGER.info("\t+ Starting training") trainer.train() - LOGGER.info("\t+ Training finished successfully") - - return trainer.state + LOGGER.info("\t+ Finished training") def clean(self) -> None: super().clean() diff --git a/optimum_benchmark/backends/torch_ort/config.py b/optimum_benchmark/backends/torch_ort/config.py index 8559022f..252ee72b 100644 --- a/optimum_benchmark/backends/torch_ort/config.py +++ b/optimum_benchmark/backends/torch_ort/config.py @@ -3,7 +3,6 @@ from ...import_utils import torch_ort_version from ..config import BackendConfig -from ..peft_utils import PEFT_CONFIGS, PEFT_TASKS_TYPES @dataclass @@ -17,7 +16,7 @@ class TorchORTConfig(BackendConfig): torch_dtype: Optional[str] = None # peft options - peft_strategy: Optional[str] = None + peft_type: Optional[str] = None peft_config: Dict[str, Any] = field(default_factory=dict) def __post_init__(self): @@ -25,14 +24,3 @@ def __post_init__(self): if self.device != "cuda": raise ValueError(f"TorchORTBackend only supports CUDA devices, got {self.device}") - - if self.peft_strategy is not None: - if self.peft_strategy not in PEFT_CONFIGS: - raise ValueError( - f"`peft_strategy` must be one of {list(PEFT_CONFIGS.keys())}. Got {self.peft_strategy} instead." 
- ) - PEFT_CONFIG = PEFT_CONFIGS[self.peft_strategy] - self.peft_config = {**PEFT_CONFIG, **self.peft_config} - - if self.peft_config["task_type"] is None: - raise ValueError(f"`peft_config.task_type` must be set to one of the following {PEFT_TASKS_TYPES}") diff --git a/optimum_benchmark/backends/transformers_utils.py b/optimum_benchmark/backends/transformers_utils.py index b47e3030..93c35560 100644 --- a/optimum_benchmark/backends/transformers_utils.py +++ b/optimum_benchmark/backends/transformers_utils.py @@ -1,24 +1,21 @@ import os +from contextlib import contextmanager from typing import Any, Dict, Optional, Union -from ..import_utils import is_torch_available, is_transformers_available +import torch +from transformers import ( + AutoConfig, + AutoProcessor, + AutoTokenizer, + FeatureExtractionMixin, + GenerationConfig, + ImageProcessingMixin, + PretrainedConfig, + PreTrainedTokenizer, + ProcessorMixin, +) -if is_torch_available(): - import torch - -if is_transformers_available(): - from transformers import ( - AutoConfig, - AutoProcessor, - FeatureExtractionMixin, - GenerationConfig, - ImageProcessingMixin, - PretrainedConfig, - PreTrainedTokenizer, - ProcessorMixin, - ) - - PretrainedProcessor = Union[FeatureExtractionMixin, ImageProcessingMixin, PreTrainedTokenizer, ProcessorMixin] +PretrainedProcessor = Union[FeatureExtractionMixin, ImageProcessingMixin, PreTrainedTokenizer, ProcessorMixin] def get_transformers_cache_dir() -> str: @@ -43,7 +40,10 @@ def get_transformers_pretrained_processor(model: str, **kwargs) -> Optional["Pre # sometimes contains information about the model's input shapes that are not available in the config return AutoProcessor.from_pretrained(model, **kwargs) except Exception: - return None + try: + return AutoTokenizer.from_pretrained(model, **kwargs) + except Exception: + return None def extract_transformers_shapes_from_artifacts( @@ -119,20 +119,37 @@ def extract_transformers_shapes_from_artifacts( return shapes -def randomize_weights(model: "torch.nn.Module") -> None: - for param in model.parameters(): - if param.data.is_floating_point(): - if torch.cuda.is_available() and param.device.type != "cuda": - param.data.cuda().normal_(mean=0.0, std=0.2).cpu() - elif torch.backends.mps.is_available() and param.device.type != "mps": - param.data.to("mps").normal_(mean=0.0, std=0.2).cpu() - else: - param.data.normal_(mean=0.0, std=0.2) - - elif param.data.dtype in (torch.int32, torch.int16, torch.int8): - if torch.cuda.is_available() and param.device.type != "cuda": - param.data.copy_(torch.randint(-127, 127, param.data.shape, device="cuda")) - elif torch.backends.mps.is_available() and param.device.type != "mps": - param.data.copy_(torch.randint(-127, 127, param.data.shape, device="mps")) - else: - param.data.copy_(torch.randint(-127, 127, param.data.shape)) +TORCH_INIT_FUNCTIONS = { + "normal_": torch.nn.init.normal_, + "uniform_": torch.nn.init.uniform_, + "trunc_normal_": torch.nn.init.trunc_normal_, + "xavier_normal_": torch.nn.init.xavier_normal_, + "xavier_uniform_": torch.nn.init.xavier_uniform_, + "kaiming_normal_": torch.nn.init.kaiming_normal_, + "kaiming_uniform_": torch.nn.init.kaiming_uniform_, + "normal": torch.nn.init.normal, + "uniform": torch.nn.init.uniform, + "xavier_normal": torch.nn.init.xavier_normal, + "xavier_uniform": torch.nn.init.xavier_uniform, + "kaiming_normal": torch.nn.init.kaiming_normal, + "kaiming_uniform": torch.nn.init.kaiming_uniform, +} + + +def fast_rand(tensor: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor: + 
return torch.nn.init.uniform_(tensor) + + +@contextmanager +def random_init_weights(): + # Replace the initialization functions + for name, init_func in TORCH_INIT_FUNCTIONS.items(): + if name != "uniform_": + setattr(torch.nn.init, name, fast_rand) + try: + yield + finally: + # Restore the original initialization functions + for name, init_func in TORCH_INIT_FUNCTIONS.items(): + if name != "uniform_": + setattr(torch.nn.init, name, init_func) diff --git a/optimum_benchmark/cli.py b/optimum_benchmark/cli.py index f91a3b2c..0a9254ab 100644 --- a/optimum_benchmark/cli.py +++ b/optimum_benchmark/cli.py @@ -9,9 +9,9 @@ from .backends.neural_compressor.config import INCConfig from .backends.onnxruntime.config import ORTConfig from .backends.openvino.config import OVConfig +from .backends.py_tgi.config import PyTGIConfig from .backends.pytorch.config import PyTorchConfig from .backends.tensorrt_llm.config import TRTLLMConfig -from .backends.text_generation_inference.config import TGIConfig from .backends.torch_ort.config import TorchORTConfig from .benchmarks.inference.config import InferenceConfig from .benchmarks.report import BenchmarkReport @@ -33,7 +33,7 @@ cs.store(group="backend", name=TorchORTConfig.name, node=TorchORTConfig) cs.store(group="backend", name=TRTLLMConfig.name, node=TRTLLMConfig) cs.store(group="backend", name=INCConfig.name, node=INCConfig) -cs.store(group="backend", name=TGIConfig.name, node=TGIConfig) +cs.store(group="backend", name=PyTGIConfig.name, node=PyTGIConfig) # benchmarks configurations cs.store(group="benchmark", name=TrainingConfig.name, node=TrainingConfig) cs.store(group="benchmark", name=InferenceConfig.name, node=InferenceConfig) diff --git a/optimum_benchmark/launchers/torchrun/launcher.py b/optimum_benchmark/launchers/torchrun/launcher.py index 1f4d2d53..436ac0f2 100644 --- a/optimum_benchmark/launchers/torchrun/launcher.py +++ b/optimum_benchmark/launchers/torchrun/launcher.py @@ -83,7 +83,7 @@ def entrypoint(worker, queue, lock, log_level, *worker_args): rank = int(os.environ["RANK"]) torch.cuda.set_device(rank) if torch.cuda.is_available() else None - setup_logging(level=log_level, prefix=f"RANK-{rank}") if rank == 0 else None + setup_logging(level=log_level, prefix=f"RANK-{rank}") if rank == 0 else setup_logging(level="ERROR") torch.distributed.init_process_group(backend="nccl" if torch.cuda.is_available() else "gloo") torch.distributed.barrier() diff --git a/optimum_benchmark/system_utils.py b/optimum_benchmark/system_utils.py index 070bf805..e2500d7b 100644 --- a/optimum_benchmark/system_utils.py +++ b/optimum_benchmark/system_utils.py @@ -91,7 +91,7 @@ def get_gpus(): elif is_rocm_system(): if not is_amdsmi_available() and not is_pyrsmi_available(): raise ValueError( - "Either the library amdsmi or pyrsmi is required to run memory benchmark on AMD GPUs, but is not installed. " + "Either the library amdsmi or pyrsmi is required to run memory benchmark on AMD GPUs, but neither is installed." ) gpus = [] @@ -139,7 +139,7 @@ def get_gpu_vram_mb() -> List[int]: elif is_rocm_system(): if not is_amdsmi_available() and not is_pyrsmi_available(): raise ValueError( - "Either the library amdsmi or pyrsmi is required to run memory benchmark on AMD GPUs, but is not installed. " + "Either the library amdsmi or pyrsmi is required to run memory benchmark on AMD GPUs, but neither is installed." 
) if is_amdsmi_available(): @@ -182,7 +182,7 @@ def get_gpu_device_ids() -> str: elif is_nvidia_system(): if not is_pynvml_available(): raise ValueError( - "The library pynvml is required to run memory benchmark on NVIDIA GPUs, but is not installed. " + "The library pynvml is required to get GPU device ids, but is not installed. " "Please install the official and NVIDIA maintained PyNVML library through `pip install nvidia-ml-py`." ) @@ -191,22 +191,26 @@ def get_gpu_device_ids() -> str: device_ids = ",".join(str(i) for i in device_ids) pynvml.nvmlShutdown() elif is_rocm_system(): - if not is_amdsmi_available(): + if not is_amdsmi_available() or not is_pyrsmi_available(): raise ValueError( - "The library amdsmi is required to run memory benchmark on AMD GPUs, but is not installed. " - "Please install the official and AMD maintained amdsmi library from https://github.com/ROCm/amdsmi." + "Either the library amdsmi or pyrsmi is required to get GPU device ids, but neither is installed." ) - amdsmi.amdsmi_init() - rocm_version = get_rocm_version() - - if rocm_version >= "5.7": - device_ids = list(range(len(amdsmi.amdsmi_get_processor_handles()))) - else: - device_ids = list(range(len(amdsmi.amdsmi_get_device_handles()))) + if is_amdsmi_available(): + amdsmi.amdsmi_init() + rocm_version = get_rocm_version() + if rocm_version >= "5.7": + device_ids = list(range(len(amdsmi.amdsmi_get_processor_handles()))) + else: + device_ids = list(range(len(amdsmi.amdsmi_get_device_handles()))) + device_ids = ",".join(str(i) for i in device_ids) + amdsmi.amdsmi_shut_down() - device_ids = ",".join(str(i) for i in device_ids) - amdsmi.amdsmi_shut_down() + elif is_pyrsmi_available(): + rocml.smi_initialize() + device_ids = list(range(rocml.smi_get_device_count())) + device_ids = ",".join(str(i) for i in device_ids) + rocml.smi_shutdown() else: raise ValueError("Couldn't infer GPU device ids.") diff --git a/optimum_benchmark/task_utils.py b/optimum_benchmark/task_utils.py index bd7d7999..dfa3f808 100644 --- a/optimum_benchmark/task_utils.py +++ b/optimum_benchmark/task_utils.py @@ -58,8 +58,8 @@ "zero-shot-object-detection": "TFAutoModelForZeroShotObjectDetection", } _DIFFUSERS_TASKS_TO_MODEL_LOADERS = { - "stable-diffusion": "StableDiffusionPipeline", - "stable-diffusion-xl": "StableDiffusionXLImg2ImgPipeline", + "stable-diffusion": "AutoPipelineForText2Image", + "stable-diffusion-xl": "AutoPipelineForText2Image", } _TIMM_TASKS_TO_MODEL_LOADERS = {"image-classification": "create_model"} _LIBRARY_TO_TF_TASKS_TO_MODEL_LOADER_MAP = {"transformers": _TRANSFORMERS_TASKS_TO_TF_MODEL_LOADERS} diff --git a/setup.py b/setup.py index c911ceb5..dba055ff 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ import importlib.util import os -import subprocess from setuptools import find_packages, setup @@ -23,32 +22,16 @@ "pandas", ] -# We may allow to install CUDA or RoCm dependencies even -# when building in a non-CUDA or non-ROCm environment. 
USE_CUDA = os.environ.get("USE_CUDA", None) == "1" USE_ROCM = os.environ.get("USE_ROCM", None) == "1" if USE_CUDA: INSTALL_REQUIRES.append("nvidia-ml-py") -else: - try: - subprocess.run(["nvidia-smi"], stdout=subprocess.DEVNULL) - INSTALL_REQUIRES.append("nvidia-ml-py") - except FileNotFoundError: - pass -# we keep this as a check that amdsmi is installed since it's not available on pypi PYRSMI = "pyrsmi@git+https://github.com/ROCm/pyrsmi.git" if USE_ROCM: if not importlib.util.find_spec("amdsmi"): INSTALL_REQUIRES.append(PYRSMI) -else: - try: - subprocess.run(["rocm-smi"], stdout=subprocess.DEVNULL) - if not importlib.util.find_spec("amdsmi"): - INSTALL_REQUIRES.append(PYRSMI) - except FileNotFoundError: - pass if PYRSMI in INSTALL_REQUIRES: print("ROCm GPU detected without amdsmi installed. Using pyrsmi instead but some features may not work.") @@ -57,15 +40,15 @@ EXTRAS_REQUIRE = { "quality": ["ruff"], "testing": ["pytest", "hydra-joblib-launcher"], - # api-based backends + # optimum backends "openvino": [f"optimum[openvino,nncf]>={MIN_OPTIMUM_VERSION}"], "onnxruntime": [f"optimum[onnxruntime]>={MIN_OPTIMUM_VERSION}"], - "neural-compressor": [f"optimum[neural-compressor]>={MIN_OPTIMUM_VERSION}"], - "torch-ort": [f"optimum>={MIN_OPTIMUM_VERSION}", "onnxruntime-training", "torch-ort"], "onnxruntime-gpu": [f"optimum[onnxruntime-gpu]>={MIN_OPTIMUM_VERSION}"], + "neural-compressor": [f"optimum[neural-compressor]>={MIN_OPTIMUM_VERSION}"], + "torch-ort": ["torch-ort", "onnxruntime-training", f"optimum>={MIN_OPTIMUM_VERSION}"], # docker-based backends - "text-generation-inference": ["py-tgi"], - # specific settings + "py-tgi": ["py-tgi==0.1.3"], + # third-party features "codecarbon": ["codecarbon"], "deepspeed": ["deepspeed"], "diffusers": ["diffusers"], diff --git a/tests/configs/_bert_sweep_.yaml b/tests/configs/_bert_sweep_.yaml index f618a34f..08c7bbde 100644 --- a/tests/configs/_bert_sweep_.yaml +++ b/tests/configs/_bert_sweep_.yaml @@ -1,5 +1,6 @@ hydra: sweeper: params: + backend.no_weights: true,false backend.task: fill-mask,text-classification,token-classification,question-answering backend.model: hf-internal-testing/tiny-random-bert,hf-internal-testing/tiny-random-roberta diff --git a/tests/configs/_gpt_.yaml b/tests/configs/_gpt_.yaml new file mode 100644 index 00000000..17847b2a --- /dev/null +++ b/tests/configs/_gpt_.yaml @@ -0,0 +1,2 @@ +backend: + model: gpt2 diff --git a/tests/configs/_gpt_naive_mp_.yaml b/tests/configs/_gpt_naive_mp_.yaml deleted file mode 100644 index cf2adfd3..00000000 --- a/tests/configs/_gpt_naive_mp_.yaml +++ /dev/null @@ -1,6 +0,0 @@ -backend: - model: gpt2 - task: text-generation - library: transformers - device_ids: 0,1 - device_map: auto diff --git a/tests/configs/_gpt_peft_.yaml b/tests/configs/_gpt_peft_.yaml deleted file mode 100644 index d99267e4..00000000 --- a/tests/configs/_gpt_peft_.yaml +++ /dev/null @@ -1,7 +0,0 @@ -backend: - model: gpt2 - task: text-generation - library: transformers - peft_strategy: lora - peft_config: - task_type: CAUSAL_LM diff --git a/tests/configs/_gpt_sweep_.yaml b/tests/configs/_gpt_sweep_.yaml index 1ff5e2c7..1e3325a9 100644 --- a/tests/configs/_gpt_sweep_.yaml +++ b/tests/configs/_gpt_sweep_.yaml @@ -2,4 +2,5 @@ hydra: sweeper: params: backend.task: text-generation + backend.no_weights: true,false backend.model: hf-internal-testing/tiny-random-gpt2,IlyasMoutawwakil/tiny-random-llama diff --git a/tests/configs/_inference_.yaml b/tests/configs/_inference_.yaml index ef429e8b..b72082b8 100644 --- 
a/tests/configs/_inference_.yaml +++ b/tests/configs/_inference_.yaml @@ -2,7 +2,11 @@ defaults: - benchmark: inference benchmark: + memory: true + latency: true + duration: 1 warmup_runs: 1 - new_tokens: 2 - memory: true + generate_kwargs: + max_new_tokens: 5 + min_new_tokens: 5 diff --git a/tests/configs/_naive_mp_.yaml b/tests/configs/_naive_mp_.yaml new file mode 100644 index 00000000..108e8b55 --- /dev/null +++ b/tests/configs/_naive_mp_.yaml @@ -0,0 +1,3 @@ +backend: + device_ids: 0,1 + device_map: auto diff --git a/tests/configs/_peft_.yaml b/tests/configs/_peft_.yaml new file mode 100644 index 00000000..d82a6476 --- /dev/null +++ b/tests/configs/_peft_.yaml @@ -0,0 +1,2 @@ +backend: + peft_type: LORA diff --git a/tests/configs/_timm_.yaml b/tests/configs/_timm_.yaml index 0b374c8a..22d47cdd 100644 --- a/tests/configs/_timm_.yaml +++ b/tests/configs/_timm_.yaml @@ -1,5 +1,2 @@ backend: - library: timm - task: image-classification model: timm/tiny_vit_21m_224.in1k - diff --git a/tests/configs/cpu_inference_py_tgi_gpt.yaml b/tests/configs/cpu_inference_py_tgi_gpt.yaml new file mode 100644 index 00000000..c0805b71 --- /dev/null +++ b/tests/configs/cpu_inference_py_tgi_gpt.yaml @@ -0,0 +1,10 @@ +defaults: + - backend: py-tgi + # order of inheritance, last one overrides previous ones + - _base_ # inherits from base config + - _inference_ # inherits from inference config + - _gpt_ # inherits from gpt config + - _cpu_ # inherits from cpu config + - _self_ # hydra 1.1 compatibility + +experiment_name: cpu_inference_py_tgi_gpt diff --git a/tests/configs/cuda_inference_pytorch_gpt_naive_mp.yaml b/tests/configs/cuda_inference_pytorch_gpt_naive_mp.yaml index 6e19ba18..0bd1dcd8 100644 --- a/tests/configs/cuda_inference_pytorch_gpt_naive_mp.yaml +++ b/tests/configs/cuda_inference_pytorch_gpt_naive_mp.yaml @@ -3,7 +3,8 @@ defaults: # order of inheritance, last one overrides previous ones - _base_ # inherits from base config - _inference_ # inherits from inference config - - _gpt_naive_mp_ # inherits from lm naive mp config + - _naive_mp_ # inherits from lm naive mp config + - _gpt_ # inherits from gpt config - _cuda_ # inherits from cpu config - _self_ # hydra 1.1 compatibility diff --git a/tests/configs/cuda_training_pytorch_gpt_naive_mp.yaml b/tests/configs/cuda_training_pytorch_gpt_naive_mp.yaml index ab6d4bc2..f9ae53fb 100644 --- a/tests/configs/cuda_training_pytorch_gpt_naive_mp.yaml +++ b/tests/configs/cuda_training_pytorch_gpt_naive_mp.yaml @@ -3,7 +3,8 @@ defaults: # order of inheritance, last one overrides previous ones - _base_ # inherits from base config - _training_ # inherits from training config - - _gpt_naive_mp_ # inherits from lm naive mp config + - _naive_mp_ # inherits from lm naive mp config + - _gpt_ # inherits from gpt config - _cuda_ # inherits from cpu config - _self_ # hydra 1.1 compatibility diff --git a/tests/configs/cuda_training_pytorch_gpt_peft.yaml b/tests/configs/cuda_training_pytorch_gpt_peft.yaml index 1ee6f473..ce473e6b 100644 --- a/tests/configs/cuda_training_pytorch_gpt_peft.yaml +++ b/tests/configs/cuda_training_pytorch_gpt_peft.yaml @@ -3,7 +3,8 @@ defaults: # order of inheritance, last one overrides previous ones - _base_ # inherits from base config - _training_ # inherits from training config - - _gpt_peft_ # inherits from language modeling peft config + - _peft_ # inherits from language modeling peft config + - _gpt_ # inherits from gpt config - _cuda_ # inherits from cpu config - _self_ # hydra 1.1 compatibility diff --git 
a/tests/configs/cuda_training_torch_ort_gpt_peft.yaml b/tests/configs/cuda_training_torch_ort_gpt_peft.yaml index 665dec16..6730f3d3 100644 --- a/tests/configs/cuda_training_torch_ort_gpt_peft.yaml +++ b/tests/configs/cuda_training_torch_ort_gpt_peft.yaml @@ -3,7 +3,8 @@ defaults: # order of inheritance, last one overrides previous ones - _base_ # inherits from base config - _training_ # inherits from training config - - _gpt_peft_ # inherits from language modeling peft config + - _peft_ # inherits from language modeling peft config + - _gpt_ # inherits from gpt config - _cuda_ # inherits from cpu config - _self_ # hydra 1.1 compatibility
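The random_init_weights context manager introduced in optimum_benchmark/backends/transformers_utils.py is the piece that ties the new no-weights flow together: it temporarily routes the common torch.nn.init functions (all except uniform_) to a cheap uniform fill, so from_pretrained can materialize missing tensors quickly instead of running each module's real, often expensive, init scheme. A quick way to see the patch in action, assuming this branch is installed locally:

import torch

from optimum_benchmark.backends.transformers_utils import random_init_weights

t = torch.empty(4, 4)
with random_init_weights():
    # while patched, kaiming_normal_ is routed to a uniform fill in [0, 1)
    torch.nn.init.kaiming_normal_(t)
print(bool((t >= 0).all()))  # True under the patch; the original init functions are restored on exit

The same context manager is what the PyTorch, Torch-ORT and Py-TGI backends rely on for no_weights=True: write a one-tensor model.safetensors next to the real pretrained config, then let from_pretrained fill in everything that is missing. Below is a condensed sketch of that flow outside the backend classes; hf-internal-testing/tiny-random-gpt2 is just the tiny checkpoint used in the test configs, and the real backends additionally save the processor and generation config and handle quantization configs:

import os
from tempfile import TemporaryDirectory

import torch
from safetensors.torch import save_file
from transformers import AutoConfig, AutoModelForCausalLM

from optimum_benchmark.backends.transformers_utils import random_init_weights

model_id = "hf-internal-testing/tiny-random-gpt2"

with TemporaryDirectory() as tmpdir:
    no_weights_model = os.path.join(tmpdir, "no_weights_model")
    os.makedirs(no_weights_model, exist_ok=True)

    # a single dummy tensor is enough; the file only needs to be a valid, non-empty safetensors archive
    state_dict = torch.nn.Linear(1, 1).state_dict()
    save_file(tensors=state_dict, filename=os.path.join(no_weights_model, "model.safetensors"), metadata={"format": "pt"})

    # the pretrained config tells from_pretrained which architecture to materialize
    AutoConfig.from_pretrained(model_id).save_pretrained(no_weights_model)

    # every missing tensor is initialized through the patched (uniform) init functions
    with random_init_weights():
        model = AutoModelForCausalLM.from_pretrained(no_weights_model, _fast_init=False)

    print(f"materialized {sum(p.numel() for p in model.parameters()):,} randomly initialized parameters")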