diff --git a/examples/running-llamas/configs/fp16+bt.yaml b/examples/running-llamas/configs/fp16+bt.yaml new file mode 100644 index 00000000..d7db1000 --- /dev/null +++ b/examples/running-llamas/configs/fp16+bt.yaml @@ -0,0 +1,8 @@ +defaults: + - _base_ + - _self_ + +experiment_name: fp16+bt + +backend: + to_bettertransformer: true diff --git a/examples/running-llamas/configs/fp16+fa2.yaml b/examples/running-llamas/configs/fp16+fa2.yaml new file mode 100644 index 00000000..93959afb --- /dev/null +++ b/examples/running-llamas/configs/fp16+fa2.yaml @@ -0,0 +1,8 @@ +defaults: + - _base_ + - _self_ + +experiment_name: fp16+fa2 + +backend: + use_flash_attention_2: true diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/config.yaml deleted file mode 100644 index a249298d..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/hydra.yaml deleted file mode 100644 index c863e7c3..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: 
experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,model=NousResearch/Llama-2-7b-hf - id: '0' - num: 0 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - 
hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/overrides.yaml deleted file mode 100644 index 6af3cf2d..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/hydra_config.yaml deleted file mode 100644 index 1e7e68a8..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/inference_results.csv deleted file mode 100644 index 53e050a6..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ 
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.0335,29.9,15239,15239,13738,13767,13.5,37.9,13.5,37.9,15954,15954,14356,14480 diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/config.yaml deleted file mode 100644 index aff4cbc5..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/hydra.yaml deleted file mode 100644 index a8752cc5..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: 
hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=128 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=128,model=NousResearch/Llama-2-7b-hf - id: '14' - num: 14 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: 
false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/overrides.yaml deleted file mode 100644 index 74593dc2..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=128 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/hydra_config.yaml deleted file mode 100644 index 02ec5c58..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/config.yaml deleted file mode 100644 index 0f3121ee..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: 
true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/hydra.yaml deleted file mode 100644 index 20399860..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16,model=NousResearch/Llama-2-7b-hf - id: '8' - num: 8 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/overrides.yaml deleted file mode 100644 index 46c7d465..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=16 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/hydra_config.yaml deleted file mode 100644 index ad990cc6..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/inference_results.csv deleted file mode 100644 index 40d786cb..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.414,38.6,19251,19251,16520,17779,17.5,468.0,17.1,478.0,28076,28076,26442,84420 diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/config.yaml deleted file mode 100644 index c3fed0b3..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - 
torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/hydra.yaml deleted file mode 100644 index 0fff6497..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2,model=NousResearch/Llama-2-7b-hf - id: '2' - num: 2 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/overrides.yaml deleted file mode 100644 index cfae74df..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=2 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/hydra_config.yaml deleted file mode 100644 index 2d2cdd5e..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/inference_results.csv deleted file mode 100644 index 3fe725e1..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.0589,34.0,15482,15482,13924,14011,13.6,75.3,13.5,75.7,17082,17082,15162,15609 diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/config.yaml deleted file mode 100644 index 14e60142..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - 
torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/hydra.yaml deleted file mode 100644 index de1322ef..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=32 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=32,model=NousResearch/Llama-2-7b-hf - id: '10' - num: 10 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/overrides.yaml deleted file mode 100644 index 425e1ca1..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=32 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/hydra_config.yaml deleted file mode 100644 index 1f782598..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/inference_results.csv deleted file mode 100644 index e32d1f98..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.817,39.2,23464,23464,19487,21992,26.9,609.0,26.1,627.0,53265,53265,39331,84422 diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/config.yaml deleted file mode 100644 index f6ed928d..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - 
torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/hydra.yaml deleted file mode 100644 index 83b04824..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=4
-    - model=NousResearch/Llama-2-7b-hf
-  job:
-    name: cli
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=4,model=NousResearch/Llama-2-7b-hf
-    id: '4'
-    num: 4
-    config_name: fp16
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4
-    choices:
-      benchmark: inference
-      launcher: process
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/overrides.yaml
deleted file mode 100644
index 2c9eacd6..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/overrides.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-- benchmark.input_shapes.batch_size=4
-- model=NousResearch/Llama-2-7b-hf
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/hydra_config.yaml
deleted file mode 100644
index dde859c4..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/hydra_config.yaml
+++ /dev/null
@@ -1,83 +0,0 @@
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-backend:
-  name: pytorch
-  version: 2.1.1+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: 1.0
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: true
-  disable_grad: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 4
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    num_return_sequences: 1
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: text-generation
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/inference_results.csv
deleted file mode 100644
index ee533513..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.109,36.7,16107,16107,14295,14636,13.7,149.0,13.6,150.0,26434,26434,16774,24960
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/config.yaml
deleted file mode 100644
index 97f42016..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/config.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: null
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: ${is_inference:${benchmark.name}}
-  disable_grad: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 64
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: ${infer_task:${model}}
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/hydra.yaml
deleted file mode 100644
index b391fd16..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/hydra.yaml
+++ /dev/null
@@ -1,177 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name}
-  sweep:
-    dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name}
-    subdir: ${benchmark.input_shapes.batch_size}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128
-      model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=64
-    - model=NousResearch/Llama-2-7b-hf
-  job:
-    name: cli
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=64,model=NousResearch/Llama-2-7b-hf
-    id: '12'
-    num: 12
-    config_name: fp16
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64
-    choices:
-      benchmark: inference
-      launcher: process
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/overrides.yaml
deleted file mode 100644
index 55afae42..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/overrides.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-- benchmark.input_shapes.batch_size=64
-- model=NousResearch/Llama-2-7b-hf
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/hydra_config.yaml
deleted file mode 100644
index 20bc3fdd..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/hydra_config.yaml
+++ /dev/null
@@ -1,83 +0,0 @@
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-backend:
-  name: pytorch
-  version: 2.1.1+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: 1.0
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: true
-  disable_grad: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 64
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    num_return_sequences: 1
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: text-generation
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/inference_results.csv
deleted file mode 100644
index 0a2f82a7..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-1.64,39.0,31911,31911,25422,30440,45.9,714.0,44.3,738.0,67584,67584,65112,84420
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/config.yaml
deleted file mode 100644
index 2be7665d..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/config.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: null
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: ${is_inference:${benchmark.name}}
-  disable_grad: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: ${infer_task:${model}}
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/hydra.yaml
deleted file mode 100644
index 1b3320a7..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/hydra.yaml
+++ /dev/null
@@ -1,177 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name}
-  sweep:
-    dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name}
-    subdir: ${benchmark.input_shapes.batch_size}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128
-      model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=8
-    - model=NousResearch/Llama-2-7b-hf
-  job:
-    name: cli
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=8,model=NousResearch/Llama-2-7b-hf
-    id: '6'
-    num: 6
-    config_name: fp16
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8
-    choices:
-      benchmark: inference
-      launcher: process
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/overrides.yaml
deleted file mode 100644
index 1389ebb3..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/overrides.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-- benchmark.input_shapes.batch_size=8
-- model=NousResearch/Llama-2-7b-hf
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/hydra_config.yaml
deleted file mode 100644
index cf0ee8dd..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/hydra_config.yaml
+++ /dev/null
@@ -1,83 +0,0 @@
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-backend:
-  name: pytorch
-  version: 2.1.1+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: 1.0
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: true
-  disable_grad: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    num_return_sequences: 1
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: text-generation
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/inference_results.csv
deleted file mode 100644
index f1c9c922..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.206,38.8,17172,17172,15037,15701,14.0,293.0,13.8,296.0,64977,64977,19997,63503