diff --git a/.gitignore b/.gitignore index a589733f..30063812 100644 --- a/.gitignore +++ b/.gitignore @@ -168,6 +168,4 @@ data/ version.txt actions-runner/ -experiments/ -examples/ -results/ \ No newline at end of file +experiments/ \ No newline at end of file diff --git a/examples/running-llamas/README.md b/examples/running-llamas/README.md index dca82a4e..fbc5984d 100644 --- a/examples/running-llamas/README.md +++ b/examples/running-llamas/README.md @@ -7,7 +7,7 @@ A set of benchmarks on Meta's LLaMA2's inference. You will need to install these quantization packages: ```bash -pip install auto-gptq # or install it from source +pip install auto-gptq ``` ## Running @@ -15,8 +15,8 @@ pip install auto-gptq # or install it from source Then run these commands from this directory: ```bash -optimum-benchmark --config-dir configs/ --config-name _base_ --multirun -optimum-benchmark --config-dir configs/ --config-name gptq --multirun +optimum-benchmark --config-dir configs/single_device --config-name fp16 --multirun +optimum-benchmark --config-dir configs/single_device --config-name gptq-4bit --multirun ``` This will create a folder called `experiments` with the results of the benchmarks with an inference `batch_size` ranging from 1 to 128 and an input `sequence_length` (prompt size) of 256. diff --git a/examples/running-llamas/artifacts/A100-80GB/forward_latency_plot.png b/examples/running-llamas/artifacts/A100-80GB/forward_latency_plot.png deleted file mode 100644 index 4e7dd369..00000000 Binary files a/examples/running-llamas/artifacts/A100-80GB/forward_latency_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/A100-80GB/forward_memory_plot.png b/examples/running-llamas/artifacts/A100-80GB/forward_memory_plot.png deleted file mode 100644 index ff81266e..00000000 Binary files a/examples/running-llamas/artifacts/A100-80GB/forward_memory_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/A100-80GB/full_report.csv b/examples/running-llamas/artifacts/A100-80GB/full_report.csv deleted file mode 100644 index f05eb1e4..00000000 --- a/examples/running-llamas/artifacts/A100-80GB/full_report.csv +++ /dev/null @@ -1,11 +0,0 @@
-experiment_name,backend.name,backend.version,backend._target_,backend.seed,backend.inter_op_num_threads,backend.intra_op_num_threads,backend.initial_isolation_check,backend.continous_isolation_check,backend.delete_cache,backend.no_weights,backend.device_map,backend.torch_dtype,backend.disable_grad,backend.eval_mode,backend.amp_autocast,backend.amp_dtype,backend.torch_compile,backend.bettertransformer,backend.quantization_scheme,backend.use_ddp,backend.peft_strategy,benchmark.name,benchmark._target_,benchmark.duration,benchmark.warmup_runs,benchmark.memory,benchmark.energy,benchmark.input_shapes.batch_size,benchmark.input_shapes.sequence_length,benchmark.input_shapes.num_choices,benchmark.input_shapes.feature_size,benchmark.input_shapes.nb_max_frames,benchmark.input_shapes.audio_sequence_length,benchmark.new_tokens,benchmark.can_diffuse,benchmark.can_generate,benchmark.generate_kwargs.max_new_tokens,benchmark.generate_kwargs.min_new_tokens,benchmark.generate_kwargs.do_sample,benchmark.generate_kwargs.use_cache,benchmark.generate_kwargs.pad_token_id,benchmark.generate_kwargs.num_beams,model,device,task,hub_kwargs.revision,hub_kwargs.cache_dir,hub_kwargs.force_download,hub_kwargs.local_files_only,environment.optimum_version,environment.optimum_commit,environment.transformers_version,environment.transformers_commit,environment.accelerate_version,environment.accelerate_commit,environment.diffusers_version,environment.diffusers_commit,environment.python_version,environment.system,environment.cpu,environment.cpu_count,environment.cpu_ram_mb,environment.gpus,forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -fp16-batch_size(16)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,meta-llama/Llama-2-7b-hf,cuda:0,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.402,39.8,19165,16520,17779,17.4,471.0,27988,26442,84511 -fp16-batch_size(8)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,meta-llama/Llama-2-7b-hf,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.204,39.2,17087,15037,15701,14.1,290.0,64889,19997,63503 
-gptq-batch_size(16)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,16,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,TheBloke/Llama-2-7B-GPTQ,cuda:0,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.415,38.6,10900,7080,8604,24.6,333.0,65676,17002,83596 -fp16-batch_size(4)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,meta-llama/Llama-2-7b-hf,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.107,37.4,16022,14295,14636,13.9,147.0,26346,16774,24960 -gptq-batch_size(8)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,8,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,TheBloke/Llama-2-7B-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.223,35.9,8826,5597,6530,19.9,206.0,56629,10557,54333 -fp16-batch_size(2)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,meta-llama/Llama-2-7b-hf,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.0579,34.5,15392,13924,14006,13.6,75.3,17003,15162,15617 -gptq-batch_size(4)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,4,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,TheBloke/Llama-2-7B-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.122,32.8,7761,4855,5465,15.3,134.0,18085,7335,15789 
-fp16-batch_size(1)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,meta-llama/Llama-2-7b-hf,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.0328,30.5,15153,13738,13767,13.5,37.9,15866,14356,14480 -gptq-batch_size(2)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,2,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,TheBloke/Llama-2-7B-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.0706,28.3,6872,4484,4575,15.4,66.5,8822,5722,6526 -gptq-batch_size(1)-sequence_length(256)-new_tokens(512),pytorch,2.1.0+cu118,optimum_benchmark.backends.pytorch.backend.PyTorchBackend,42,,,False,False,False,False,,float16,True,True,False,,False,False,,False,,inference,optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark,10,10,True,False,1,256,1,80,3000,16000,512,False,True,512,512,False,True,0,1,TheBloke/Llama-2-7B-GPTQ,cuda,text-generation,main,,False,False,1.13.2,,4.34.1,,0.24.1,,,,3.10.12,Linux, AMD EPYC 7742 64-Core Processor,128,540684,"['NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB', 'NVIDIA A100-SXM4-80GB']",0.0458,21.8,6746,4298,4450,14.8,34.6,7606,4916,5309 diff --git a/examples/running-llamas/artifacts/A100-80GB/generate_memory_plot.png b/examples/running-llamas/artifacts/A100-80GB/generate_memory_plot.png deleted file mode 100644 index 8f5c9e34..00000000 Binary files a/examples/running-llamas/artifacts/A100-80GB/generate_memory_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/A100-80GB/generate_throughput_plot.png b/examples/running-llamas/artifacts/A100-80GB/generate_throughput_plot.png deleted file mode 100644 index dafd743e..00000000 Binary files a/examples/running-llamas/artifacts/A100-80GB/generate_throughput_plot.png and /dev/null differ diff --git a/examples/running-llamas/artifacts/A100-80GB/rich_table.svg b/examples/running-llamas/artifacts/A100-80GB/rich_table.svg deleted file mode 100644 index 09ae22bf..00000000 --- a/examples/running-llamas/artifacts/A100-80GB/rich_table.svg +++ /dev/null @@ -1,163 +0,0 @@ [163 deleted SVG lines: a Rich-rendered "Inference Report" table of the same per-experiment metrics as full_report.csv above (GPU, batch size, forward latency/throughput, max memory used/allocated/reserved, generate throughput/memory, quantization scheme, group); the SVG markup and cell text are garbled beyond recovery here] diff --git a/examples/running-llamas/artifacts/A100-80GB/short_report.csv b/examples/running-llamas/artifacts/A100-80GB/short_report.csv deleted file mode 100644 index 6d6dba2d..00000000 --- a/examples/running-llamas/artifacts/A100-80GB/short_report.csv +++ /dev/null @@ -1,11 +0,0 @@ -experiment_name,GPU,Batch Size,Forward Latency (s),Forward Throughput (samples/s),Forward Max Memory Used (MB),Forward Max Memory Allocated (MB),Forward Max Memory Reserved (MB),Generate Throughput (tokens/s),Generate Max Memory Used (MB),Generate Max Memory Allocated (MB),Generate Max Memory Reserved (MB),Quantization Scheme,Group
-fp16-batch_size(16)-sequence_length(256)-new_tokens(512),A100,16,0.402,39.8,19165,16520,17779,471.0,27988,26442,84511,fp16,A100-fp16 -fp16-batch_size(8)-sequence_length(256)-new_tokens(512),A100,8,0.204,39.2,17087,15037,15701,290.0,64889,19997,63503,fp16,A100-fp16 -gptq-batch_size(16)-sequence_length(256)-new_tokens(512),A100,16,0.415,38.6,10900,7080,8604,333.0,65676,17002,83596,GPTQ,A100-GPTQ -fp16-batch_size(4)-sequence_length(256)-new_tokens(512),A100,4,0.107,37.4,16022,14295,14636,147.0,26346,16774,24960,fp16,A100-fp16 -gptq-batch_size(8)-sequence_length(256)-new_tokens(512),A100,8,0.223,35.9,8826,5597,6530,206.0,56629,10557,54333,GPTQ,A100-GPTQ -fp16-batch_size(2)-sequence_length(256)-new_tokens(512),A100,2,0.0579,34.5,15392,13924,14006,75.3,17003,15162,15617,fp16,A100-fp16 -gptq-batch_size(4)-sequence_length(256)-new_tokens(512),A100,4,0.122,32.8,7761,4855,5465,134.0,18085,7335,15789,GPTQ,A100-GPTQ -fp16-batch_size(1)-sequence_length(256)-new_tokens(512),A100,1,0.0328,30.5,15153,13738,13767,37.9,15866,14356,14480,fp16,A100-fp16 -gptq-batch_size(2)-sequence_length(256)-new_tokens(512),A100,2,0.0706,28.3,6872,4484,4575,66.5,8822,5722,6526,GPTQ,A100-GPTQ -gptq-batch_size(1)-sequence_length(256)-new_tokens(512),A100,1,0.0458,21.8,6746,4298,4450,34.6,7606,4916,5309,GPTQ,A100-GPTQ diff --git a/examples/running-llamas/configs/bnb.yaml b/examples/running-llamas/configs/bnb.yaml deleted file mode 100644 index 61cf1ebd..00000000 --- a/examples/running-llamas/configs/bnb.yaml +++ /dev/null @@ -1,11 +0,0 @@ -defaults: - - _base_ - - _self_ - -experiment_name: bnb-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) - -backend: - quantization_scheme: bnb - quantization_config: - load_in_4bit: true - bnb_4bit_compute_dtype: float16 diff --git a/examples/running-llamas/configs/gptq.yaml b/examples/running-llamas/configs/gptq.yaml deleted file mode 100644 index 3f15bdd5..00000000 --- a/examples/running-llamas/configs/gptq.yaml +++ /dev/null @@ -1,6 +0,0 @@ -defaults: - - _base_ - - _self_ - -experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: TheBloke/Llama-2-7B-GPTQ diff --git a/examples/running-llamas/configs/_base_.yaml b/examples/running-llamas/configs/single_device/_base_.yaml similarity index 55% rename from examples/running-llamas/configs/_base_.yaml rename to examples/running-llamas/configs/single_device/_base_.yaml index 23a3eaa4..ce06aa17 100644 --- a/examples/running-llamas/configs/_base_.yaml +++ b/examples/running-llamas/configs/single_device/_base_.yaml @@ -1,5 +1,6 @@ defaults: - backend: pytorch # default backend + - launcher: process # isolated launcher - benchmark: inference # default benchmark - experiment # inheriting from experiment config - _self_ # for hydra 1.1 compatibility @@ -8,31 +9,31 @@ defaults: hydra: run: - dir: experiments/${experiment_name} + dir: experiments/${oc.env:HOSTNAME}/single_device/${model}/${experiment_name} sweep: - dir: experiments/${experiment_name} + dir: experiments/${oc.env:HOSTNAME}/single_device/${model}/${experiment_name} job: chdir: true env_set: - CUDA_VISIBLE_DEVICES: 0 + CUDA_VISIBLE_DEVICES: 2 CUDA_DEVICE_ORDER: PCI_BUS_ID sweeper: params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 + benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 -experiment_name: 
fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: meta-llama/Llama-2-7b-hf -device: cuda:0 +experiment_name: ??? +device: cuda +model: ??? backend: - initial_isolation_check: false - continous_isolation_check: false + continuous_isolation: true + isolation_check_interval: 0.1 torch_dtype: float16 + no_weights: true benchmark: memory: true warmup_runs: 10 - new_tokens: 512 input_shapes: sequence_length: 256 diff --git a/examples/running-llamas/configs/single_device/fp16.yaml b/examples/running-llamas/configs/single_device/fp16.yaml new file mode 100644 index 00000000..c3c39f6a --- /dev/null +++ b/examples/running-llamas/configs/single_device/fp16.yaml @@ -0,0 +1,11 @@ +defaults: + - _base_ + - _self_ + +hydra: + sweeper: + params: + model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf,NousResearch/Llama-2-70b-hf + +experiment_name: fp16 +model: llama diff --git a/examples/running-llamas/configs/single_device/gptq-4bit.yaml b/examples/running-llamas/configs/single_device/gptq-4bit.yaml new file mode 100644 index 00000000..1c95390c --- /dev/null +++ b/examples/running-llamas/configs/single_device/gptq-4bit.yaml @@ -0,0 +1,6 @@ +defaults: + - _base_ + - _self_ + +experiment_name: gptq-4bit +model: TheBloke/Llama-2-7B-GPTQ diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/config.yaml deleted file mode 100644 index d2cd4143..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/config.yaml +++ /dev/null @@ -1,73 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: meta-llama/Llama-2-7b-hf -device: cuda:0 -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.34.1 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - 
python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/hydra.yaml deleted file mode 100644 index 41af721d..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1 - id: '0' - num: 0 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '3' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/overrides.yaml deleted file mode 100644 index 989520ff..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=1 diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/hydra_config.yaml deleted file mode 100644 index 6d8478b5..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/hydra_config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: 
null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(1)-sequence_length(256)-new_tokens(512) -model: meta-llama/Llama-2-7b-hf -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.34.1 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/inference_results.csv deleted file mode 100644 index 8161550f..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(1)-sequence_length(256)-new_tokens(512)/0/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.0328,30.5,15153,13738,13767,13.5,37.9,15866,14356,14480 diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/config.yaml deleted file mode 100644 index 88fed34b..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/config.yaml +++ /dev/null @@ -1,73 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - 
bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: meta-llama/Llama-2-7b-hf -device: cuda:0 -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.34.1 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/hydra.yaml deleted file mode 100644 index 1b047280..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16 - id: '4' - num: 4 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '3' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/overrides.yaml deleted file mode 100644 index fdb7f01d..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=16 diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/hydra_config.yaml deleted file mode 100644 index 64882a0b..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/hydra_config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - 
inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(16)-sequence_length(256)-new_tokens(512) -model: meta-llama/Llama-2-7b-hf -device: cuda:0 -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.34.1 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/inference_results.csv deleted file mode 100644 index bad3d7ca..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(16)-sequence_length(256)-new_tokens(512)/4/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.402,39.8,19165,16520,17779,17.4,471.0,27988,26442,84511 diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/config.yaml deleted file mode 100644 index f4197a4f..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/config.yaml +++ /dev/null @@ -1,73 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - 
torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: meta-llama/Llama-2-7b-hf -device: cuda:0 -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.34.1 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/hydra.yaml deleted file mode 100644 index 7ef5862d..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/hydra.yaml +++ /dev/null @@ -1,174 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - job: - name: experiment - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2 - id: '1' - num: 1 - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '3' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1 - choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/overrides.yaml deleted file mode 100644 index 8211b85f..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- benchmark.input_shapes.batch_size=2 diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/hydra_config.yaml deleted file mode 100644 index 1de85dc8..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/hydra_config.yaml +++ /dev/null @@ -1,79 +0,0 @@ -backend: - name: pytorch - version: 2.1.0+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: 
null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: true - eval_mode: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16-batch_size(2)-sequence_length(256)-new_tokens(512) -model: meta-llama/Llama-2-7b-hf -device: cuda -task: text-generation -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.34.1 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/inference_results.csv deleted file mode 100644 index 42a8985a..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.0579,34.5,15392,13924,14006,13.6,75.3,17003,15162,15617 diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/multirun.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/multirun.yaml deleted file mode 100644 index 6de5aaf7..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(2)-sequence_length(256)-new_tokens(512)/multirun.yaml +++ /dev/null @@ -1,246 +0,0 @@ -hydra: - run: - dir: experiments/${experiment_name} - sweep: - dir: experiments/${experiment_name} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16 - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. 
- - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: [] - job: - name: experiment - chdir: true - override_dirname: '' - id: ??? - num: ??? - config_name: _base_ - env_set: - CUDA_VISIBLE_DEVICES: '3' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: ??? 
- choices: - benchmark: inference - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens}) -model: meta-llama/Llama-2-7b-hf -device: cuda:0 -task: ${infer_task:${model}} -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.13.2 - optimum_commit: null - transformers_version: 4.34.1 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540684 - gpus: - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/config.yaml deleted file mode 100644 index 589d9f13..00000000 --- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/config.yaml +++ /dev/null @@ -1,73 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - initial_isolation_check: false - continous_isolation_check: false - delete_cache: false - no_weights: false - device_map: null - torch_dtype: float16 - disable_grad: ${is_inference:${benchmark.name}} - eval_mode: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - bettertransformer: false - quantization_scheme: null - quantization_config: {} - use_ddp: false - ddp_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 4
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: meta-llama/Llama-2-7b-hf
-device: cuda:0
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/hydra.yaml
deleted file mode 100644
index 8e3abfc7..00000000
--- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=4
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=4
-    id: '2'
-    num: 2
-    config_name: _base_
-    env_set:
-      CUDA_VISIBLE_DEVICES: '3'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2
-    choices:
-      benchmark: inference
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/overrides.yaml
deleted file mode 100644
index eef8c9ca..00000000
--- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=4
diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/hydra_config.yaml
deleted file mode 100644
index eb07905d..00000000
--- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/hydra_config.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 4
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: fp16-batch_size(4)-sequence_length(256)-new_tokens(512)
-model: meta-llama/Llama-2-7b-hf
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/inference_results.csv
deleted file mode 100644
index 3a40d093..00000000
--- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(4)-sequence_length(256)-new_tokens(512)/2/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.107,37.4,16022,14295,14636,13.9,147.0,26346,16774,24960
diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/config.yaml
deleted file mode 100644
index ff7402ca..00000000
--- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/config.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: fp16-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: meta-llama/Llama-2-7b-hf
-device: cuda:0
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/hydra.yaml
deleted file mode 100644
index ed654ac1..00000000
--- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=8
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=8
-    id: '3'
-    num: 3
-    config_name: _base_
-    env_set:
-      CUDA_VISIBLE_DEVICES: '3'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3
-    choices:
-      benchmark: inference
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/overrides.yaml
deleted file mode 100644
index 8cd14374..00000000
--- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=8
diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/hydra_config.yaml
deleted file mode 100644
index a180796d..00000000
--- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/hydra_config.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: fp16-batch_size(8)-sequence_length(256)-new_tokens(512)
-model: meta-llama/Llama-2-7b-hf
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/inference_results.csv
deleted file mode 100644
index 7fdc7a5f..00000000
--- a/examples/running-llamas/experiments/A100-80GB/fp16-batch_size(8)-sequence_length(256)-new_tokens(512)/3/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.204,39.2,17087,15037,15701,14.1,290.0,64889,19997,63503
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/config.yaml
deleted file mode 100644
index 03090426..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/config.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 1
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda:0
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/hydra.yaml
deleted file mode 100644
index 65926c1f..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=1
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=1
-    id: '0'
-    num: 0
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '3'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0
-    choices:
-      benchmark: inference
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/overrides.yaml
deleted file mode 100644
index 989520ff..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=1
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/hydra_config.yaml
deleted file mode 100644
index 256e0e54..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/hydra_config.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 1
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: gptq-batch_size(1)-sequence_length(256)-new_tokens(512)
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/inference_results.csv
deleted file mode 100644
index da04a0c3..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(1)-sequence_length(256)-new_tokens(512)/0/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.0458,21.8,6746,4298,4450,14.8,34.6,7606,4916,5309
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/config.yaml
deleted file mode 100644
index 58457ece..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/config.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 16
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda:0
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/hydra.yaml
deleted file mode 100644
index c9e56066..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=16
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=16
-    id: '4'
-    num: 4
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '3'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4
-    choices:
-      benchmark: inference
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/overrides.yaml
deleted file mode 100644
index fdb7f01d..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=16
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/hydra_config.yaml
deleted file mode 100644
index ad91270b..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/hydra_config.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 16
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: gptq-batch_size(16)-sequence_length(256)-new_tokens(512)
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda:0
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/inference_results.csv
deleted file mode 100644
index 0b114e75..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(16)-sequence_length(256)-new_tokens(512)/4/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.415,38.6,10900,7080,8604,24.6,333.0,65676,17002,83596
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/config.yaml
deleted file mode 100644
index 90ddbb6e..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/config.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 2
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda:0
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/hydra.yaml
deleted file mode 100644
index 77d220f4..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=2
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=2
-    id: '1'
-    num: 1
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '3'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1
-    choices:
-      benchmark: inference
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/overrides.yaml
deleted file mode 100644
index 8211b85f..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=2
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/hydra_config.yaml
deleted file mode 100644
index d946b75c..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/hydra_config.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 2
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: gptq-batch_size(2)-sequence_length(256)-new_tokens(512)
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/inference_results.csv
deleted file mode 100644
index 1797a299..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/1/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.0706,28.3,6872,4484,4575,15.4,66.5,8822,5722,6526
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/multirun.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/multirun.yaml
deleted file mode 100644
index 667d738e..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(2)-sequence_length(256)-new_tokens(512)/multirun.yaml
+++ /dev/null
@@ -1,246 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task: []
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: ''
-    id: ???
-    num: ???
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '3'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: ???
-    choices:
-      benchmark: inference
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 2
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda:0
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/config.yaml
deleted file mode 100644
index 97f81007..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/config.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 4
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda:0
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/hydra.yaml
deleted file mode 100644
index d5efb914..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=4
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=4
-    id: '2'
-    num: 2
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '3'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2
-    choices:
-      benchmark: inference
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/overrides.yaml
deleted file mode 100644
index eef8c9ca..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=4
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/hydra_config.yaml
deleted file mode 100644
index eb2c9cbd..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/hydra_config.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 4
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: gptq-batch_size(4)-sequence_length(256)-new_tokens(512)
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/inference_results.csv
deleted file mode 100644
index e54a036c..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(4)-sequence_length(256)-new_tokens(512)/2/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.122,32.8,7761,4855,5465,15.3,134.0,18085,7335,15789
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/config.yaml
deleted file mode 100644
index 3a4085f7..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/config.yaml
+++ /dev/null
@@ -1,73 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: ${is_inference:${benchmark.name}}
-  eval_mode: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: gptq-batch_size(${benchmark.input_shapes.batch_size})-sequence_length(${benchmark.input_shapes.sequence_length})-new_tokens(${benchmark.new_tokens})
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda:0
-task: ${infer_task:${model}}
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/hydra.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/hydra.yaml
deleted file mode 100644
index 5f8fa193..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/hydra.yaml
+++ /dev/null
@@ -1,174 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${experiment_name}
-  sweep:
-    dir: experiments/${experiment_name}
-    subdir: ${hydra.job.num}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=8
-  job:
-    name: experiment
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=8
-    id: '3'
-    num: 3
-    config_name: gptq
-    env_set:
-      CUDA_VISIBLE_DEVICES: '3'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3
-    choices:
-      benchmark: inference
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/overrides.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/overrides.yaml
deleted file mode 100644
index 8cd14374..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/.hydra/overrides.yaml
+++ /dev/null
@@ -1 +0,0 @@
-- benchmark.input_shapes.batch_size=8
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/hydra_config.yaml b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/hydra_config.yaml
deleted file mode 100644
index 06e20479..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/hydra_config.yaml
+++ /dev/null
@@ -1,79 +0,0 @@
-backend:
-  name: pytorch
-  version: 2.1.0+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  initial_isolation_check: false
-  continous_isolation_check: false
-  delete_cache: false
-  no_weights: false
-  device_map: null
-  torch_dtype: float16
-  disable_grad: true
-  eval_mode: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  bettertransformer: false
-  quantization_scheme: null
-  quantization_config: {}
-  use_ddp: false
-  ddp_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: gptq-batch_size(8)-sequence_length(256)-new_tokens(512)
-model: TheBloke/Llama-2-7B-GPTQ
-device: cuda
-task: text-generation
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.13.2
-  optimum_commit: null
-  transformers_version: 4.34.1
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540684
-  gpus:
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/inference_results.csv b/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/inference_results.csv
deleted file mode 100644
index 5c52e357..00000000
--- a/examples/running-llamas/experiments/A100-80GB/gptq-batch_size(8)-sequence_length(256)-new_tokens(512)/3/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.223,35.9,8826,5597,6530,19.9,206.0,56629,10557,54333
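The files deleted above are the per-run artifacts Hydra writes for each point of the `benchmark.input_shapes.batch_size: 1,2,4,8,16` sweep: the composed but unresolved config (`.hydra/config.yaml`, where interpolations such as `${is_inference:${benchmark.name}}` are still symbolic), the Hydra runtime config (`.hydra/hydra.yaml`), the command-line overrides (`.hydra/overrides.yaml`), the fully resolved config (`hydra_config.yaml`, where the same fields appear as `disable_grad: true` and `eval_mode: true`), and a one-row `inference_results.csv`. A minimal sketch of how such runs can be aggregated into a single report, assuming pandas and PyYAML are installed and the `experiments/` layout shown above; this helper is illustrative and not part of the repository:

```python
# Hypothetical aggregation helper (not shipped with optimum-benchmark).
# Walks the experiments/ tree and joins each one-row inference_results.csv
# with its run's resolved hydra_config.yaml.
from pathlib import Path

import pandas as pd
import yaml

rows = []
for results_file in Path("experiments").glob("**/inference_results.csv"):
    run_dir = results_file.parent
    # hydra_config.yaml holds the fully resolved config (e.g. batch_size: 8).
    config = yaml.safe_load((run_dir / "hydra_config.yaml").read_text())
    row = pd.read_csv(results_file).iloc[0].to_dict()
    row["experiment_name"] = config["experiment_name"]
    row["batch_size"] = config["benchmark"]["input_shapes"]["batch_size"]
    rows.append(row)

report = pd.DataFrame(rows).sort_values("batch_size")
print(report[["experiment_name", "forward.latency(s)", "generate.throughput(tokens/s)"]])
```

For the two GPTQ runs above, such a report would show decode throughput rising from 134.0 tokens/s at batch size 4 to 206.0 tokens/s at batch size 8, while the memory reserved during generation grows more than 3x (15789 MB to 54333 MB).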