diff --git a/examples/running-llamas/configs/fp16+bt.yaml b/examples/running-llamas/configs/fp16+bt.yaml new file mode 100644 index 00000000..d7db1000 --- /dev/null +++ b/examples/running-llamas/configs/fp16+bt.yaml @@ -0,0 +1,8 @@ +defaults: + - _base_ + - _self_ + +experiment_name: fp16+bt + +backend: + to_bettertransformer: true diff --git a/examples/running-llamas/configs/fp16+fa2.yaml b/examples/running-llamas/configs/fp16+fa2.yaml new file mode 100644 index 00000000..93959afb --- /dev/null +++ b/examples/running-llamas/configs/fp16+fa2.yaml @@ -0,0 +1,8 @@ +defaults: + - _base_ + - _self_ + +experiment_name: fp16+fa2 + +backend: + use_flash_attention_2: true diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/config.yaml deleted file mode 100644 index a249298d..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/hydra.yaml deleted file mode 100644 index c863e7c3..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: 
experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=1 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=1,model=NousResearch/Llama-2-7b-hf - id: '0' - num: 0 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - 
hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/overrides.yaml deleted file mode 100644 index 6af3cf2d..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=1 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/hydra_config.yaml deleted file mode 100644 index 1e7e68a8..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 1 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/inference_results.csv deleted file mode 100644 index 53e050a6..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/1/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ 
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.0335,29.9,15239,15239,13738,13767,13.5,37.9,13.5,37.9,15954,15954,14356,14480 diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/config.yaml deleted file mode 100644 index aff4cbc5..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/hydra.yaml deleted file mode 100644 index a8752cc5..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: 
hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=128 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=128,model=NousResearch/Llama-2-7b-hf - id: '14' - num: 14 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: 
false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/overrides.yaml deleted file mode 100644 index 74593dc2..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=128 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/hydra_config.yaml deleted file mode 100644 index 02ec5c58..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/128/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 128 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/config.yaml deleted file mode 100644 index 0f3121ee..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: 
true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/hydra.yaml deleted file mode 100644 index 20399860..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=16 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=16,model=NousResearch/Llama-2-7b-hf - id: '8' - num: 8 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/overrides.yaml deleted file mode 100644 index 46c7d465..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=16 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/hydra_config.yaml deleted file mode 100644 index ad990cc6..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 16 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/inference_results.csv deleted file mode 100644 index 40d786cb..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/16/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.414,38.6,19251,19251,16520,17779,17.5,468.0,17.1,478.0,28076,28076,26442,84420 diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/config.yaml deleted file mode 100644 index c3fed0b3..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - 
torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/hydra.yaml deleted file mode 100644 index 0fff6497..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=2 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=2,model=NousResearch/Llama-2-7b-hf - id: '2' - num: 2 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/overrides.yaml deleted file mode 100644 index cfae74df..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=2 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/hydra_config.yaml deleted file mode 100644 index 2d2cdd5e..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 2 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/inference_results.csv deleted file mode 100644 index 3fe725e1..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/2/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.0589,34.0,15482,15482,13924,14011,13.6,75.3,13.5,75.7,17082,17082,15162,15609 diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/config.yaml deleted file mode 100644 index 14e60142..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - 
torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/hydra.yaml deleted file mode 100644 index de1322ef..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
- hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: MULTIRUN - searchpath: [] - callbacks: {} - output_subdir: .hydra - overrides: - hydra: - - hydra.mode=MULTIRUN - task: - - benchmark.input_shapes.batch_size=32 - - model=NousResearch/Llama-2-7b-hf - job: - name: cli - chdir: true - override_dirname: benchmark.input_shapes.batch_size=32,model=NousResearch/Llama-2-7b-hf - id: '10' - num: 10 - config_name: fp16 - env_set: - CUDA_VISIBLE_DEVICES: '0' - CUDA_DEVICE_ORDER: PCI_BUS_ID - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /workspace/optimum-benchmark/examples/running-llamas - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: optimum_benchmark - schema: pkg - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: /workspace/optimum-benchmark/examples/running-llamas/configs - schema: file - provider: command-line - - path: '' - schema: structured - provider: schema - output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32 - choices: - benchmark: inference - launcher: process - backend: pytorch - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/overrides.yaml deleted file mode 100644 index 425e1ca1..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/.hydra/overrides.yaml +++ /dev/null @@ -1,2 +0,0 @@ -- benchmark.input_shapes.batch_size=32 -- model=NousResearch/Llama-2-7b-hf diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/hydra_config.yaml deleted file mode 100644 index 1f782598..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/hydra_config.yaml +++ /dev/null @@ -1,83 +0,0 @@ -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -backend: - name: pytorch - version: 2.1.1+cu118 - _target_: 
optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: 1.0 - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: true - disable_grad: true - amp_autocast: false - amp_dtype: null - torch_compile: false - torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 32 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: false - can_generate: true - forward_kwargs: {} - generate_kwargs: - num_return_sequences: 1 - max_new_tokens: 512 - min_new_tokens: 512 - do_sample: false - use_cache: true - pad_token_id: 0 - num_beams: 1 -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: text-generation -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/inference_results.csv deleted file mode 100644 index e32d1f98..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/32/inference_results.csv +++ /dev/null @@ -1,2 +0,0 @@ -forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB) -0.817,39.2,23464,23464,19487,21992,26.9,609.0,26.1,627.0,53265,53265,39331,84422 diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/config.yaml deleted file mode 100644 index f6ed928d..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/config.yaml +++ /dev/null @@ -1,76 +0,0 @@ -backend: - name: pytorch - version: ${pytorch_version:} - _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend - seed: 42 - inter_op_num_threads: null - intra_op_num_threads: null - continuous_isolation: true - isolation_check_interval: null - delete_cache: false - no_weights: true - device_map: null - torch_dtype: float16 - eval_mode: ${is_inference:${benchmark.name}} - disable_grad: ${is_inference:${benchmark.name}} - amp_autocast: false - amp_dtype: null - torch_compile: false - 
torch_compile_config: {} - to_bettertransformer: false - use_flash_attention_2: false - quantization_scheme: null - quantization_config: {} - data_parallel: false - deepspeed_inference: false - deepspeed_inference_config: {} - peft_strategy: null - peft_config: {} -launcher: - name: process - _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher - start_method: spawn -benchmark: - name: inference - _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark - duration: 10 - warmup_runs: 10 - memory: true - energy: false - input_shapes: - batch_size: 4 - sequence_length: 256 - num_choices: 1 - feature_size: 80 - nb_max_frames: 3000 - audio_sequence_length: 16000 - new_tokens: 512 - can_diffuse: ${can_diffuse:${task}} - can_generate: ${can_generate:${task}} - forward_kwargs: {} - generate_kwargs: {} -experiment_name: fp16 -model: NousResearch/Llama-2-7b-hf -task: ${infer_task:${model}} -device: cuda -hub_kwargs: - revision: main - cache_dir: null - force_download: false - local_files_only: false -environment: - optimum_version: 1.14.1 - optimum_commit: null - transformers_version: 4.35.2 - transformers_commit: null - accelerate_version: 0.24.1 - accelerate_commit: null - diffusers_version: null - diffusers_commit: null - python_version: 3.10.12 - system: Linux - cpu: ' AMD EPYC 7742 64-Core Processor' - cpu_count: 128 - cpu_ram_mb: 540671 - gpus: - - NVIDIA A100-SXM4-80GB diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/hydra.yaml deleted file mode 100644 index 83b04824..00000000 --- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - sweep: - dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name} - subdir: ${benchmark.input_shapes.batch_size} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: - benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128 - model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? 
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=4
-    - model=NousResearch/Llama-2-7b-hf
-  job:
-    name: cli
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=4,model=NousResearch/Llama-2-7b-hf
-    id: '4'
-    num: 4
-    config_name: fp16
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4
-    choices:
-      benchmark: inference
-      launcher: process
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/overrides.yaml
deleted file mode 100644
index 2c9eacd6..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/.hydra/overrides.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-- benchmark.input_shapes.batch_size=4
-- model=NousResearch/Llama-2-7b-hf
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/hydra_config.yaml
deleted file mode 100644
index dde859c4..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/hydra_config.yaml
+++ /dev/null
@@ -1,83 +0,0 @@
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-backend:
-  name: pytorch
-  version: 2.1.1+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: 1.0
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: true
-  disable_grad: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 4
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    num_return_sequences: 1
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: text-generation
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/inference_results.csv
deleted file mode 100644
index ee533513..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/4/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.109,36.7,16107,16107,14295,14636,13.7,149.0,13.6,150.0,26434,26434,16774,24960
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/config.yaml
deleted file mode 100644
index 97f42016..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/config.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: null
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: ${is_inference:${benchmark.name}}
-  disable_grad: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 64
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: ${infer_task:${model}}
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/hydra.yaml
deleted file mode 100644
index b391fd16..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/hydra.yaml
+++ /dev/null
@@ -1,177 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name}
-  sweep:
-    dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name}
-    subdir: ${benchmark.input_shapes.batch_size}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128
-      model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=64
-    - model=NousResearch/Llama-2-7b-hf
-  job:
-    name: cli
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=64,model=NousResearch/Llama-2-7b-hf
-    id: '12'
-    num: 12
-    config_name: fp16
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64
-    choices:
-      benchmark: inference
-      launcher: process
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/overrides.yaml
deleted file mode 100644
index 55afae42..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/.hydra/overrides.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-- benchmark.input_shapes.batch_size=64
-- model=NousResearch/Llama-2-7b-hf
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/hydra_config.yaml
deleted file mode 100644
index 20bc3fdd..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/hydra_config.yaml
+++ /dev/null
@@ -1,83 +0,0 @@
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-backend:
-  name: pytorch
-  version: 2.1.1+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: 1.0
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: true
-  disable_grad: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 64
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    num_return_sequences: 1
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: text-generation
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/inference_results.csv
deleted file mode 100644
index 0a2f82a7..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/64/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-1.64,39.0,31911,31911,25422,30440,45.9,714.0,44.3,738.0,67584,67584,65112,84420
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/config.yaml
deleted file mode 100644
index 2be7665d..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/config.yaml
+++ /dev/null
@@ -1,76 +0,0 @@
-backend:
-  name: pytorch
-  version: ${pytorch_version:}
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: null
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: ${is_inference:${benchmark.name}}
-  disable_grad: ${is_inference:${benchmark.name}}
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: ${can_diffuse:${task}}
-  can_generate: ${can_generate:${task}}
-  forward_kwargs: {}
-  generate_kwargs: {}
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: ${infer_task:${model}}
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/hydra.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/hydra.yaml
deleted file mode 100644
index 1b3320a7..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/hydra.yaml
+++ /dev/null
@@ -1,177 +0,0 @@
-hydra:
-  run:
-    dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name}
-  sweep:
-    dir: experiments/${oc.env:HOSTNAME}/${model}/${experiment_name}
-    subdir: ${benchmark.input_shapes.batch_size}
-  launcher:
-    _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher
-  sweeper:
-    _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper
-    max_batch_size: null
-    params:
-      benchmark.input_shapes.batch_size: 1,2,4,8,16,32,64,128
-      model: NousResearch/Llama-2-7b-hf,NousResearch/Llama-2-13b-hf
-  help:
-    app_name: ${hydra.job.name}
-    header: '${hydra.help.app_name} is powered by Hydra.
-
-      '
-    footer: 'Powered by Hydra (https://hydra.cc)
-
-      Use --hydra-help to view Hydra specific help
-
-      '
-    template: '${hydra.help.header}
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (group=option)
-
-
-      $APP_CONFIG_GROUPS
-
-
-      == Config ==
-
-      Override anything in the config (foo.bar=value)
-
-
-      $CONFIG
-
-
-      ${hydra.help.footer}
-
-      '
-  hydra_help:
-    template: 'Hydra (${hydra.runtime.version})
-
-      See https://hydra.cc for more info.
-
-
-      == Flags ==
-
-      $FLAGS_HELP
-
-
-      == Configuration groups ==
-
-      Compose your configuration from those groups (For example, append hydra/job_logging=disabled
-      to command line)
-
-
-      $HYDRA_CONFIG_GROUPS
-
-
-      Use ''--cfg hydra'' to Show the Hydra config.
-
-      '
-    hydra_help: ???
-  hydra_logging:
-    version: 1
-    formatters:
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s'
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-    root:
-      level: INFO
-      handlers:
-      - console
-    disable_existing_loggers: false
-  job_logging:
-    version: 1
-    formatters:
-      simple:
-        format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s'
-      colorlog:
-        (): colorlog.ColoredFormatter
-        format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s]
-          - %(message)s'
-        log_colors:
-          DEBUG: purple
-          INFO: green
-          WARNING: yellow
-          ERROR: red
-          CRITICAL: red
-    handlers:
-      console:
-        class: logging.StreamHandler
-        formatter: colorlog
-        stream: ext://sys.stdout
-      file:
-        class: logging.FileHandler
-        formatter: simple
-        filename: ${hydra.job.name}.log
-    root:
-      level: INFO
-      handlers:
-      - console
-      - file
-    disable_existing_loggers: false
-  env: {}
-  mode: MULTIRUN
-  searchpath: []
-  callbacks: {}
-  output_subdir: .hydra
-  overrides:
-    hydra:
-    - hydra.mode=MULTIRUN
-    task:
-    - benchmark.input_shapes.batch_size=8
-    - model=NousResearch/Llama-2-7b-hf
-  job:
-    name: cli
-    chdir: true
-    override_dirname: benchmark.input_shapes.batch_size=8,model=NousResearch/Llama-2-7b-hf
-    id: '6'
-    num: 6
-    config_name: fp16
-    env_set:
-      CUDA_VISIBLE_DEVICES: '0'
-      CUDA_DEVICE_ORDER: PCI_BUS_ID
-    env_copy: []
-    config:
-      override_dirname:
-        kv_sep: '='
-        item_sep: ','
-        exclude_keys: []
-  runtime:
-    version: 1.3.2
-    version_base: '1.3'
-    cwd: /workspace/optimum-benchmark/examples/running-llamas
-    config_sources:
-    - path: hydra.conf
-      schema: pkg
-      provider: hydra
-    - path: optimum_benchmark
-      schema: pkg
-      provider: main
-    - path: hydra_plugins.hydra_colorlog.conf
-      schema: pkg
-      provider: hydra-colorlog
-    - path: /workspace/optimum-benchmark/examples/running-llamas/configs
-      schema: file
-      provider: command-line
-    - path: ''
-      schema: structured
-      provider: schema
-    output_dir: /workspace/optimum-benchmark/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8
-    choices:
-      benchmark: inference
-      launcher: process
-      backend: pytorch
-      hydra/env: default
-      hydra/callbacks: null
-      hydra/job_logging: colorlog
-      hydra/hydra_logging: colorlog
-      hydra/hydra_help: default
-      hydra/help: default
-      hydra/sweeper: basic
-      hydra/launcher: basic
-      hydra/output: default
-  verbose: false
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/overrides.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/overrides.yaml
deleted file mode 100644
index 1389ebb3..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/.hydra/overrides.yaml
+++ /dev/null
@@ -1,2 +0,0 @@
-- benchmark.input_shapes.batch_size=8
-- model=NousResearch/Llama-2-7b-hf
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/hydra_config.yaml b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/hydra_config.yaml
deleted file mode 100644
index cf0ee8dd..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/hydra_config.yaml
+++ /dev/null
@@ -1,83 +0,0 @@
-launcher:
-  name: process
-  _target_: optimum_benchmark.launchers.process.launcher.ProcessLauncher
-  start_method: spawn
-backend:
-  name: pytorch
-  version: 2.1.1+cu118
-  _target_: optimum_benchmark.backends.pytorch.backend.PyTorchBackend
-  seed: 42
-  inter_op_num_threads: null
-  intra_op_num_threads: null
-  continuous_isolation: true
-  isolation_check_interval: 1.0
-  delete_cache: false
-  no_weights: true
-  device_map: null
-  torch_dtype: float16
-  eval_mode: true
-  disable_grad: true
-  amp_autocast: false
-  amp_dtype: null
-  torch_compile: false
-  torch_compile_config: {}
-  to_bettertransformer: false
-  use_flash_attention_2: false
-  quantization_scheme: null
-  quantization_config: {}
-  data_parallel: false
-  deepspeed_inference: false
-  deepspeed_inference_config: {}
-  peft_strategy: null
-  peft_config: {}
-benchmark:
-  name: inference
-  _target_: optimum_benchmark.benchmarks.inference.benchmark.InferenceBenchmark
-  duration: 10
-  warmup_runs: 10
-  memory: true
-  energy: false
-  input_shapes:
-    batch_size: 8
-    sequence_length: 256
-    num_choices: 1
-    feature_size: 80
-    nb_max_frames: 3000
-    audio_sequence_length: 16000
-  new_tokens: 512
-  can_diffuse: false
-  can_generate: true
-  forward_kwargs: {}
-  generate_kwargs:
-    num_return_sequences: 1
-    max_new_tokens: 512
-    min_new_tokens: 512
-    do_sample: false
-    use_cache: true
-    pad_token_id: 0
-    num_beams: 1
-experiment_name: fp16
-model: NousResearch/Llama-2-7b-hf
-task: text-generation
-device: cuda
-hub_kwargs:
-  revision: main
-  cache_dir: null
-  force_download: false
-  local_files_only: false
-environment:
-  optimum_version: 1.14.1
-  optimum_commit: null
-  transformers_version: 4.35.2
-  transformers_commit: null
-  accelerate_version: 0.24.1
-  accelerate_commit: null
-  diffusers_version: null
-  diffusers_commit: null
-  python_version: 3.10.12
-  system: Linux
-  cpu: ' AMD EPYC 7742 64-Core Processor'
-  cpu_count: 128
-  cpu_ram_mb: 540671
-  gpus:
-  - NVIDIA A100-SXM4-80GB
diff --git a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/inference_results.csv b/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/inference_results.csv
deleted file mode 100644
index f1c9c922..00000000
--- a/examples/running-llamas/experiments/hf-dgx-01/NousResearch/Llama-2-7b-hf/fp16/8/inference_results.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-forward.latency(s),forward.throughput(samples/s),forward.peak_memory(MB),forward.max_memory_used(MB),forward.max_memory_allocated(MB),forward.max_memory_reserved(MB),generate.latency(s),generate.throughput(tokens/s),decode.latency(s),decode.throughput(tokens/s),generate.peak_memory(MB),generate.max_memory_used(MB),generate.max_memory_allocated(MB),generate.max_memory_reserved(MB)
-0.206,38.8,17172,17172,15037,15701,14.0,293.0,13.8,296.0,64977,64977,19997,63503