From 6adad1651566ffb761ce47f8d671b73a3bbb0ec2 Mon Sep 17 00:00:00 2001
From: Libin Tang
Date: Fri, 21 Jun 2024 01:19:32 -0700
Subject: [PATCH] Fix issues during readme validation (#1083)

Fix review comments.

(cherry picked from commit 9e1319f2d5c9c129229db63451eba0283dfab235)
---
 examples/summarization/run_summarization.py | 3 +++
 examples/text-generation/README.md          | 4 +++-
 2 files changed, 6 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 examples/summarization/run_summarization.py
 mode change 100644 => 100755 examples/text-generation/README.md

diff --git a/examples/summarization/run_summarization.py b/examples/summarization/run_summarization.py
old mode 100644
new mode 100755
index 23be70e213..db7a4913c9
--- a/examples/summarization/run_summarization.py
+++ b/examples/summarization/run_summarization.py
@@ -764,6 +764,9 @@ def compute_metrics(eval_preds):
     else:
         training_args.generation_config.max_length = data_args.val_max_target_length
     if data_args.num_beams is not None:
+        if data_args.num_beams == 1:
+            training_args.generation_config.length_penalty = None
+            training_args.generation_config.early_stopping = False
         training_args.generation_config.num_beams = data_args.num_beams
     elif training_args.generation_num_beams is not None:
         training_args.generation_config.num_beams = training_args.generation_num_beams
diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md
old mode 100644
new mode 100755
index cac7bbbe50..e020e72a79
--- a/examples/text-generation/README.md
+++ b/examples/text-generation/README.md
@@ -443,7 +443,9 @@ More information on usage of the unifier script can be found in fp8 Habana docs:
 
 Some models can fit on HPU DRAM but can't fit on the CPU RAM.
 When we run a model on a single card without DeepSpeed, the `--disk_offload` flag offloads weights to disk during model quantization in HQT. When this flag is set, each weight is first loaded from disk to CPU RAM, then moved to HPU DRAM and quantized there. This way, only one weight at a time is held in CPU RAM instead of the whole model. To enable this weight-offload mechanism, add the `--disk_offload` flag to the topology command line.
-Here is an example of using disk_offload in quantize command. Please make sure to run the measurement first.
+Here is an example of using disk_offload in the quantize command.
+Please follow the "Running FP8 models on single device" section first before running the command below.
+
 ```bash
 QUANT_CONFIG=./quantization_config/maxabs_quant.json TQDM_DISABLE=1 \
 python run_generation.py \
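
For context on the run_summarization.py hunk: `length_penalty` and `early_stopping` are beam-search-only generation options, so when greedy decoding is requested (`--num_beams 1`) the patch clears them before the config is used. Below is a minimal standalone sketch of that guard, assuming the Hugging Face `transformers` library; `requested_num_beams` is a hypothetical stand-in for `data_args.num_beams`.

```python
# Minimal sketch (not part of the patch) of the guard added in
# run_summarization.py, using a bare transformers GenerationConfig.
from transformers import GenerationConfig

# Config carrying beam-search-only fields, e.g. inherited from model defaults.
generation_config = GenerationConfig(num_beams=4, length_penalty=2.0, early_stopping=True)

requested_num_beams = 1  # hypothetical stand-in for data_args.num_beams
if requested_num_beams == 1:
    # length_penalty and early_stopping only affect beam search; resetting
    # them avoids unused-parameter warnings when greedy decoding is used.
    generation_config.length_penalty = None
    generation_config.early_stopping = False
generation_config.num_beams = requested_num_beams
```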
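The README hunk presumes a two-step FP8 flow: a measurement pass (the "Running FP8 models on single device" section it now references) followed by the quantize pass shown in the hunk. A hedged sketch of that ordering follows; the measurement config path and the `<model>` placeholder are assumptions, and all flags beyond those named in the hunk are elided.

```bash
# Step 1 (assumed): measurement pass, as described in
# "Running FP8 models on single device".
QUANT_CONFIG=./quantization_config/maxabs_measure.json \
python run_generation.py --model_name_or_path <model> ...

# Step 2: quantize pass; --disk_offload streams weights through CPU RAM
# one at a time instead of keeping the whole model resident.
QUANT_CONFIG=./quantization_config/maxabs_quant.json TQDM_DISABLE=1 \
python run_generation.py --model_name_or_path <model> --disk_offload ...
```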