From 78159d09d728444102070d2e403e01894127c822 Mon Sep 17 00:00:00 2001
From: yaoguany <89233842+yaoguany@users.noreply.github.com>
Date: Tue, 18 Jul 2023 01:37:33 +0800
Subject: [PATCH] fix small bug

---
 src/lmflow/pipeline/finetuner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/lmflow/pipeline/finetuner.py b/src/lmflow/pipeline/finetuner.py
index cb65f9e68..82a185f49 100644
--- a/src/lmflow/pipeline/finetuner.py
+++ b/src/lmflow/pipeline/finetuner.py
@@ -149,7 +149,8 @@ def group_text(self, tokenized_datasets, model_max_length):
                     f"({model_max_length})."
                     f"Using block_size={data_args.block_size}.")
                 block_size = data_args.block_size
-
+            else:
+                block_size = data_args.block_size
         # Main data processing function that will concatenate all texts from
         # our dataset and generate chunks of block_size.
         def group_texts(examples):