diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py
index 0529962bffaf..aafb6c28be6c 100644
--- a/src/transformers/tokenization_utils_base.py
+++ b/src/transformers/tokenization_utils_base.py
@@ -2223,8 +2223,11 @@ def _get_padding_truncation_strategies(
         elif padding is not False:
             if padding is True:
                 if verbose:
-                    if max_length is not None:
-                        warnings.warn("`max_length` is ignored when `padding`=`True`.")
+                    if max_length is not None and (truncation is False or truncation == "do_not_truncate"):
+                        warnings.warn(
+                            "`max_length` is ignored when `padding`=`True` and there is no truncation strategy. "
+                            "To pad to max length, use `padding='max_length'`."
+                        )
                     if old_pad_to_max_length is not False:
                         warnings.warn("Though `pad_to_max_length` = `True`, it is ignored because `padding`=`True`.")
                 padding_strategy = PaddingStrategy.LONGEST  # Default to pad to the longest sequence in the batch
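
For context, a minimal sketch of the calling pattern this change affects. The checkpoint name and example inputs below are illustrative assumptions, not taken from the PR; the point is only when the `max_length` warning fires under the new condition.

```python
# Illustrative sketch of the warning behavior changed by this diff.
# The checkpoint name and inputs are assumptions for demonstration only.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Before this change: passing max_length with padding=True always warned that
# max_length is ignored, even when a truncation strategy makes it meaningful.
# After this change: no warning here, because truncation=True uses max_length.
batch = tokenizer(
    ["a short sentence", "a slightly longer example sentence"],
    padding=True,
    truncation=True,
    max_length=8,
)

# Still warns after this change: max_length has no effect when padding=True
# and no truncation strategy is set, and the message now suggests
# padding='max_length' as the alternative.
batch = tokenizer(
    ["a short sentence", "a slightly longer example sentence"],
    padding=True,
    max_length=8,
)
```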