From f48ed6e288e6f29d15e8f6669be200e3808ad89a Mon Sep 17 00:00:00 2001 From: raushan Date: Wed, 29 Jan 2025 09:25:39 +0100 Subject: [PATCH] fix --- src/transformers/models/idefics2/processing_idefics2.py | 5 +++++ src/transformers/models/idefics3/processing_idefics3.py | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/src/transformers/models/idefics2/processing_idefics2.py b/src/transformers/models/idefics2/processing_idefics2.py index 8c1647cdff41..0991149fa86b 100644 --- a/src/transformers/models/idefics2/processing_idefics2.py +++ b/src/transformers/models/idefics2/processing_idefics2.py @@ -198,6 +198,11 @@ def __call__( elif not isinstance(text, list) and not isinstance(text[0], str): raise ValueError("Invalid input text. Please provide a string, or a list of strings") + add_special_tokens = True + if self.bos_token is not None and text[0].startswith(self.bos_token): + add_special_tokens = False + output_kwargs["text_kwargs"]["add_special_tokens"] = add_special_tokens + # Replace the image token with fake tokens around the expanded image token sequence of length `image_seq_len` fake_image_token = self.fake_image_token.content image_token = self.image_token.content diff --git a/src/transformers/models/idefics3/processing_idefics3.py b/src/transformers/models/idefics3/processing_idefics3.py index 40c8829fe76e..1d888b536985 100644 --- a/src/transformers/models/idefics3/processing_idefics3.py +++ b/src/transformers/models/idefics3/processing_idefics3.py @@ -156,6 +156,7 @@ def __init__(self, image_processor, tokenizer=None, image_seq_len: int = 169, ch ] } tokenizer.add_special_tokens(tokens_to_add) + self.bos_token = self.tokenizer.bos_token super().__init__(image_processor, tokenizer, chat_template=chat_template, **kwargs) @@ -249,6 +250,11 @@ def __call__( raise ValueError("Invalid input text. Please provide a string, or a list of strings") n_images_in_text = [sample.count(self.image_token.content) for sample in text] + add_special_tokens = True + if self.bos_token is not None and text[0].startswith(self.bos_token): + add_special_tokens = False + output_kwargs["text_kwargs"]["add_special_tokens"] = add_special_tokens + if images is not None: if is_image_or_image_url(images): images = [[images]]