diff --git a/pyrit/prompt_converter/__init__.py b/pyrit/prompt_converter/__init__.py index a232e5dd1..81eea213b 100644 --- a/pyrit/prompt_converter/__init__.py +++ b/pyrit/prompt_converter/__init__.py @@ -25,6 +25,7 @@ FuzzerSimilarConverter, ) from pyrit.prompt_converter.human_in_the_loop_converter import HumanInTheLoopConverter +from pyrit.prompt_converter.image_overlay_converter import ImageOverlayConverter from pyrit.prompt_converter.leetspeak_converter import LeetspeakConverter from pyrit.prompt_converter.morse_converter import MorseConverter from pyrit.prompt_converter.noise_converter import NoiseConverter @@ -45,6 +46,7 @@ from pyrit.prompt_converter.variation_converter import VariationConverter + __all__ = [ "AddImageTextConverter", "AddTextImageConverter", @@ -66,6 +68,7 @@ "FuzzerShortenConverter", "FuzzerSimilarConverter", "HumanInTheLoopConverter", + "ImageOverlayConverter", "LeetspeakConverter", "LLMGenericTextConverter", "MorseConverter", diff --git a/pyrit/prompt_converter/image_overlay_converter.py b/pyrit/prompt_converter/image_overlay_converter.py new file mode 100644 index 000000000..c8c22edef --- /dev/null +++ b/pyrit/prompt_converter/image_overlay_converter.py @@ -0,0 +1,99 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +import logging +import base64 +from typing import Optional + +from PIL import Image +from io import BytesIO + +from pyrit.models import data_serializer_factory +from pyrit.models import PromptDataType +from pyrit.prompt_converter import PromptConverter, ConverterResult +from pyrit.memory import MemoryInterface, DuckDBMemory + +logger = logging.getLogger(__name__) + + +class ImageOverlayConverter(PromptConverter): + """ + A converter that takes in a base image, and a secondary image to embed within the main image. + + Args: + base_image_path (str): File path of the base image + secondary_image_path (str): File path of the second image to embed with the base image + x_pos (int, optional): X coordinate to place second image on the base image (0 is left most). Defaults to 0. + y_pos (int, optional): Y coordinate to place second image on the base image (0 is upper most). Defaults to 0. + memory: (memory, optional): Memory to store the chat messages. DuckDBMemory will be used by default. + """ + + def __init__( + self, + base_image_path: str, + secondary_image_path: str, + x_pos: Optional[int] = 0, + y_pos: Optional[int] = 0, + memory: Optional[MemoryInterface] = None, + ): + if not base_image_path: + raise ValueError("Please provide valid base image path") + if not secondary_image_path: + raise ValueError("Please provide valid overlay image path") + + self._base_image_path = base_image_path + self._secondary_image_path = secondary_image_path + self._x_pos = x_pos + self._y_pos = y_pos + self._memory = memory or DuckDBMemory() + + def _add_overlay_image(self) -> Image.Image: + """ + Embed the second image onto the base image + + Returns: + Image.Image: The combined image with overlay. + """ + # Open the images + base_image = Image.open(self._base_image_path) + second_image = Image.open(self._secondary_image_path) + + # Make a copy of base image, in case the user want to keep their input images unchanged + copied_base_image = base_image.copy() + + # Paste the second image onto the base image + copied_base_image.paste(second_image, (self._x_pos, self._y_pos), second_image) + + return copied_base_image + + async def convert_async(self, *, prompt: str, input_type: PromptDataType = "image") -> ConverterResult: + """ + Converter the base image to embed the second image onto it. + + Args: + prompt (str): Not used in this converter but may be required by the interface + input_type (PromptDataType): type of data, should be image + Returns: + ConverterResult: converted image with file path + """ + if not self.input_supported(input_type): + raise ValueError("Input type not supported") + + img_serializer = data_serializer_factory(value=self._base_image_path, data_type="image_path", memory=self._memory) + + # Add overlay to the image + updated_img = self._add_overlay_image() + + # Encode the image using base64 and return ConverterResult + # I took the following code from add_image_text_converter.py @author rdheekonda + image_bytes = BytesIO() + mime_type = img_serializer.get_mime_type(self._base_image_path) + image_type = mime_type.split("/")[-1] + updated_img.save(image_bytes, format=image_type) + image_str = base64.b64encode(image_bytes.getvalue()) + # Save image as generated UUID filename + await img_serializer.save_b64_image(data=image_str) + return ConverterResult(output_text=str(img_serializer.value), output_type="image_path") + + def input_supported(self, input_type: PromptDataType) -> bool: + return input_type == "image"