comic

edenartlab · Jan 21, 2024 · 24d13d4 · 24d13d4
1 parent 9e52982
commit 24d13d4
Show file tree

Hide file tree

Showing 18 changed files with 429 additions and 46 deletions.
diff --git a/app/animations/__init__.py b/app/animations/__init__.py
@@ -1,3 +1,4 @@
 from .monologue import animated_monologue
 from .dialogue import animated_dialogue
-from .story import animated_story
+from .story import animated_story
+from .comic import illustrated_comic
diff --git a/app/animations/animation.py b/app/animations/animation.py
@@ -1,8 +1,9 @@
 from typing import Optional
+from PIL import Image, ImageDraw
 
 from ..plugins import replicate, elevenlabs, s3
 from ..character import EdenCharacter
-from ..utils import combine_speech_video
+from ..utils import combine_speech_video, wrap_text, get_font
 
 
 def talking_head(
@@ -45,4 +46,56 @@ def screenplay_clip(
         height=height,
     )
     output_filename = combine_speech_video(audio_url, video_url)
-    return output_filename, thumbnail_url
+    return output_filename, thumbnail_url
+
+
+def comic_strip(
+    images: list[Image.Image],
+    captions: list[str],
+    margin: int = 30,
+    padding: int = 25,
+    caption_padding_top: int = 10,
+    line_spacing: float = 1.3,
+    font_size: int = 48,
+    font_ttf: str = 'Roboto-Regular.ttf'
+) -> tuple[Image.Image, Image.Image]:
+    """
+    Lay out panel images and their captions on a two-column comic page.
+
+    Every panel is pasted with a black caption box directly underneath it.
+    The canvas is sized for two rows of two panels, using the size of the
+    first image for every cell.
+
+    Args:
+        images: Panel images, in reading order (left to right, top to bottom).
+        captions: One caption per image, indexed in parallel with `images`.
+        margin: Gap in pixels between the two columns and between the rows.
+        padding: Horizontal inset of the caption text inside its box.
+        caption_padding_top: Vertical inset of the caption text; the box
+            reserves this much space both above and below the text.
+        line_spacing: Line advance as a multiple of the font size.
+        font_size: Font size handed to `get_font`.
+        font_ttf: Font file name handed to `get_font`.
+
+    Returns:
+        A `(composite_image, thumbnail)` pair: the full page, and the first
+        panel together with its caption box.
+
+    Raises:
+        IndexError: If `images` is empty or `captions` is shorter than
+            `images`.
+    """
+    font = get_font(font_ttf, font_size)
+
+    # Room for three lines of text plus the padding above and below them.
+    # This single value drives BOTH the canvas size and the panel layout;
+    # sizing the canvas without the padding cut off the bottom captions.
+    caption_box_height = 3 * int(1.5 * font.size) + 2 * caption_padding_top
+    line_height = int(line_spacing * font.size)
+
+    width, height = images[0].size
+    total_width = width * 2 + margin
+    total_height = height * 2 + caption_box_height * 2 + margin
+
+    # Black background, so the margins between panels show up as gutters.
+    composite_image = Image.new('RGB', (total_width, total_height), color='black')
+
+    for i, image in enumerate(images):
+        column, row = i % 2, i // 2
+        x = column * (width + margin)
+        y = row * (height + caption_box_height + margin)
+
+        composite_image.paste(image, (x, y))
+
+        caption_box = Image.new('RGB', (width, caption_box_height), color='black')
+        caption_draw = ImageDraw.Draw(caption_box)
+
+        # Text that wraps to more lines than the box can hold is drawn past
+        # the bottom edge of the box and is therefore not visible.
+        wrapped_caption = wrap_text(caption_draw, captions[i], font, width - 2 * padding)
+        caption_y = caption_padding_top
+        for line in wrapped_caption:
+            caption_draw.text((padding, caption_y), line, fill='white', font=font)
+            caption_y += line_height
+
+        composite_image.paste(caption_box, (x, y + height))
+
+        if i == 0:
+            # The thumbnail is the first panel stacked on its caption box.
+            thumbnail = Image.new('RGB', (width, height + caption_box_height), color='white')
+            thumbnail.paste(image, (0, 0))
+            thumbnail.paste(caption_box, (0, height))
+
+    return composite_image, thumbnail
diff --git a/app/animations/comic.py b/app/animations/comic.py
@@ -0,0 +1,59 @@
+import os
+import requests
+import tempfile
+
+from .. import utils
+from ..plugins import replicate, s3
+from ..character import EdenCharacter
+from ..llm import LLM
+from ..models import ComicRequest, ComicResult
+from ..prompt_templates.comic import comicwriter_system_template
+from .animation import comic_strip
+
+
+def illustrated_comic(request: ComicRequest):
+    """
+    Write a comic with the LLM, render its panels, and upload the result.
+
+    The comic-writer LLM turns `request.prompt` into a list of panels. Each
+    panel's `image` text is rendered through `replicate.sdxl`, the rendered
+    images are assembled with `comic_strip`, and the page plus a thumbnail
+    are uploaded through `s3.upload`.
+
+    Returns:
+        An `(output_url, thumbnail_url)` pair with the values returned by
+        `s3.upload` for the JPEG page and the WEBP thumbnail.
+    """
+    # Caller-supplied params win over the defaults.
+    llm_params = {"temperature": 1.0, "max_tokens": 2000}
+    llm_params.update(request.params)
+
+    loras = {
+        "Verdelis": "https://edenartlab-prod-data.s3.us-east-1.amazonaws.com/f290723c93715a8eb14e589ca1eec211e10691f683d53cde37139bc7d3a91c22.tar"
+    }
+
+    comicwriter = LLM(
+        model=request.model,
+        system_message=str(comicwriter_system_template),
+        params=llm_params,
+    )
+    comic_book = comicwriter(request.prompt, output_schema=ComicResult)
+    panels = comic_book['panels']
+
+    def run_panel(panel):
+        # TODO: pick lora of character
+        # TODO: pick init image character x genre
+        sdxl_config = {
+            "text_input": panel['image'],
+            "lora": loras["Verdelis"],
+            "width": 1024,
+            "height": 1024,
+            "n_samples": 1,
+        }
+        return replicate.sdxl(sdxl_config)
+
+    results = utils.process_in_parallel(panels, run_panel, max_workers=4)
+
+    # Each result is an (image_url, thumbnail_url) pair; only the image is used.
+    image_urls = [image_url for image_url, _ in results]
+    images = list(map(utils.download_image, image_urls))
+    captions = [panel['caption'] for panel in panels]
+
+    composite_image, thumbnail_image = comic_strip(images, captions)
+
+    img_bytes = utils.PIL_to_bytes(composite_image, ext="JPEG")
+    thumbnail_bytes = utils.PIL_to_bytes(thumbnail_image, ext="WEBP")
+
+    return s3.upload(img_bytes, "jpg"), s3.upload(thumbnail_bytes, "webp")
diff --git a/app/animations/story.py b/app/animations/story.py
@@ -27,12 +27,13 @@ def animated_story(request: StoryRequest):
         for character_id, character in characters.items()
     }
 
-    images = [
-        characters[character_id].image 
-        for character_id in request.character_ids
-    ]
+    # images = [
+    #     characters[character_id].image 
+    #     for character_id in request.character_ids
+    # ]
 
-    width, height = utils.calculate_target_dimensions(images, MAX_PIXELS)
+    # width, height = utils.calculate_target_dimensions(images, MAX_PIXELS)
+    width, height = 1024, 1024
 
     def run_story_segment(clip):
         if clip['voiceover'] == 'character':

diff --git a/app/fonts/Arial.ttf b/app/fonts/Arial.ttf
diff --git a/app/fonts/Roboto-Regular.ttf b/app/fonts/Roboto-Regular.ttf
diff --git a/app/generator.py b/app/generator.py
@@ -3,7 +3,12 @@
 import requests
 from fastapi import BackgroundTasks
 
-from .animations import animated_monologue, animated_dialogue, animated_story
+from .animations import (
+    animated_monologue, 
+    animated_dialogue, 
+    animated_story, 
+    illustrated_comic
+)
 from .models import MonologueRequest, MonologueResult
 from .models import DialogueRequest, DialogueResult, StoryRequest
 from .models import TaskRequest, TaskUpdate, TaskResult
@@ -56,6 +61,15 @@ def process_task(task_id: str, request: TaskRequest):
             )
             output_url, thumbnail_url = animated_story(task_req)
 
+        elif task_type == "comic":
+            character_id = request.config.get("characterId")
+            prompt = request.config.get("prompt")
+            task_req = ComicRequest(
+                character_id=character_id,
+                prompt=prompt,
+            )
+            output_url, thumbnail_url = illustrated_comic(task_req)
+
         output = TaskResult(
             files=[output_url],
             thumbnails=[thumbnail_url],

diff --git a/app/models.py b/app/models.py
@@ -68,6 +68,26 @@ class StoryClip(BaseModel):
     image_description: str = Field(description="Image content for clip")
 
 
+class ComicRequest(BaseModel):
+    """
+    A request to generate an illustrated comic
+    """
+    # Character the comic is requested for (currently not read by illustrated_comic).
+    character_id: str
+    # Prompt passed to the comic-writer LLM.
+    prompt: str
+    # Name of the LLM model used to write the comic.
+    model: str = "gpt-4-1106-preview"
+    # Extra LLM params; these override the temperature/max_tokens defaults.
+    params: dict = {}
+
+class ComicPanel(BaseModel):
+    """
+    A single panel in a comic book sequence
+    """
+    # Read as panel['image'] and sent to SDXL as the text prompt for the panel.
+    image: str = Field(description="Literal description of image content for panel")
+    # Read as panel['caption'] and drawn in the caption box under the panel.
+    caption: str = Field(description="Creative caption of panel")
+
+class ComicResult(BaseModel):
+    """
+    A comic book consisting of a sequence of panels
+    """
+    # Passed to the comic-writer LLM as its output schema; panels are in reading order.
+    panels: List[ComicPanel] = Field(description="Comic Book panels")
+
+
 class ChatRequest(BaseModel):
     """
     A chat request to an EdenCharacter
@@ -115,10 +135,3 @@ class ModerationResult(BaseModel):
     gore: int = Field(description="Violence or gore")
     hate: int = Field(description="Hate, abusive or toxic speech")
     spam: int = Field(description="Spam, scam, or deceptive content")
-
-# class CharacterChatMessage(BaseModel):
-#     character: Character
-#     message: str
-
-#     def __str__(self):
-#         return f"{self.character.name}: {self.message}"
diff --git a/app/plugins/replicate.py b/app/plugins/replicate.py
@@ -49,7 +49,7 @@ def submit_task(
     )
     return prediction
 
-
+# TODO(review): should wav2lip take a single config dict, like sdxl does?
 def wav2lip(
     face_url: str,
     speech_url: str,
@@ -82,18 +82,7 @@ def wav2lip(
     return output_url, thumbnail_url
 
 
-def sdxl(
-    text_input: str,
-    width: int,
-    height: int,
-):
-    config = {
-        "text_input": text_input,
-        "width": width,
-        "height": height,
-        "n_samples": 1,
-    }
-
+def sdxl(config: dict[any]):
     output = run_task(
         config, 
         model_name="abraham-ai/eden-sd-pipelines-sdxl"

diff --git a/app/prompt_templates/comic/__init__.py b/app/prompt_templates/comic/__init__.py
@@ -0,0 +1,7 @@
+from string import Template
+from pathlib import Path
+
+# Directory of this package; the prompt text files live next to this module.
+dir_path = Path(__file__).parent
+
+# Decode as UTF-8 explicitly so the prompt text does not depend on the
+# locale's default encoding of the machine running the app.
+with open(dir_path / 'comicwriter_system.txt', 'r', encoding='utf-8') as file:
+    comicwriter_system_template = Template(file.read())