Merge pull request #30 from edenartlab/stage

updates to reels
edenartlab · Mar 1, 2024 · e8e0b33 · e8e0b33
2 parents 7272eac + e590753
commit e8e0b33
Show file tree

Hide file tree

Showing 15 changed files with 155 additions and 140 deletions.
diff --git a/app/animations/animation.py b/app/animations/animation.py
@@ -173,7 +173,8 @@ def poster(
     font = get_font(font_ttf, font_size)
     width, height = image.size
 
-    draw = ImageDraw.Draw(Image.new('RGB', (width, height)))
+    draw = ImageDraw.Draw(Image.new('RGB', (width, height), (0, 0, 0)))
+    caption = caption.replace('\n', ' ')
     wrapped_caption = wrap_text(draw, caption, font, width - 2 * margin)
     num_lines = len(wrapped_caption)
 
@@ -188,13 +189,12 @@ def poster(
     draw.rectangle([(0, 0), (total_width, total_height)], fill='black')
 
     resized_image = image.resize((width, height))
-
     composite_image.paste(resized_image, (int(margin/2), int(margin/2)))
 
-    caption_box = Image.new('RGB', (width, caption_box_height), color='black')
+    caption_box = Image.new('RGB', (total_width, caption_box_height), color='black')
     draw = ImageDraw.Draw(caption_box)
 
-    caption_y = caption_padding_top + 0*margin/2
+    caption_y = caption_padding_top
     for line in wrapped_caption:
         draw.text((margin + shadow_offset[0], caption_y + shadow_offset[1]), line, fill=shadow_color, font=font)
         draw.text((margin, caption_y), line, fill=font_color, font=font)

diff --git a/app/animations/little_martians.py b/app/animations/little_martians.py
@@ -24,11 +24,14 @@ def little_martian_poster(request: LittleMartianRequest):
     data = littlemartians_data[request.martian.value][request.setting.value][request.genre.value]
 
     lora = data['lora']
+    character_id = data['character_id']
     modifier = data['modifier']
     lora_scale = random_interval(*data['lora_scale'])
     init_image = random.choice(data['init_images'])
     init_image_strength = random_interval(*data['init_image_strength'])
-
+
+    character = EdenCharacter(character_id)
+
     littlemartian_writer = LLM(
         model=request.model,
         system_message=littlemartians_poster_system.template,
@@ -37,36 +40,24 @@ def little_martian_poster(request: LittleMartianRequest):
 
     prompt = littlemartians_poster_prompt.substitute(
         martian = request.martian.value,
+        identity = character.identity,
         setting = request.setting.value,
         genre = request.genre.value,
         premise = request.prompt,
     )
 
-    print("==============")
-    print("PROMPT:", prompt)
-
     result = littlemartian_writer(prompt, output_schema=Poster)
 
     prompt = result['image']
 
     text_input = f'{modifier}, {prompt}'
 
-    print("RESULT:", prompt)
-    print("text_input:", text_input)
-
-    # def run_panel(panel, idx):
-    #     # pick lora of character
-    #     # pick init image character x genre
-    print("========")
-
-    if request.aspect_ratio.value == "portrait":
-        w, h = 1024, 1280
-    elif request.aspect_ratio.value == "landscape":
-        w, h = 1280, 768
-    elif request.aspect_ratio.value == "square":
-        w, h = 1024, 1024
-
-
+    if request.aspect_ratio == "portrait":
+        width, height = 1280, 1920
+    elif request.aspect_ratio == "landscape":
+        width, height = 1920, 1280
+    else:
+        width, height = 1600, 1600
 
     config = {
         "text_input": text_input,
@@ -80,18 +71,11 @@ def little_martian_poster(request: LittleMartianRequest):
         "n_samples": 1,
     }
 
-
-    print("config")
-    print(config)
-
-
     image_url, thumbnail_url = replicate.sdxl(config)
 
-    print(image_url, thumbnail_url)
-
     caption = result['caption']
-
     print(caption)
+
     image = utils.download_image(image_url)
     composite_image, thumbnail_image = poster(image, caption)
 

diff --git a/app/animations/reel.py b/app/animations/reel.py
@@ -9,10 +9,6 @@
 from ..character import Character, EdenCharacter
 from ..scenarios import reel
 from ..models import ReelRequest
-#from .animation import reel_clip
-
-MAX_PIXELS = 1024 * 1024
-MAX_WORKERS = 3
 
 
 def animated_reel(request: ReelRequest, callback=None):
@@ -21,8 +17,6 @@ def animated_reel(request: ReelRequest, callback=None):
     if callback:
         callback(progress=0.1)
 
-    print(result)
-
     characters = {
         character_id: EdenCharacter(character_id)
         for character_id in request.character_ids + [request.narrator_id]
@@ -44,8 +38,14 @@ def animated_reel(request: ReelRequest, callback=None):
             voice_id = elevenlabs.get_random_voice()
             characters[character_id].voice = voice_id
 
-    width, height = 1024, 1280
-    duration = 20
+    if request.aspect_ratio == "portrait":
+        width, height = 1280, 1920
+    elif request.aspect_ratio == "landscape":
+        width, height = 1920, 1280
+    else:
+        width, height = 1600, 1600
+
+    min_duration = 20
     speech_audio = None
 
     if result["speech"]:
@@ -75,28 +75,36 @@ def animated_reel(request: ReelRequest, callback=None):
         silence2 = AudioSegment.silent(duration=2500)
         speech_audio = silence1 + speech_audio + silence2
 
-        duration = len(speech_audio) / 1000
+        duration = max(min_duration, len(speech_audio) / 1000)
 
     music_url, _ = replicate.audiocraft(
-        prompt=result["music_description"],
+        prompt=result["music_prompt"],
         seconds=duration
     )
     music_bytes = requests.get(music_url).content
-    print("music", music_url)
 
     if speech_audio:
         buffer = BytesIO()
         music_audio = AudioSegment.from_mp3(BytesIO(music_bytes))
         music_audio = music_audio - 5
         speech_audio = speech_audio + 5  # boost speech
+
+        # combine speech and music: pad the shorter track with silence so both span the max duration
+        nm, na = len(music_audio), len(speech_audio)
+        duration = max(nm, na)
+        if len(music_audio) < duration:
+            music_audio += AudioSegment.silent(duration=duration - nm)
+        elif len(speech_audio) < duration:
+            speech_audio += AudioSegment.silent(duration=duration - na)
         combined_audio = music_audio.overlay(speech_audio)
+
         combined_audio.export(buffer, format="mp3")
         audio_url = s3.upload(buffer.getvalue(), "mp3")
     else:
         audio_url = s3.upload(music_bytes, "mp3")
 
     video_url, thumbnail_url = replicate.txt2vid(
-        interpolation_texts=[result["image_description"]],
+        interpolation_texts=[result["image_prompt"]],
         width=width,
         height=height,
     )

diff --git a/app/animations/story.py b/app/animations/story.py
@@ -56,7 +56,7 @@ def run_story_segment(clip, idx):
         else:
             character = characters[request.narrator_id]
         output_filename, thumbnail_url = screenplay_clip(
-            character, clip["speech"], clip["image_description"], width, height
+            character, clip["speech"], clip["image_prompt"], width, height
         )
         progress += progress_increment
         if callback:

diff --git a/app/models/__init__.py b/app/models/__init__.py
@@ -29,6 +29,7 @@
     StoryVoiceoverMode,
     StoryClip,
     StoryResult,
+    ReelNarrationMode,
     ReelRequest,
     ReelResult,
     ComicRequest,

diff --git a/app/models/scenarios.py b/app/models/scenarios.py
@@ -61,7 +61,7 @@ class StoryClip(BaseModel):
         description="Character name if voiceover mode is character, otherwise null"
     )
     speech: str = Field(description="Spoken text for clip")
-    image_description: str = Field(description="Image content for clip")
+    image_prompt: str = Field(description="Image content for clip")
 
 
 class StoryResult(BaseModel):
@@ -71,13 +71,22 @@ class StoryResult(BaseModel):
     clips: List[StoryClip] = Field(description="Clips in the sequence")
 
 
+class ReelNarrationMode(Enum):
+    auto = "auto"
+    on = "on"
+    off = "off"
+
+
 class ReelRequest(BaseModel):
     character_ids: List[str]
     prompt: str
-    narrator_id: str = NARRATOR_CHARACTER_ID
-    model: str = "gpt-4-1106-preview"
-    params: dict = {}
-    intro_screen: bool = False
+    music_prompt: Optional[str] = None
+    aspect_ratio: Optional[str] = "portrait"
+    narrator_id: Optional[str] = NARRATOR_CHARACTER_ID
+    narration: Optional[ReelNarrationMode] = "auto"
+    intro_screen: Optional[bool] = False
+    model: Optional[str] = "gpt-4-1106-preview"
+    params: Optional[dict] = {}
 
 
 class ReelVoiceoverMode(Enum):
@@ -95,8 +104,8 @@ class ReelResult(BaseModel):
         description="Character name if voiceover mode is character, otherwise null"
     )
     speech: Optional[str] = Field(description="Spoken text for clip if voiceover mode is not none, otherwise null")
-    music_description: str = Field(description="Music content for reel")
-    image_description: str = Field(description="Image content for clip")
+    music_prompt: str = Field(description="Music content for reel")
+    image_prompt: str = Field(description="Image content for clip")
 
 
 

diff --git a/app/prompt_templates/cinema/reelwriter_prompt.txt b/app/prompt_templates/cinema/reelwriter_prompt.txt
@@ -1,8 +1,3 @@
-Characters:
 $character_details
-
-Character names (only use these for character field in each clip):
-$character_names
-
 The premise of the reel is:
 $prompt
diff --git a/app/prompt_templates/cinema/reelwriter_system.txt b/app/prompt_templates/cinema/reelwriter_system.txt
@@ -6,8 +6,8 @@ You will then write a script for a reel based on the information provided.
 
 Your output contains the following elements:
 
-image_description: a short and concise 1-sentence description of the visual content for the reel, structured as a prompt, focusing on visual elements and action, not plot or dialogue
-music_description: a short and concise 1-sentence description of the music for the reel, structured as a prompt. Use descriptive words to convey the mood and genre of the music
+image_prompt: a short and concise 1-sentence description of the visual content for the reel, structured as a prompt, focusing on visual elements and action, not plot or dialogue
+music_prompt: a short and concise 1-sentence description of the music for the reel, structured as a prompt. Use descriptive words to convey the mood and genre of the music
 voiceover: whether there is a voiceover by a narrator, or by a character, or no voiceover at all
 character: If voiceover is in character mode, the name of the speaking character. Important: you may only use the exact name of a character provided by the user in the cast of characters.
 speech: If voiceover is in character or narrator mode, the text of the speech

diff --git a/app/prompt_templates/cinema/screenwriter_prompt.txt b/app/prompt_templates/cinema/screenwriter_prompt.txt
@@ -1,8 +1,3 @@
-Characters:
 $character_details
-
-Character names (only use these for character field in each clip):
-$character_names
-
 The premise of the story is:
 $story
diff --git a/app/prompt_templates/cinema/screenwriter_system.txt b/app/prompt_templates/cinema/screenwriter_system.txt
@@ -7,7 +7,7 @@ You will then write a screenplay for a film based on the information provided. I
 voiceover: whether the voiceover is the narrator or a character speaking
 character: If voiceover is in character mode, the name of the speaking character. Important: you may only use the exact name of a character provided by the user in the cast of characters.
 speech: If voiceover is in character or narrator mode, the text of the speech
-image_description: a description of the image content for the clip
+image_prompt: a description of the image content for the clip
 
 Generate around 5-10 clips. Approximately half should be character dialogue and half should be narration. For clips where the voiceover is a character, some of the image prompts may emphasize non-personal objects or scenery, and some may emphasize the character.