From ca2f6fcaee765076133871af9f61c9fa8dffc697 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Wed, 10 Jan 2024 23:05:55 -0500 Subject: [PATCH 01/13] Add pylint workflow --- .github/workflows/pylint.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/pylint.yml diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml new file mode 100644 index 0000000..5730b67 --- /dev/null +++ b/.github/workflows/pylint.yml @@ -0,0 +1,21 @@ +name: Python linter +on: [push] +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') From fb276eba57a4337597b515b08cf222958603e3d3 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Thu, 11 Jan 2024 00:27:16 -0500 Subject: [PATCH 02/13] Begin fixing all linter issues --- .pylintrc | 2 + api/chatgpt.py | 82 +++++++++++++++++---- main.py | 4 +- processing/image.py | 166 ++++++++++++++++++++++++++++++++++-------- processing/video.py | 51 ++++++++++--- ui/listicles/utils.py | 3 +- ui/music/interface.py | 6 +- ui/music/utils.py | 58 +++++++++------ ui/ui.py | 5 +- utils/gradio.py | 3 +- 10 files changed, 296 insertions(+), 84 deletions(-) create mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..d047969 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,2 @@ +[MAIN] +max-line-length=120 \ No newline at end of file diff --git a/api/chatgpt.py b/api/chatgpt.py index 39d37ea..8aeac86 100644 --- a/api/chatgpt.py +++ b/api/chatgpt.py @@ -1,12 +1,25 @@ +""" +This module provides utility functions for interacting with the OpenAI API and Gradio interfaces. +""" import openai from openai import OpenAI import os import gradio as gr +from typing import Optional -# The actual gradio image name (+ orig_name) is <>.png, but the tmp file created and sent to -# batch is based on the portion after the last `/` in the url without the '%' (looks url encoded). -def url_to_gradio_image_name(url): +def url_to_gradio_image_name(url: str) -> str: + """ + Converts an OpenAI generated URL to a Gradio-compatible image name. + + This function extracts the portion of the URL after the last forward slash ('/'). It removes special characters + often found in URLs such as '%', '&', and '='. The resulting string is truncated to a maximum length of 200 + characters to prevent issues with file name length limitations. + + :param url: The URL containing the image name. + :returns: A cleaned and truncated version of the image name suitable for use with Gradio. + """ + # Get the part after the final `/` in the URL image_name = url.rsplit('/', 1)[-1] @@ -21,7 +34,18 @@ def url_to_gradio_image_name(url): return image_name -def get_openai_client(api_key): +def get_openai_client(api_key: Optional[str] = None) -> Optional[OpenAI]: + """ + Creates and returns an OpenAI client object configured with the given API key. + + This function initializes an OpenAI client using the provided API key. If the provided API key is None or empty, + it attempts to retrieve the API key from the environment variable 'OPENAI_API_KEY'. If the environment variable is + also not set, it raises a warning and returns None. 
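+
+    A minimal usage sketch of the fallback behaviour described above:
+
+        client = get_openai_client(None)  # None/empty falls back to OPENAI_API_KEY
+        if client is not None:
+            ...  # safe to make API calls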
+ + :param api_key: The API key for OpenAI. If not provided, the function will try to use the API key from the + environment variable. + :returns: An instance of the OpenAI client configured with the API key, or None if no valid API key is provided. + """ if api_key is None or api_key == "": api_key = os.environ.get("OPENAI_API_KEY") if api_key is None or api_key == "": @@ -31,7 +55,24 @@ def get_openai_client(api_key): return OpenAI(api_key=api_key) -def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: str, context: list = None, as_json: bool= False): +def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: str, context: Optional[list] = None, + as_json: bool = False) -> Optional[str]: + """ + Generates a chat response using the OpenAI API based on the provided parameters. + + This function sends a message to the OpenAI API using the specified client and model. It constructs a message with + a role (system or user) and the provided prompt. It also optionally includes previous chat context. The response + can be returned in JSON format if specified. + + :param client: The OpenAI client to use for making the API call. + :param api_model: The model to use for the chat completion (e.g., 'davinci-codex'). + :param role: The role the AI should assume. + :param prompt: The message prompt to send to the chat model. + :param context: A list of previous chat messages to provide context. Default is None. + :param as_json: A flag to specify if the response should be in JSON format. Default is False. + + :returns: The chat response as a string, or None if there was an error or no response generated. + """ message = [ {"role": "system", "content": role}, @@ -59,29 +100,42 @@ def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: messages=message, ) - - response = response.choices[0] - if response.finish_reason != "stop": if response.finish_reason == "length": gr.Warning( - f"finish_reason: {response.finish_reason}. The maximum number of tokens specified in the request was reached.") - return None, None, None + f"finish_reason: {response.finish_reason}. The maximum number of tokens specified in the request was " + f"reached.") + return None elif response.finish_reason == "content_filter": gr.Warning( - f"finish_reason: {response.finish_reason}. The content was omitted due to a flag from OpenAI's content filters.") - return None, None, None + f"finish_reason: {response.finish_reason}. The content was omitted due to a flag from OpenAI's content " + f"filters.") + return None content = response.message.content if content is None or content == "": gr.Warning("No content was generated.") - return None, None + return None return content -def get_image_response(client: openai.Client, api_model: str, prompt: str, portrait=False): +def get_image_response(client: openai.Client, api_model: str, prompt: str, portrait=False) -> Optional[str]: + """ + Generates an image response using the OpenAI API based on a given prompt and specified parameters. + + This function requests the OpenAI API to generate an image based on the provided text prompt. It allows specification + of the model to use and whether the generated image should be in a portrait format. For 'dall-e-3' model, + it supports high-definition (HD) quality image generation. + + :param client: The OpenAI client to use for making the API call. + :param api_model: The model to use for image generation (e.g., 'dall-e-3'). 
+ :param prompt: The text prompt based on which the image is generated. + :param portrait: A flag to specify if the generated image should be in portrait orientation. Default is False. + + :returns: The URL of the generated image, or None if no image was generated or if there was an error. + """ image_size = "1024x1024" if portrait and api_model == "dall-e-3": image_size = "1024x1792" diff --git a/main.py b/main.py index 43d422b..1ebec69 100755 --- a/main.py +++ b/main.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -* -import ui.ui as ui -import utils.font_manager as font_manager +from ui import ui +from utils import font_manager if __name__ == '__main__': # Initialize fonts, and svg file grabber at start diff --git a/processing/image.py b/processing/image.py index 52c9667..9e793ba 100644 --- a/processing/image.py +++ b/processing/image.py @@ -1,3 +1,6 @@ +""" +Module for handling image-related operations in a Gradio interface. +""" import PIL from PIL import ImageFont, ImageDraw, Image, ImageFilter import numpy as np @@ -8,14 +11,29 @@ import os import cv2 from pathlib import Path -import utils.path_handler as path_handler +from utils import path_handler import utils.gradio as gru +from typing import Tuple, Optional, Union, Any, Literal image_folder = "images" default_path = os.path.join(path_handler.get_default_path(), image_folder) -def render_image_output(): +def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): + """ + Creates and returns a set of Gradio interface components for image output. + + This function sets up an image display component along with associated controls for naming the image file, + selecting its file type, and a button for saving the image to disk. It leverages Gradio's UI components to + create an interactive and user-friendly interface for image handling. + + Returns: + - Tuple[gr.Image, gr.Textbox, gr.Dropdown, gr.Button]: A tuple containing Gradio UI components: + - gr.Image: An image display component for showing image output. + - gr.Textbox: A textbox for inputting the name of the image file. + - gr.Dropdown: A dropdown menu for selecting the image file type. + - gr.Button: A button that triggers the action to save the image to disk. + """ image_output = gr.Image(elem_classes=["single-image-output"], label="Image Output", interactive=False, show_download_button=False, type="filepath") @@ -28,7 +46,23 @@ def render_image_output(): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name): +def render_text_editor_parameters(name: str) -> ((gr.Dropdown, gr.Dropdown, gr.Number, gr.ColorPicker, gr.Slider), + (gr.Checkbox, gr.ColorPicker, gr.Slider, gr.Number), + (gr.Checkbox, gr.ColorPicker, gr.Slider)): + """ + Creates and returns a set of Gradio interface components for text editor parameters. + + This function sets up a set of Gradio UI components for configuring the text editor parameters. It includes + controls for font family, font style, font size, font color, font opacity, drop shadow, drop shadow color, + drop shadow opacity, drop shadow radius, background, background color, and background opacity. + + :param name: The name of the text editor parameters section. + :return: A tuple of tuples containing Gradio UI components: A tuple containing Gradio UI + components for configuring the font family, font style, font size, font color, and font opacity. 
A tuple + containing Gradio UI components for configuring the drop shadow, drop shadow color, drop shadow opacity, + and drop shadow radius. A tuple containing Gradio UI components for configuring the background, background + color, and background opacity. + """ with gr.Accordion(label=name): with gr.Column(): font_family, font_style, font_color, font_opacity, font_size = gru.render_font_picker() @@ -49,7 +83,23 @@ def render_text_editor_parameters(name): (background_checkbox, background_color, background_opacity)) -def add_background(image_pil, draw, position, text, font, padding=(15, 5), fill_color=(0, 0, 0, 255), border_radius=0): +def add_background(image_pil: PIL.Image, draw: PIL.ImageDraw, position: Tuple[int, int], text: str, font: PIL.ImageFont, + padding: Tuple[int, int] = (15, 5), fill_color: Tuple[int, int, int, int] = (0, 0, 0, 255), + border_radius: int = 0) -> (Tuple[int, int], Tuple[int, int]): + """ + Adds a background to text on an image. + + :param image_pil: The image to get the size of for text placement. + :param draw: The image draw object to use for drawing the background. + :param position: The position of the text on the image. + :param text: The text to add a background to. + :param font: The font used for the text. + :param padding: The padding to add between the text and the background. + :param fill_color: The RGBA color to fill the background with. + :param border_radius: The radius of the border. + + :return: A tuple containing the position of the text and the size of the background. + """ # Calculate width and height of text with padding bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] @@ -67,8 +117,19 @@ def add_background(image_pil, draw, position, text, font, padding=(15, 5), fill_ return (x1 + padding[0], y1 + padding[1]), (x2 - x1, y2 - y1) -def add_blurred_shadow(image_pil, text, position, font, shadow_color=(0, 0, 0), shadow_offset=(0, 0), - blur_radius=1): +def add_blurred_shadow(image_pil: PIL.Image, text: str, position: Tuple[int, int], font: PIL.ImageFont, + shadow_color: Tuple[int, int, int, int] = (0, 0, 0), shadow_offset: Tuple[int, int] = (0, 0), + blur_radius: int = 1): + """ + Adds a blurred shadow or highlight to text on an image. + :param image_pil: The image to place the shadow on. + :param text: The text to add a shadow to. + :param position: The position of the text on the image. + :param font: The font used for the text. + :param shadow_color: The RGBA color of the shadow. + :param shadow_offset: The offset of the shadow. + :param blur_radius: The radius of the blur. + """ # Create an image for the shadow shadow_image = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) shadow_draw = ImageDraw.Draw(shadow_image) @@ -84,7 +145,15 @@ def add_blurred_shadow(image_pil, text, position, font, shadow_color=(0, 0, 0), image_pil.paste(blurred_shadow, (0, 0), blurred_shadow) -def read_image_from_disk(filepath, size=None): +def read_image_from_disk(filepath: str, size: Optional[Tuple[int, int]] = None) \ + -> Union[cv2.Mat, np.ndarray[Any, np.dtype[np.generic]], np.ndarray]: + """ + Reads an image from disk and returns it as a NumPy array for use with PIL. + :param filepath: The path to the image file. + :param size: The size to resize the image to. + + :return: A NumPy array containing the image. 
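+
+    Illustrative call (the file path and target size are placeholders):
+
+        img = read_image_from_disk("cover.png", size=(1024, 1024))  # RGBA ndarray
+        pil_image = Image.fromarray(img)  # hand the array to PIL for drawing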
+ """ img = cv2.imread(filepath, cv2.IMREAD_UNCHANGED) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA) # Convert to RGBA for PIL usage if size: @@ -92,20 +161,27 @@ def read_image_from_disk(filepath, size=None): return img -# This assumes the images are from a gallery, which is why it checks for the 'root' attribute. -def save_images_to_disk(images, image_type, dir=default_path): +def save_images_to_disk(images: gr.data_classes.GradioRootModel, image_type: Literal["png", "jpg", "webp"], + save_dir: str = default_path) -> Optional[str]: + """ + Saves a list of images to disk. + :param images: The list of images to save from Gradio's Gallery. + :param image_type: The type of image to save. + :param save_dir: The directory to save the images to. + :return: The directory the images were saved to. + """ if not images or len(images.root) == 0: gr.Warning("No images to save.") return - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") unique_id = uuid.uuid4() - dir = f"{base_dir}/{date}/{unique_id}" + save_dir = f"{base_dir}/{date}/{unique_id}" - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) for index, image_container in enumerate(images.root): image = image_container.image @@ -114,25 +190,35 @@ def save_images_to_disk(images, image_type, dir=default_path): continue filename = f"{index}.{image_type}" - filepath = os.path.join(dir, filename) + filepath = os.path.join(save_dir, filename) img = cv2.imread(image.path, cv2.IMREAD_UNCHANGED) cv2.imwrite(filepath, img) - gr.Info(f"Saved generated images to {dir}.") - return dir - - -def save_image_to_disk(image_path, name, image_suffix=".png", dir=default_path): + gr.Info(f"Saved generated images to {save_dir}.") + return save_dir + + +def save_image_to_disk(image_path: str, name: Optional[str] = None, + image_suffix: Literal[".png", ".jpg", ".webp"] = ".png", save_dir: str = default_path) \ + -> Optional[str]: + """ + Saves an image to disk. + :param image_path: The path to the image to save. (from a temporary directory from Gradio) + :param name: The name of the image file. If not provided, a generated name will be used. + :param image_suffix: The suffix of the image file denoting its type. + :param save_dir: The directory to save the image to. + :return: The directory the image was saved to. 
+ """ if image_path is None: gr.Warning("No image to save.") - return + return None - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") unique_id = uuid.uuid4() - dir = f"{base_dir}/{date}/{unique_id}" + save_dir = f"{base_dir}/{date}/{unique_id}" if name is None or name == "": unique_id = uuid.uuid4() @@ -142,21 +228,41 @@ def save_image_to_disk(image_path, name, image_suffix=".png", dir=default_path): name = Path(name).stem name = f"{name}{image_suffix}" - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) - filepath = os.path.join(dir, name) + filepath = os.path.join(save_dir, name) img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) cv2.imwrite(filepath, img) - gr.Info(f"Saved generated image to {dir}.") - return dir + gr.Info(f"Saved generated image to {save_dir}.") + return save_dir # Function to add text to an image with custom font, size, and wrapping -def add_text(image, text, position, font_path, font_size, font_color=(255, 255, 255, 255), shadow_color=(255, 255, 255), - shadow_radius=None, max_width=None, show_background=False, show_shadow=False, - background_color=(0, 0, 0, 255), x_center=False): +def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[int, int], font_path: str, + font_size: int, font_color: Tuple[int, int, int, int] = (255, 255, 255, 255), + shadow_color: Tuple[int, int, int, int] = (255, 255, 255, 255), + shadow_radius: Optional[int] = None, max_width: Optional[int] = None, show_background: bool = False, + show_shadow: bool = False, background_color: Tuple[int, int, int, int] = (0, 0, 0, 255), + x_center: bool = False) -> (np.ndarray, Tuple[int, int]): + """ + Adds text to an image with custom font, size, and wrapping. + :param image: The image to add text to. + :param text: The text to add to the image. + :param position: The (x, y) position of the text on the image. + :param font_path: The path to the font to use. + :param font_size: The size of the font. + :param font_color: The color of the font. + :param shadow_color: The color of the shadow. + :param shadow_radius: The radius of the shadow. + :param max_width: The maximum width of the text before wrapping. + :param show_background: Whether to show a background behind the text. + :param show_shadow: Whether to show a shadow behind the text. + :param background_color: The color of the background. + :param x_center: Whether to center the text on the x-axis. This ignores the positional x parameter. + :return: A tuple containing the image with text added and the size of the text block. + """ if not isinstance(position, tuple): raise TypeError("Position must be a 2-tuple.", type(position)) diff --git a/processing/video.py b/processing/video.py index b972727..96f8663 100644 --- a/processing/video.py +++ b/processing/video.py @@ -1,16 +1,31 @@ +""" +Module for handling video-related operations in a Gradio interface. 
+""" import uuid from datetime import datetime -import gradio as gr from pathlib import Path import os +import gradio as gr from moviepy.editor import VideoFileClip -import utils.path_handler as path_handler +from utils import path_handler +from typing import Optional, Literal video_folder = "videos" default_path = os.path.join(path_handler.get_default_path(), video_folder) -def render_video_output(): +def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): + """ + Creates and returns a set of Gradio interface components for video output. + + This function sets up a video display component along with associated controls for naming the video file, + selecting its file type, and a button for saving the video to disk. It leverages Gradio's UI components to + create an interactive and user-friendly interface for video handling. + + :returns: A tuple containing the following Gradio UI components: A video display component for showing video output, + a textbox for inputting the name of the video file, a dropdown menu for selecting the video file type, and a + button that triggers the action to save the video to disk. + """ video_output = gr.Video(elem_classes=["video-output"], label="Video Output", interactive=False) with gr.Row(): video_name = gr.Textbox(label="Name", lines=1, max_lines=1, scale=2) @@ -20,14 +35,28 @@ def render_video_output(): return video_output, video_name, video_suffix, save_video_button -def save_video_to_disk(video, name, video_suffix=".mp4", dir=default_path): - if not video: +def save_video_to_disk(video_path: str, name: Optional[str] = None, video_suffix: Literal[".mp4", ".mov"] = ".mp4", + save_dir: str = default_path) -> None: + """ + Saves a video file to the specified directory with a given name and file suffix. + + This function handles saving a video file to disk. It constructs a file path using the provided directory, + current date, and a unique name or the specified name. It supports saving in either .mp4 or .mov format. + If no name is provided, it generates a unique identifier for the file name. The function creates the necessary + directory structure if it does not exist and then saves the video using moviepy. + + :param video_path: The path to the video file to be saved. + :param name: The desired name for the saved video file. If not provided, a unique name is generated. + :param video_suffix: The file extension for the video. Defaults to ".mp4". + :param save_dir: The directory where the video will be saved. Defaults to the default path defined globally. 
+ """ + if not video_path or video_path == "": gr.Warning("No video to save.") return - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") - dir = f"{base_dir}/{date}" + save_dir = f"{base_dir}/{date}" if name is None or name == "": unique_id = uuid.uuid4() @@ -37,12 +66,12 @@ def save_video_to_disk(video, name, video_suffix=".mp4", dir=default_path): name = Path(name).stem name = f"{name}{video_suffix}" - video_clip = VideoFileClip(video) + video_clip = VideoFileClip(video_path) - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) - video_fqn = os.path.join(dir, name) + video_fqn = os.path.join(save_dir, name) video_clip.write_videofile(video_fqn, codec="libx264", fps=video_clip.fps) gr.Info(f"Saved video to {video_fqn}.") diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index 5b75969..dbea344 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -188,7 +188,8 @@ def generate_listicle(api_key, api_text_model, api_image_model, number_of_items, if association is not None and association != "": # Add association field if provided json_format += ", association: " json_format += "}" - message = f"Format the listicle into JSON. For the items, store as a list named 'items' with the content format: {json_format}." + message = (f"Format the listicle into JSON. For the items, store as a list named 'items' with the content " + f"format: {json_format}.") if rating_type is not None and rating_type != "": message += (f"Include a top-level field `rating_type: ` with what the rating " f"represents.") diff --git a/ui/music/interface.py b/ui/music/interface.py index 0057eed..3bac1a5 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -11,7 +11,8 @@ def render_music_section(): with gr.Tab("Generate Cover"): send_cover_to_process_button, send_cover_to_video_button, generated_image_output_path = render_generate_cover() with gr.Tab("Add Text To Image"): - send_processed_cover_to_video_button, processed_image_input, processed_image_output_path = render_process_cover() + send_processed_cover_to_video_button, processed_image_input, processed_image_output_path = \ + render_process_cover() with gr.Tab("Create Music Video"): music_video_cover_image = render_music_video_creation() @@ -111,7 +112,8 @@ def render_music_video_creation(): generate_audio_visualizer_button = gr.Checkbox(value=False, label="Generate Audio Visualizer", interactive=True) with gr.Group() as audio_visualizer_group: - audio_visualizer_color, audio_visualizer_opacity = gru.render_color_opacity_picker("Audio Visualizer") + audio_visualizer_color, audio_visualizer_opacity = \ + gru.render_color_opacity_picker("Audio Visualizer") with gr.Group(): with gr.Row(): audio_visualizer_num_rows = gr.Number(value=90, label="Number of Rows", diff --git a/ui/music/utils.py b/ui/music/utils.py index 63e6fa8..9690e59 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -37,7 +37,7 @@ def analyze_audio(audio, target_fps): cached_visualizer_background = None -def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, 255, 255, 255), dot_count=(90, 65), # the more dots, the more drawings, meaning slower. 
+def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, 255, 255, 255), dot_count=(90, 65), alias_scale=1, custom_drawing=None): global cached_visualizer_dot_positions, cached_visualizer_background width, height = canvas.size[0] * alias_scale, canvas.size[1] * alias_scale @@ -91,7 +91,8 @@ def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, # avg_loudness = loudness_values.get(column, -80) < if anything breaks, do this # Scale the loudness to the dot size - scaled_loudness = (avg_loudness - min_loudness) / (max_loudness - min_loudness) if max_loudness != min_loudness else 0 + scaled_loudness = (avg_loudness - min_loudness) / (max_loudness - min_loudness) \ + if max_loudness != min_loudness else 0 dot_size = base_size + scaled_loudness * (max_size - base_size) dot_size = min(max(dot_size, base_size), max_size) * alias_scale @@ -101,9 +102,13 @@ def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, if custom_drawing is not None: custom_drawing = custom_drawing.resize((int(dot_size), int(dot_size)), Image.LANCZOS) - large_canvas.paste(custom_drawing, (int(pos_x - dot_size / 2), int(pos_y - dot_size / 2)), custom_drawing) + large_canvas.paste(custom_drawing, + (int(pos_x - dot_size / 2), int(pos_y - dot_size / 2)), + custom_drawing) else: - large_draw.ellipse([(pos_x - dot_size / 2, pos_y - dot_size / 2), (pos_x + dot_size / 2, pos_y + dot_size / 2)], fill=color, outline=color) + large_draw.ellipse([ + (pos_x - dot_size / 2, pos_y - dot_size / 2), (pos_x + dot_size / 2, pos_y + dot_size / 2) + ], fill=color, outline=color) canvas.paste(large_canvas.resize(canvas.size, Image.LANCZOS)) @@ -194,25 +199,34 @@ def create_music_video( song_pos = (20, int(height * 0.925)) text_canvas, (_, song_height) = image_processing.add_text(text_canvas, song, song_pos, - font_families[song_font_type][song_font_style], - font_size=song_font_size, - font_color=image_utils.get_rgba(song_font_color, song_font_opacity), - show_shadow=song_shadow_enabled, shadow_radius=song_shadow_radius, - shadow_color=image_utils.get_rgba(song_shadow_color, - song_shadow_opacity), - show_background=song_background_enabled, - background_color=image_utils.get_rgba(song_background_color, - song_background_opacity)) + font_families[song_font_type][song_font_style], + font_size=song_font_size, + font_color=image_utils.get_rgba(song_font_color, + song_font_opacity), + show_shadow=song_shadow_enabled, + shadow_radius=song_shadow_radius, + shadow_color=image_utils.get_rgba(song_shadow_color, + song_shadow_opacity), + show_background=song_background_enabled, + background_color=image_utils.get_rgba( + song_background_color, + song_background_opacity)) artist_pos = (song_pos[0], song_pos[1] - song_height - 5) text_canvas, (_, artist_height) = image_processing.add_text(text_canvas, artist, artist_pos, - font_families[artist_font_type][artist_font_style], - font_size=artist_font_size, - font_color=image_utils.get_rgba(artist_font_color, artist_font_opacity), - show_shadow=artist_shadow_enabled, - shadow_radius=artist_shadow_radius, - shadow_color=image_utils.get_rgba(artist_shadow_color, artist_shadow_opacity), - show_background=artist_background_enabled, - background_color=image_utils.get_rgba(artist_background_color, artist_background_opacity)) + font_families[artist_font_type][artist_font_style], + font_size=artist_font_size, + font_color=image_utils.get_rgba(artist_font_color, + artist_font_opacity), + show_shadow=artist_shadow_enabled, + 
shadow_radius=artist_shadow_radius, + shadow_color=image_utils.get_rgba(artist_shadow_color, + artist_shadow_opacity + ), + show_background=artist_background_enabled, + background_color=image_utils.get_rgba( + artist_background_color, + artist_background_opacity) + ) text_np = np.array(text_canvas) text_clip = ImageClip(text_np).set_duration(audio_clip.duration) @@ -240,7 +254,7 @@ def create_music_video( preset="medium", verbose=False, # add: logger=None logger=None, - ) + ) return temp_video_path diff --git a/ui/ui.py b/ui/ui.py index f3c82fc..f6093e1 100644 --- a/ui/ui.py +++ b/ui/ui.py @@ -1,5 +1,8 @@ -import gradio as gr +""" +This file contains the main UI code that runs the TrendGenie web app. +""" import os +import gradio as gr import ui.listicles.interface as listicle_interface import ui.music.interface as music_interface diff --git a/utils/gradio.py b/utils/gradio.py index f1f2b31..2add475 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -25,7 +25,8 @@ def render_font_picker(default_font_size=55): with gr.Row(): font_families_list = list(font_families.keys()) initial_font_family = font_families_list[0] if len(font_families_list) > 0 else "" - font_family = gr.Dropdown(font_families_list, value=initial_font_family, label="Font Family", interactive=True) + font_family = gr.Dropdown(font_families_list, value=initial_font_family, label="Font Family", + interactive=True) font_styles_list = list(font_families[initial_font_family].keys() if initial_font_family else []) initial_font_style = font_styles_list[0] if len(font_styles_list) > 0 else "" font_style = gr.Dropdown(font_styles_list, value=initial_font_style, label="Font Style", interactive=True) From 89071fb6bedfa87d2fc136335504afec1f8cd007 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Thu, 11 Jan 2024 00:40:21 -0500 Subject: [PATCH 03/13] Some more linter fixes --- api/chatgpt.py | 6 +++--- ui/components/openai.py | 12 +++++++++++- utils/gradio.py | 34 +++++++++++++++++++++++++++++----- utils/image.py | 13 ++++++++++++- utils/path_handler.py | 11 +++++++++-- 5 files changed, 64 insertions(+), 12 deletions(-) diff --git a/api/chatgpt.py b/api/chatgpt.py index 8aeac86..363ff16 100644 --- a/api/chatgpt.py +++ b/api/chatgpt.py @@ -125,9 +125,9 @@ def get_image_response(client: openai.Client, api_model: str, prompt: str, portr """ Generates an image response using the OpenAI API based on a given prompt and specified parameters. - This function requests the OpenAI API to generate an image based on the provided text prompt. It allows specification - of the model to use and whether the generated image should be in a portrait format. For 'dall-e-3' model, - it supports high-definition (HD) quality image generation. + This function requests the OpenAI API to generate an image based on the provided text prompt. It allows + specification of the model to use and whether the generated image should be in a portrait format. For 'dall-e-3' + model, it supports high-definition (HD) quality image generation. :param client: The OpenAI client to use for making the API call. :param api_model: The model to use for image generation (e.g., 'dall-e-3'). diff --git a/ui/components/openai.py b/ui/components/openai.py index c8ac4b8..4fec6bf 100644 --- a/ui/components/openai.py +++ b/ui/components/openai.py @@ -1,7 +1,17 @@ +""" +This module contains ui components for the OpenAI API. 
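+
+Sketch of the intended wiring inside a gr.Blocks context (variable names are illustrative):
+
+    api_key, text_model, image_model = render_openai_setup()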
+""" import gradio as gr -def render_openai_setup(show_text_model=True, show_image_model=True): +def render_openai_setup(show_text_model: bool = True, show_image_model: bool = True) \ + -> (gr.Textbox, gr.Dropdown, gr.Dropdown): + """ + Renders the OpenAI API setup components. + :param show_text_model: Whether to show the text model dropdown. + :param show_image_model: Whether to show the image model dropdown. + :return: A tuple containing the API key, text model, and image model components. + """ api_text_model = None api_image_model = None with gr.Row(): diff --git a/utils/gradio.py b/utils/gradio.py index 2add475..57a4018 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -1,8 +1,16 @@ +""" +This module contains utility functions for rendering widely-used Gradio components. +""" import gradio as gr -import utils.font_manager as font_manager +from utils import font_manager -def render_color_opacity_picker(default_name_label="Font"): +def render_color_opacity_picker(default_name_label: str = "Font") -> tuple[gr.ColorPicker, gr.Slider]: + """ + Renders a color picker with the appropriate styling. + :param default_name_label: The default name label to use. + :return: A tuple containing the color and opacity components. + """ with gr.Group(): with gr.Row(): color = gr.ColorPicker(label=f"{default_name_label} Color", scale=1, interactive=True) @@ -11,7 +19,13 @@ def render_color_opacity_picker(default_name_label="Font"): return color, opacity -def bind_checkbox_to_visibility(checkbox, group): +def bind_checkbox_to_visibility(checkbox: gr.Checkbox, group: gr.Group): + """ + Binds a checkbox to the visibility of a group. When the checkbox is checked, the group is visible, and when the + checkbox is unchecked, the group is hidden. + :param checkbox: The Checkbox component to bind. + :param group: The Group component to bind. + """ checkbox.change( lambda state: gr.Group(visible=state), inputs=checkbox, @@ -19,7 +33,13 @@ def bind_checkbox_to_visibility(checkbox, group): ) -def render_font_picker(default_font_size=55): +def render_font_picker(default_font_size: int = 55) \ + -> tuple[gr.Dropdown, gr.Dropdown, gr.ColorPicker, gr.Slider, gr.Number]: + """ + Renders a font picker with the appropriate styling. + :param default_font_size: The default font size to use. + :return: A tuple containing the font family, font style, font color, font opacity, and font size components. + """ font_families = font_manager.get_fonts() with gr.Group(): with gr.Row(): @@ -45,5 +65,9 @@ def update_font_styles(selected_font_family): return font_family, font_style, font_color, font_opacity, font_size -def render_tool_description(description): +def render_tool_description(description: str): + """ + Renders a description for a tool with the appropriate styling. + :param description: The description to render. + """ gr.Markdown(description, elem_classes=["tool-description"]) diff --git a/utils/image.py b/utils/image.py index a195c75..af47f1d 100644 --- a/utils/image.py +++ b/utils/image.py @@ -1,5 +1,16 @@ +""" +This file contains functions for image processing. +""" +from typing import Tuple, Union -def get_rgba(color, opacity): + +def get_rgba(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: + """ + Gets the RGBA value for a given color and opacity. + :param color: The color to use. Either a hex string or a tuple of RGB values. + :param opacity: The opacity to use, from 0 to 100. + :return: The RGBA value. 
+ """ # Opacity should be 0 -> 0, 100 -> 255 alpha = int(opacity * 255 / 100) diff --git a/utils/path_handler.py b/utils/path_handler.py index c79358c..706db3f 100644 --- a/utils/path_handler.py +++ b/utils/path_handler.py @@ -1,13 +1,20 @@ +""" +This module contains functions for handling paths. +""" import os from pathlib import Path default_path = None -def get_default_path(): + +def get_default_path() -> str: + """ + Gets the default path for saving files, which is the user's home directory under a folder called "trendgenie". + :return: + """ global default_path if default_path is None: homepath = Path.home() default_path = os.path.join(homepath, "trendgenie") return default_path - From cb6b712486fd8731596c666c2f873050a8c632e7 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Thu, 11 Jan 2024 00:41:11 -0500 Subject: [PATCH 04/13] Remove 3.8 from pylinter --- .github/workflows/pylint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 5730b67..7ea94c9 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} From 87ac5bbe19ba3bc13630d019c4d429180df3fd2b Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Fri, 12 Jan 2024 23:38:21 -0500 Subject: [PATCH 05/13] Some more minor pylint fixes --- api/chatgpt.py | 4 +-- main.py | 5 ++- processing/image.py | 34 +++++++++--------- processing/video.py | 6 ++-- ui/listicles/utils.py | 3 +- ui/music/interface.py | 34 +++++++++++++++--- ui/music/utils.py | 83 +++++++++++++++++++++++++++++-------------- ui/ui.py | 2 +- utils/font_manager.py | 82 ++++++++++++++++++++++++++---------------- utils/gradio.py | 4 +-- utils/path_handler.py | 12 +++---- 11 files changed, 174 insertions(+), 95 deletions(-) diff --git a/api/chatgpt.py b/api/chatgpt.py index 363ff16..debbbc0 100644 --- a/api/chatgpt.py +++ b/api/chatgpt.py @@ -1,11 +1,11 @@ """ This module provides utility functions for interacting with the OpenAI API and Gradio interfaces. """ +import os +from typing import Optional import openai from openai import OpenAI -import os import gradio as gr -from typing import Optional def url_to_gradio_image_name(url: str) -> str: diff --git a/main.py b/main.py index 1ebec69..d314cbe 100755 --- a/main.py +++ b/main.py @@ -1,4 +1,7 @@ -#!/usr/bin/env python3 +""" +This is the main file for the web app. It launches the web app and initializes the font manager and inflect engine. +""" +# !/usr/bin/env python3 # -*- coding: utf-8 -* from ui import ui diff --git a/processing/image.py b/processing/image.py index 9e793ba..995e89f 100644 --- a/processing/image.py +++ b/processing/image.py @@ -1,22 +1,22 @@ """ Module for handling image-related operations in a Gradio interface. 
""" -import PIL -from PIL import ImageFont, ImageDraw, Image, ImageFilter -import numpy as np import textwrap -import gradio as gr import uuid from datetime import datetime import os -import cv2 from pathlib import Path +from typing import Tuple, Optional, Union, Any, Literal +import PIL +from PIL import ImageFont, ImageDraw, Image, ImageFilter +import numpy as np +import gradio as gr +import cv2 from utils import path_handler import utils.gradio as gru -from typing import Tuple, Optional, Union, Any, Literal -image_folder = "images" -default_path = os.path.join(path_handler.get_default_path(), image_folder) +IMAGE_FOLDER = "images" +default_path = os.path.join(path_handler.get_default_path(), IMAGE_FOLDER) def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): @@ -104,17 +104,17 @@ def add_background(image_pil: PIL.Image, draw: PIL.ImageDraw, position: Tuple[in bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] text_height = bbox[3] - bbox[1] - x1 = position[0] - padding[0] # left - y1 = position[1] - padding[1] # top - x2 = x1 + text_width + 2 * padding[0] # right - y2 = y1 + text_height + 2 * padding[1] # bottom + left = position[0] - padding[0] + top = position[1] - padding[1] # top + right = left + text_width + 2 * padding[0] + bottom = top + text_height + 2 * padding[1] rect_img = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) rect_draw = ImageDraw.Draw(rect_img) - rect_draw.rounded_rectangle([x1, y1, x2, y2], fill=fill_color, radius=border_radius) + rect_draw.rounded_rectangle([left, top, right, bottom], fill=fill_color, radius=border_radius) image_pil.paste(rect_img, (0, 0), rect_img) - return (x1 + padding[0], y1 + padding[1]), (x2 - x1, y2 - y1) + return (left + padding[0], top + padding[1]), (right - left, bottom - top) def add_blurred_shadow(image_pil: PIL.Image, text: str, position: Tuple[int, int], font: PIL.ImageFont, @@ -172,7 +172,7 @@ def save_images_to_disk(images: gr.data_classes.GradioRootModel, image_type: Lit """ if not images or len(images.root) == 0: gr.Warning("No images to save.") - return + return None base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) @@ -279,7 +279,7 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - img_width, img_height = image_pil.size + img_width, _ = image_pil.size if max_width: # Prepare for text wrapping if max_width is provided wrapped_text = textwrap.fill(text, width=max_width) @@ -292,7 +292,7 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i y_offset = 0 max_line_width = 0 # Keep track of the widest line total_height = 0 # Accumulate total height of text block - for i, line in enumerate(lines): + for line in lines: bbox = draw.textbbox((0, 0), line, font=font) line_width = bbox[2] - bbox[0] line_height = bbox[3] - bbox[1] diff --git a/processing/video.py b/processing/video.py index 96f8663..3d19e59 100644 --- a/processing/video.py +++ b/processing/video.py @@ -5,13 +5,13 @@ from datetime import datetime from pathlib import Path import os +from typing import Optional, Literal import gradio as gr from moviepy.editor import VideoFileClip from utils import path_handler -from typing import Optional, Literal -video_folder = "videos" -default_path = os.path.join(path_handler.get_default_path(), video_folder) +VIDEO_FOLDER = "videos" +default_path = os.path.join(path_handler.get_default_path(), 
VIDEO_FOLDER) def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index dbea344..d43eeb5 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -1,9 +1,8 @@ import gradio as gr import json -import utils.image as image_utils import processing.image as image_processing import os -import utils.font_manager as font_manager +from utils import font_manager, image as image_utils import api.chatgpt as chatgpt_api diff --git a/ui/music/interface.py b/ui/music/interface.py index 3bac1a5..06f187c 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -1,12 +1,19 @@ +""" +Tbe interface for the music section of the UI. This is the main piece where we define the Gradio interface components. +""" import gradio as gr import utils.gradio as gru -from ui.music.utils import * +from ui.music.utils import generate_cover_image, process, create_music_video import processing.video as video_processing import processing.image as image_processing import ui.components.openai as openai_components -def render_music_section(): +def render_music_section() -> None: + """ + Renders the music cover video section of the UI. + :return: None + """ gru.render_tool_description("Create a cover and a simple video for your music!") with gr.Tab("Generate Cover"): send_cover_to_process_button, send_cover_to_video_button, generated_image_output_path = render_generate_cover() @@ -27,7 +34,13 @@ def render_music_section(): outputs=[music_video_cover_image]) -def render_generate_cover(): +def render_generate_cover() -> (gr.Button, gr.Button, gr.Image): + """ + Renders the cover generation interface component for the music cover creation section. + :return: A tuple containing the following Gradio UI components: A button for generating a cover image, a button for + sending the generated cover image to the "Add Text to Image" section, and an image display component for + displaying the generated cover image. + """ api_key, _, api_image_model = openai_components.render_openai_setup(show_text_model=False) with gr.Row(equal_height=False): with gr.Group(): @@ -49,7 +62,14 @@ def render_generate_cover(): return send_to_process_button, send_to_create_video_button, image_output -def render_process_cover(): +def render_process_cover() -> (gr.Button, gr.Image, gr.Image): + """ + Renders the cover processing interface component for the music cover creation section. This is where we add text to + the cover image. + :return: A tuple containing the following Gradio UI components: A button for processing a cover image, an image + display component for displaying the cover image before processing, and an image display component for + displaying the cover image after processing. + """ with gr.Column(): gr.Markdown("## Input") with gr.Group(): @@ -86,7 +106,11 @@ def render_process_cover(): return send_to_create_video_button, input_image, image_output -def render_music_video_creation(): +def render_music_video_creation() -> gr.Image: + """ + Renders the music video creation interface component for the music cover creation section. + :return: An image display component for displaying the cover image. + """ gr.Markdown("## Input") with gr.Row(equal_height=False): # Sadly we can't use RGBA here due to JPEG images not supporting alpha and breaking. 
It would be nice if Gradio diff --git a/ui/music/utils.py b/ui/music/utils.py index 9690e59..2909657 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -1,26 +1,36 @@ +""" +This file contains the functions and utilities used to generate the music video and cover image. +""" import math -from PIL import Image, ImageFilter, ImageDraw, ImageFont +from typing import Dict, List, Optional +from PIL import Image, ImageFilter, ImageDraw from moviepy.editor import AudioFileClip, ImageClip, CompositeVideoClip, concatenate_videoclips import multiprocessing -import utils.font_manager as font_manager -import utils.image as image_utils +from utils import font_manager, image as image_utils import numpy as np import tempfile -import api.chatgpt as chatgpt_api -import processing.image as image_processing +from api import chatgpt as chatgpt_api +from processing import image as image_processing import librosa -import cProfile -def analyze_audio(audio, target_fps): - y, sr = librosa.load(audio, sr=None) +def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): + """ + Analyzes the audio file at the given path and returns the frequency loudness and times relating to the frequency + loudness. + :param audio_path: The path to the audio file to analyze. + :param target_fps: The target frames per second for the audio visualizer. This is used to downsample the audio so + that it aligns with the video. + :return: A tuple containing the frequency loudness and times relating to the frequency loudness. + """ + y, sr = librosa.load(audio_path, sr=None) D = librosa.stft(y) D_db = librosa.amplitude_to_db(np.abs(D), ref=np.max) frequencies = librosa.fft_frequencies(sr=sr) times = librosa.frames_to_time(np.arange(D_db.shape[1]), sr=sr) - audio_clip = AudioFileClip(audio) + audio_clip = AudioFileClip(audio_path) audio_frames_per_video_frame = len(times) / (target_fps * audio_clip.duration) sample_indices = np.arange(0, len(times), audio_frames_per_video_frame) @@ -33,27 +43,43 @@ def analyze_audio(audio, target_fps): return downsampled_frequency_loudness, downsampled_times -cached_visualizer_dot_positions = None -cached_visualizer_background = None - - -def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, 255, 255, 255), dot_count=(90, 65), - alias_scale=1, custom_drawing=None): - global cached_visualizer_dot_positions, cached_visualizer_background +CACHED_VISUALIZER_DOT_POSITIONS = None +CACHED_VISUALIZER_BACKGROUND = None + + +def draw_visualizer(canvas: Image, frequency_data: Dict[float, float], base_size: int = 1, max_size: int = 7, + color: tuple[int, int, int, int] = (255, 255, 255, 255), dot_count: tuple[int, int] = (90, 65), + alias_scale: int = 1, custom_drawing: Optional[Image] = None) -> None: + """ + Draws a visualizer on the given canvas frame using the frequency data. + :param canvas: The canvas to draw the visualizer on. + :param frequency_data: The frequency data to use for drawing the visualizer. + :param base_size: The base size of the dots (silent). + :param max_size: The maximum size of the dots (loudest portion). + :param color: The color of the dots. + :param dot_count: The number of dots to use in the visualizer. The first value is the number of rows, and the second + value is the number of columns. + :param alias_scale: The alias scale to use for the visualizer. This is used to increase the resolution of the + visualizer. + :param custom_drawing: The custom drawing to use for the visualizer. 
This is used to replace the dots with a custom + image. + :return: + """ + global CACHED_VISUALIZER_DOT_POSITIONS, CACHED_VISUALIZER_BACKGROUND width, height = canvas.size[0] * alias_scale, canvas.size[1] * alias_scale - if cached_visualizer_background is None: - cached_visualizer_background = Image.new("RGBA", (width, height)) - large_canvas = cached_visualizer_background.copy() + if CACHED_VISUALIZER_BACKGROUND is None: + CACHED_VISUALIZER_BACKGROUND = Image.new("RGBA", (width, height)) + large_canvas = CACHED_VISUALIZER_BACKGROUND.copy() large_draw = ImageDraw.Draw(large_canvas) # In case the dot count changes, recalculate the dot positions - if cached_visualizer_dot_positions is None or len(cached_visualizer_dot_positions) != dot_count[0] * dot_count[1]: + if CACHED_VISUALIZER_DOT_POSITIONS is None or len(CACHED_VISUALIZER_DOT_POSITIONS) != dot_count[0] * dot_count[1]: # Calculate and store dot positions x_positions = (width / dot_count[0]) * np.arange(dot_count[0]) + (width / dot_count[0] / 2) y_positions = (height / dot_count[1]) * np.arange(dot_count[1]) + (height / dot_count[1] / 2) grid_x, grid_y = np.meshgrid(x_positions, y_positions) - cached_visualizer_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(dot_count[0]) for y in + CACHED_VISUALIZER_DOT_POSITIONS = [(grid_x[y, x], grid_y[y, x]) for x in range(dot_count[0]) for y in range(dot_count[1])] # Precompute log frequencies @@ -83,7 +109,7 @@ def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, loudness_values[x] = avg_loudness cached_dot_sizes = {} - for i, (pos_x, pos_y) in enumerate(cached_visualizer_dot_positions): + for i, (pos_x, pos_y) in enumerate(CACHED_VISUALIZER_DOT_POSITIONS): column = i // dot_count[1] # Ensure the correct column is computed if column not in cached_dot_sizes: @@ -175,8 +201,6 @@ def create_music_video( if time_point > audio_clip.duration: break frame = frame_cache.copy() - # cProfile.runctx("draw_visualizer(frame, frequency_loudness[i], color=audio_visualizer_color_opacity)", - # locals=locals(), globals=globals()) draw_visualizer(frame, frequency_loudness[i], color=audio_visualizer_color_opacity, custom_drawing=custom_drawing, base_size=audio_visualizer_min_size, max_size=audio_visualizer_max_size, dot_count=(audio_visualizer_num_rows, @@ -252,14 +276,21 @@ def create_music_video( temp_audiofile=temp_audio_path, threads=threads, preset="medium", - verbose=False, # add: logger=None + verbose=False, logger=None, ) return temp_video_path -def generate_cover_image(api_key, api_model, prompt): +def generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[str]: + """ + Generates a cover image using the OpenAI API based on a given prompt and specified parameters. + :param api_key: The API key to use for the OpenAI API. + :param api_model: The model to use for image generation (e.g., 'dall-e-3'). + :param prompt: The text prompt based on which the image is generated. + :return: The URL of the generated image, or None if no image was generated or if there was an error. + """ client = chatgpt_api.get_openai_client(api_key) image_url = chatgpt_api.get_image_response(client, api_model, prompt, portrait=False) if image_url is None or image_url == "": diff --git a/ui/ui.py b/ui/ui.py index f6093e1..9d52763 100644 --- a/ui/ui.py +++ b/ui/ui.py @@ -8,7 +8,7 @@ # Read the styles.css file and add it to the page. 
css_file = os.path.join(os.path.dirname(__file__), "styles.css") -with open(css_file, "r") as file: +with open(css_file, "r", encoding="utf-8") as file: css = file.read() with gr.Blocks(theme=gr.themes.Soft(), css=css) as WebApp: diff --git a/utils/font_manager.py b/utils/font_manager.py index bb53315..6114b8d 100644 --- a/utils/font_manager.py +++ b/utils/font_manager.py @@ -1,12 +1,18 @@ +""" +This module manages the fonts and the inflect engine. +""" import glob import os +from typing import Optional, Dict, TypeAlias from fontTools.ttLib import TTFont import inflect -import utils.path_handler as path_handler +from utils import path_handler -font_families = None -p = None -fonts_dirs = [ +NestedDict: TypeAlias = Dict[str, Dict[str, str]] + +FONT_FAMILIES: Optional[NestedDict] = None +P: Optional[inflect.engine] = None +FONTS_DIRS = [ # MacOS "/Library/Fonts", "~/Library/Fonts", "System/Library/Fonts", # Linux @@ -16,31 +22,37 @@ ] -def initialize_inflect(): - global p - if p is None: - p = inflect.engine() +def initialize_inflect() -> inflect.engine: + """ + Initializes the inflect engine. + :return: The inflect engine. + """ + global P + if P is None: + P = inflect.engine() - return p + return P -def initialize_fonts(): - global font_families - if font_families is None: - font_families = font_families +def initialize_fonts() -> NestedDict: + """ + Initializes the font families from the global FONTS_DIRS. + :return: The font families and their paths. They are called by map[font_family][font_style]. + """ + global FONT_FAMILIES font_files = [] # Add TrendGenie fonts trendgenie_fonts_dir = os.path.join(path_handler.get_default_path(), "fonts") - fonts_dirs.append(trendgenie_fonts_dir) - for fonts_dir in fonts_dirs: + FONTS_DIRS.append(trendgenie_fonts_dir) + for fonts_dir in FONTS_DIRS: fonts_dir = os.path.expanduser(fonts_dir) if not os.path.exists(fonts_dir): continue font_files += glob.glob(os.path.join(fonts_dir, "**/*.ttf"), recursive=True) font_files += glob.glob(os.path.join(fonts_dir, "**/*.otf"), recursive=True) - font_families = {} + FONT_FAMILIES = {} for font_file in font_files: font = TTFont(font_file) name = font['name'] @@ -52,22 +64,32 @@ def initialize_fonts(): elif record.nameID == 2 and b'\000' in record.string: style_name = record.string.decode('utf-16-be').rstrip('\0') if family_name and style_name: - if family_name not in font_families: - font_families[family_name] = {} - font_families[family_name][style_name] = font_file + if family_name not in FONT_FAMILIES: + FONT_FAMILIES[family_name] = {} + FONT_FAMILIES[family_name][style_name] = font_file + + return FONT_FAMILIES + - return font_families +def get_fonts() -> NestedDict: + """ + Gets the font families. If they are not initialized, it initializes them. + :return: The font families and their paths. They are called by map[font_family][font_style]. + """ + global FONT_FAMILIES + if FONT_FAMILIES is None: + FONT_FAMILIES = initialize_fonts() -def get_fonts(): - global font_families - if font_families is None: - font_families = initialize_fonts() + return FONT_FAMILIES - return font_families -def get_inflect(): - global p - if p is None: - p = initialize_inflect() +def get_inflect() -> inflect.engine: + """ + Gets the inflect engine. If it is not initialized, it initializes it. + :return: The inflect engine. 
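+
+    Example (illustrative): get_inflect().number_to_words(3) returns "three".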
+ """ + global P + if P is None: + P = initialize_inflect() - return p \ No newline at end of file + return P diff --git a/utils/gradio.py b/utils/gradio.py index 57a4018..f086830 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -54,8 +54,8 @@ def render_font_picker(default_font_size: int = 55) \ def update_font_styles(selected_font_family): if selected_font_family is None or selected_font_family == "": return [] - font_syles = list(font_families[selected_font_family].keys()) - return gr.Dropdown(font_syles, value=font_syles[0], label="Font Style") + font_styles = list(font_families[selected_font_family].keys()) + return gr.Dropdown(font_styles, value=font_styles[0], label="Font Style") font_family.change(update_font_styles, inputs=[font_family], outputs=[font_style]) with gr.Group(): diff --git a/utils/path_handler.py b/utils/path_handler.py index 706db3f..58251cb 100644 --- a/utils/path_handler.py +++ b/utils/path_handler.py @@ -4,17 +4,17 @@ import os from pathlib import Path -default_path = None +DEFAULT_PATH = None def get_default_path() -> str: """ Gets the default path for saving files, which is the user's home directory under a folder called "trendgenie". - :return: + :return: The default path. """ - global default_path - if default_path is None: + global DEFAULT_PATH + if DEFAULT_PATH is None: homepath = Path.home() - default_path = os.path.join(homepath, "trendgenie") + DEFAULT_PATH = os.path.join(homepath, "trendgenie") - return default_path + return DEFAULT_PATH From 66b37ec03cdea8d427cce1ba181372ee594cf67c Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Sun, 14 Jan 2024 20:49:48 -0500 Subject: [PATCH 06/13] Add type checks to resolve dynamic members --- .pylintrc | 5 ++++- utils/font_manager.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.pylintrc b/.pylintrc index d047969..59c5a5c 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,2 +1,5 @@ [MAIN] -max-line-length=120 \ No newline at end of file +max-line-length=120 + +[TYPECHECK] +generated-members=gradio.components.dropdown.*,gradio.components.button.*,cv2.* \ No newline at end of file diff --git a/utils/font_manager.py b/utils/font_manager.py index 6114b8d..4074ec1 100644 --- a/utils/font_manager.py +++ b/utils/font_manager.py @@ -3,12 +3,12 @@ """ import glob import os -from typing import Optional, Dict, TypeAlias +from typing import Optional, Dict from fontTools.ttLib import TTFont import inflect from utils import path_handler -NestedDict: TypeAlias = Dict[str, Dict[str, str]] +NestedDict = Dict[str, Dict[str, str]] FONT_FAMILIES: Optional[NestedDict] = None P: Optional[inflect.engine] = None From 2d4cd614b36e4d4fa4c0e266ca584553c366ad8d Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Sun, 14 Jan 2024 21:11:28 -0500 Subject: [PATCH 07/13] Some more linter fixes - image.py --- processing/image.py | 125 ++++++++++++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 33 deletions(-) diff --git a/processing/image.py b/processing/image.py index ec6acd8..03f1f6a 100644 --- a/processing/image.py +++ b/processing/image.py @@ -1,3 +1,4 @@ +from typing import Optional, Literal from PIL import ImageFont, ImageDraw, Image, ImageFilter import numpy as np import textwrap @@ -14,7 +15,11 @@ default_path = os.path.join(path_handler.get_default_path(), image_folder) -def render_image_output(): +def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): + """ + Renders the image output components. 
+ :return: A tuple containing the image output, image name, image suffix, and save image button components. + """ image_output = gr.Image(elem_classes=["single-image-output"], label="Image Output", interactive=False, show_download_button=False, type="filepath") @@ -27,7 +32,14 @@ def render_image_output(): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name): +def render_text_editor_parameters(name: str) -> ((gr.Dropdown, gr.Dropdown, gr.ColorPicker, gr.Slider, gr.Number), + (gr.Checkbox, gr.ColorPicker, gr.Slider, gr.Number), + (gr.Checkbox, gr.ColorPicker, gr.Slider)): + """ + Renders the text editor parameters. + :param name: The name of the text editor parameters. This is used as the label for the accordion. + :return: A tuple containing the font, drop shadow, and background components. + """ with gr.Accordion(label=name): with gr.Column(): font_family, font_style, font_color, font_opacity, font_size = gru.render_font_picker() @@ -48,26 +60,51 @@ def render_text_editor_parameters(name): (background_checkbox, background_color, background_opacity)) -def add_background(image_pil, draw, position, text, font, padding=(15, 5), fill_color=(0, 0, 0, 255), border_radius=0): +def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], text: str, font: ImageFont, + padding: tuple[int, int] = (15, 5), fill_color: tuple[int, int, int, int] = (0, 0, 0, 255), + border_radius: int = 0) -> (tuple[int, int], tuple[int, int]): + """ + Adds a background to the text. + :param image_pil: The PIL image to add the background to. + :param draw: The PIL draw object to use. + :param position: The position of the text on the image. + :param text: The text to add the background to. + :param font: The font to use. + :param padding: The padding between the font and background. + :param fill_color: The color of the background. + :param border_radius: The border radius of the background. + :return: A tuple containing the position of the text and the size of the background. + """ # Calculate width and height of text with padding bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] text_height = bbox[3] - bbox[1] - x1 = position[0] - padding[0] # left - y1 = position[1] - padding[1] # top - x2 = x1 + text_width + 2 * padding[0] # right - y2 = y1 + text_height + 2 * padding[1] # bottom + left = position[0] - padding[0] + top = position[1] - padding[1] + right = left + text_width + 2 * padding[0] + bottom = top + text_height + 2 * padding[1] rect_img = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) rect_draw = ImageDraw.Draw(rect_img) - rect_draw.rounded_rectangle([x1, y1, x2, y2], fill=fill_color, radius=border_radius) + rect_draw.rounded_rectangle([left, top, right, bottom], fill=fill_color, radius=border_radius) image_pil.paste(rect_img, (0, 0), rect_img) - return (x1 + padding[0], y1 + padding[1]), (x2 - x1, y2 - y1) - - -def add_blurred_shadow(image_pil, text, position, font, shadow_color=(0, 0, 0), shadow_offset=(0, 0), - blur_radius=1): + return (left + padding[0], top + padding[1]), (right - left, bottom - top) + + +def add_blurred_shadow(image_pil: Image, text: str, position: tuple[float, float], font: ImageFont, + shadow_color: tuple[int, int, int, int] = (0, 0, 0, 0), shadow_offset: tuple[int, int] = (0, 0), + blur_radius: int = 1) -> None: + """ + Adds a blurred shadow (or highlight) to the text. + :param image_pil: The PIL image to add the shadow to. + :param text: The text to add the shadow to. 
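
# Editor's note: a self-contained usage sketch for add_background as documented
# above (not part of the patch). The canvas size, text, and font file are
# placeholder assumptions.
from PIL import Image, ImageDraw, ImageFont

canvas = Image.new("RGBA", (400, 200), (255, 255, 255, 255))
draw = ImageDraw.Draw(canvas)
font = ImageFont.truetype("DejaVuSans.ttf", 32)  # hypothetical font file
text_pos, bg_size = add_background(canvas, draw, (60, 80), "Hello", font,
                                   fill_color=(0, 0, 0, 200), border_radius=8)
draw.text(text_pos, "Hello", font=font, fill=(255, 255, 255, 255))
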
+ :param position: The position of the text on the image. + :param font: The font to use. + :param shadow_color: The color of the shadow. + :param shadow_offset: The offset of the shadow. + :param blur_radius: The blur radius of the shadow. + """ # Create an image for the shadow shadow_image = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) shadow_draw = ImageDraw.Draw(shadow_image) @@ -83,7 +120,13 @@ def add_blurred_shadow(image_pil, text, position, font, shadow_color=(0, 0, 0), image_pil.paste(blurred_shadow, (0, 0), blurred_shadow) -def read_image_from_disk(filepath, size=None): +def read_image_from_disk(filepath: str, size: Optional[cv2.typing.Size] = None) -> np.ndarray: + """ + Reads and returns an image from disk using CV2. + :param filepath: The path to the image. + :param size: The size to resize the image to. + :return: The image as a NumPy array. + """ img = cv2.imread(filepath, cv2.IMREAD_UNCHANGED) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA) # Convert to RGBA for PIL usage if size: @@ -91,20 +134,27 @@ def read_image_from_disk(filepath, size=None): return img -# This assumes the images are from a gallery, which is why it checks for the 'root' attribute. -def save_images_to_disk(images, image_type, dir=default_path): +def save_images_to_disk(images: gr.data_classes.RootModel, image_type: gr.Dropdown, save_dir: str = default_path) -> \ + Optional[str]: + """ + Saves a list of images to disk. + :param images: The list of images to save. Imported from a gradio.Gallery component. + :param image_type: The type of image to save. + :param save_dir: The directory to save the images to. + :return: The directory the images were saved to. None if there was an error. + """ if not images or len(images.root) == 0: gr.Warning("No images to save.") - return + return None - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") unique_id = uuid.uuid4() - dir = f"{base_dir}/{date}/{unique_id}" + save_dir = f"{base_dir}/{date}/{unique_id}" - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) for index, image_container in enumerate(images.root): image = image_container.image @@ -113,25 +163,34 @@ def save_images_to_disk(images, image_type, dir=default_path): continue filename = f"{index}.{image_type}" - filepath = os.path.join(dir, filename) + filepath = os.path.join(save_dir, filename) img = cv2.imread(image.path, cv2.IMREAD_UNCHANGED) cv2.imwrite(filepath, img) - gr.Info(f"Saved generated images to {dir}.") - return dir + gr.Info(f"Saved generated images to {save_dir}.") + return save_dir -def save_image_to_disk(image_path, name, image_suffix=".png", dir=default_path): +def save_image_to_disk(image_path: str, name: Optional[str] = None, save_dir: str = default_path, + image_suffix: Literal[".png", ".jpg", ".webp"] = ".png") -> Optional[str]: + """ + Saves an image to disk. + :param image_path: The path to the temporary image. + :param name: The name to give the saved image. + :param save_dir: The directory to save the image to. + :param image_suffix: The suffix to give the saved image. + :return: The directory the image was saved to. None if there was an error. 
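
# Editor's note: both save helpers above derive a collision-free output directory
# from the current date plus a random UUID; condensed, the scheme is:
import os
import uuid
from datetime import datetime
from pathlib import Path

base_dir = Path.home() / "trendgenie" / "images"
save_dir = os.path.join(base_dir, datetime.now().strftime("%m%d%Y"), str(uuid.uuid4()))
os.makedirs(save_dir, exist_ok=True)
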
+ """ if image_path is None: gr.Warning("No image to save.") - return + return None - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") unique_id = uuid.uuid4() - dir = f"{base_dir}/{date}/{unique_id}" + save_dir = f"{base_dir}/{date}/{unique_id}" if name is None or name == "": unique_id = uuid.uuid4() @@ -141,15 +200,15 @@ def save_image_to_disk(image_path, name, image_suffix=".png", dir=default_path): name = Path(name).stem name = f"{name}{image_suffix}" - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) - filepath = os.path.join(dir, name) + filepath = os.path.join(save_dir, name) img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) cv2.imwrite(filepath, img) - gr.Info(f"Saved generated image to {dir}.") - return dir + gr.Info(f"Saved generated image to {save_dir}.") + return save_dir # Function to add text to an image with custom font, size, and wrapping From fd7b6c383d74eeecb9b960223cd0ffa1161f371f Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Sun, 14 Jan 2024 22:03:26 -0500 Subject: [PATCH 08/13] More linting - ui/music/utils.py --- .github/workflows/pylint.yml | 2 +- api/chatgpt.py | 25 ++--- processing/image.py | 51 +++++++--- ui/music/interface.py | 5 +- ui/music/utils.py | 189 ++++++++++++++++++++++++++--------- utils/image.py | 29 +++++- 6 files changed, 224 insertions(+), 77 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 7ea94c9..2d4b561 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/api/chatgpt.py b/api/chatgpt.py index debbbc0..e80d050 100644 --- a/api/chatgpt.py +++ b/api/chatgpt.py @@ -80,8 +80,8 @@ def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: # Give the model previous chat context if context is not None and len(context) > 0: - for c in context: - message.append(c) + for curr_context in context: + message.append(curr_context) message.append({ "role": "user", @@ -102,16 +102,17 @@ def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: response = response.choices[0] if response.finish_reason != "stop": - if response.finish_reason == "length": - gr.Warning( - f"finish_reason: {response.finish_reason}. The maximum number of tokens specified in the request was " - f"reached.") - return None - elif response.finish_reason == "content_filter": - gr.Warning( - f"finish_reason: {response.finish_reason}. The content was omitted due to a flag from OpenAI's content " - f"filters.") - return None + match response.finish_reason: + case "length": + gr.Warning( + f"finish_reason: {response.finish_reason}. The maximum number of tokens specified in the request " + f"was reached.") + return None + case "content_filter": + gr.Warning( + f"finish_reason: {response.finish_reason}. 
The content was omitted due to a flag from OpenAI's " + f"content filters.") + return None content = response.message.content if content is None or content == "": diff --git a/processing/image.py b/processing/image.py index 03f1f6a..fd2cec9 100644 --- a/processing/image.py +++ b/processing/image.py @@ -1,18 +1,21 @@ -from typing import Optional, Literal -from PIL import ImageFont, ImageDraw, Image, ImageFilter -import numpy as np +""" +This module contains functions for processing images. +""" import textwrap -import gradio as gr import uuid -from datetime import datetime import os -import cv2 from pathlib import Path -import utils.path_handler as path_handler +from datetime import datetime +from typing import Optional, Literal, Union, Tuple +from PIL import ImageFont, ImageDraw, Image, ImageFilter +import numpy as np +import gradio as gr +import cv2 +from utils import path_handler import utils.gradio as gru -image_folder = "images" -default_path = os.path.join(path_handler.get_default_path(), image_folder) +IMAGE_FOLDER = "images" +default_path = os.path.join(path_handler.get_default_path(), IMAGE_FOLDER) def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): @@ -212,9 +215,29 @@ def save_image_to_disk(image_path: str, name: Optional[str] = None, save_dir: st # Function to add text to an image with custom font, size, and wrapping -def add_text(image, text, position, font_path, font_size, font_color=(255, 255, 255, 255), shadow_color=(255, 255, 255), - shadow_radius=None, max_width=None, show_background=False, show_shadow=False, - background_color=(0, 0, 0, 255), x_center=False): +def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[int, int], font_path: str, + font_size: int, font_color: Tuple[int, int, int, int] = (255, 255, 255, 255), + shadow_color: Tuple[int, int, int, int] = (255, 255, 255, 255), + shadow_radius: Optional[int] = None, max_width: Optional[int] = None, show_background: bool = False, + show_shadow: bool = False, background_color: Tuple[int, int, int, int] = (0, 0, 0, 255), + x_center: bool = False) -> (np.ndarray, Tuple[int, int]): + """ + Adds text to an image with custom font, size, and wrapping. + :param image: The image to add text to. + :param text: The text to add to the image. + :param position: The (x, y) position of the text on the image. + :param font_path: The path to the font to use. + :param font_size: The size of the font. + :param font_color: The color of the font. + :param shadow_color: The color of the shadow. + :param shadow_radius: The radius of the shadow. + :param max_width: The maximum width of the text before wrapping. + :param show_background: Whether to show a background behind the text. + :param show_shadow: Whether to show a shadow behind the text. + :param background_color: The color of the background. + :param x_center: Whether to center the text on the x-axis. This ignores the positional x parameter. + :return: A tuple containing the image with text added and the size of the text block. 
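
# Editor's note: the match/case rewrite in api/chatgpt.py above is also why this
# patch bumps the CI matrix from 3.9/3.10 to 3.10/3.11 -- structural pattern
# matching (PEP 634) only exists on Python >= 3.10. A minimal equivalent sketch,
# with a placeholder value:
finish_reason = "length"
match finish_reason:
    case "length":
        print("token limit reached")
    case "content_filter":
        print("flagged by content filters")
    case _:
        print("unhandled finish_reason")
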
+ """ if not isinstance(position, tuple): raise TypeError("Position must be a 2-tuple.", type(position)) @@ -231,7 +254,7 @@ def add_text(image, text, position, font_path, font_size, font_color=(255, 255, font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - img_width, img_height = image_pil.size + img_width, _ = image_pil.size if max_width: # Prepare for text wrapping if max_width is provided wrapped_text = textwrap.fill(text, width=max_width) @@ -244,7 +267,7 @@ def add_text(image, text, position, font_path, font_size, font_color=(255, 255, y_offset = 0 max_line_width = 0 # Keep track of the widest line total_height = 0 # Accumulate total height of text block - for i, line in enumerate(lines): + for line in lines: bbox = draw.textbbox((0, 0), line, font=font) line_width = bbox[2] - bbox[0] line_height = bbox[3] - bbox[1] diff --git a/ui/music/interface.py b/ui/music/interface.py index 1f13624..3361495 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -73,8 +73,9 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): with gr.Column(): gr.Markdown("## Input") with gr.Group(): - input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", show_download_button=False, - scale=2, elem_classes=["single-image-input"], image_mode="RGBA") + input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", + show_download_button=False, scale=2, elem_classes=["single-image-input"], + image_mode="RGBA") with gr.Row(equal_height=False): with gr.Group(): diff --git a/ui/music/utils.py b/ui/music/utils.py index 4ff294c..7caebca 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -1,29 +1,41 @@ +""" +This file contains the functions and utilities used to generate the music video and cover image. +""" import os import subprocess import re import time import cv2 +from typing import List, Dict, Optional from moviepy.editor import AudioFileClip -import utils.font_manager as font_manager +from utils import font_manager import utils.image as image_utils import numpy as np import tempfile -import api.chatgpt as chatgpt_api -import processing.image as image_processing +from api import chatgpt as chatgpt_api +from processing import image as image_processing import librosa from utils import progress, visualizer import cProfile -def analyze_audio(audio, target_fps): - y, sr = librosa.load(audio, sr=None) - D = librosa.stft(y) - D_db = librosa.amplitude_to_db(np.abs(D), ref=np.max) +def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): + """ + Analyzes the audio file at the given path and returns the frequency loudness and times relating to the frequency + loudness. + :param audio_path: The path to the audio file to analyze. + :param target_fps: The target frames per second for the audio visualizer. This is used to downsample the audio so + that it aligns with the video. + :return: A tuple containing the frequency loudness and times relating to the frequency loudness. 
+ """ + y, sr = librosa.load(audio_path, sr=None) + d = librosa.stft(y) + d_db = librosa.amplitude_to_db(np.abs(d), ref=np.max) frequencies = librosa.fft_frequencies(sr=sr) - times = librosa.frames_to_time(np.arange(D_db.shape[1]), sr=sr) + times = librosa.frames_to_time(np.arange(d_db.shape[1]), sr=sr) - audio_clip = AudioFileClip(audio) + audio_clip = AudioFileClip(audio_path) audio_frames_per_video_frame = len(times) / (target_fps * audio_clip.duration) sample_indices = np.arange(0, len(times), audio_frames_per_video_frame) @@ -31,36 +43,82 @@ def analyze_audio(audio, target_fps): sample_indices = sample_indices[sample_indices < len(times)] downsampled_times = times[sample_indices] - downsampled_frequency_loudness = [dict(zip(frequencies, D_db[:, i])) for i in sample_indices] + downsampled_frequency_loudness = [dict(zip(frequencies, d_db[:, i])) for i in sample_indices] return downsampled_frequency_loudness, downsampled_times def create_music_video( - image, audio, fps, - artist, artist_font_type, artist_font_style, artist_font_size, artist_font_color, artist_font_opacity, - artist_shadow_enabled, artist_shadow_color, artist_shadow_opacity, artist_shadow_radius, - artist_background_enabled, artist_background_color, artist_background_opacity, - song, song_font_type, song_font_style, song_font_size, song_font_color, song_font_opacity, song_shadow_enabled, - song_shadow_color, song_shadow_opacity, song_shadow_radius, song_background_enabled, song_background_color, - song_background_opacity, - background_color=(0, 0, 0), background_opacity=66, generate_audio_visualizer=False, - audio_visualizer_color=(255, 255, 255), audio_visualizer_opacity=100, visualizer_drawing=None, - audio_visualizer_num_rows=90, audio_visualizer_num_columns=65, audio_visualizer_min_size=1, - audio_visualizer_max_size=7): - if image is None: + image_path: str, audio_path: str, fps: int, + artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int, + artist_font_color: tuple[int, int, int], artist_font_opacity: int, artist_shadow_enabled: bool, + artist_shadow_color: tuple[int, int, int], artist_shadow_opacity: int, artist_shadow_radius: int, + artist_background_enabled: bool, artist_background_color: tuple[int, int, int], artist_background_opacity: int, + song: str, song_font_type: str, song_font_style: str, song_font_size: int, + song_font_color: tuple[int, int, int], song_font_opacity: int, song_shadow_enabled: bool, + song_shadow_color: tuple[int, int, int], song_shadow_opacity: int, song_shadow_radius: int, + song_background_enabled: bool, song_background_color: tuple[int, int, int], song_background_opacity: int, + background_color: tuple[int, int, int] = (0, 0, 0), background_opacity: int = 66, + generate_audio_visualizer: bool = False, audio_visualizer_color: tuple[int, int, int] =(255, 255, 255), + audio_visualizer_opacity: int = 100, visualizer_drawing: Optional[str] = None, + audio_visualizer_num_rows: int = 90, audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1, + audio_visualizer_max_size: int = 7) -> Optional[str]: + """ + Creates a music video using the given parameters. + :param image_path: The path to the image to use as the cover + background for the video. + :param audio_path: The path to the audio file to use for the video. + :param fps: The frames per second to use for the video. + :param artist: The artist name to add to the video. + :param artist_font_type: The font family to use for the artist name. 
+ :param artist_font_style: The font style to use for the artist name. + :param artist_font_size: The font size to use for the artist name. + :param artist_font_color: The font color to use for the artist name. + :param artist_font_opacity: The font opacity to use for the artist name. + :param artist_shadow_enabled: Whether to show a shadow for the artist name. + :param artist_shadow_color: The shadow color to use for the artist name. + :param artist_shadow_opacity: The shadow opacity to use for the artist name. + :param artist_shadow_radius: The shadow radius to use for the artist name. + :param artist_background_enabled: Whether to show a background for the artist name. + :param artist_background_color: The background color to use for the artist name. + :param artist_background_opacity: The background opacity to use for the artist name. + :param song: The song name to add to the video. + :param song_font_type: The font family to use for the song name. + :param song_font_style: The font style to use for the song name. + :param song_font_size: The font size to use for the song name. + :param song_font_color: The font color to use for the song name. + :param song_font_opacity: The font opacity to use for the song name. + :param song_shadow_enabled: Whether to show a shadow for the song name. + :param song_shadow_color: The shadow color to use for the song name. + :param song_shadow_opacity: The shadow opacity to use for the song name. + :param song_shadow_radius: The shadow radius to use for the song name. + :param song_background_enabled: Whether to show a background for the song name. + :param song_background_color: The background color to use for the song name. + :param song_background_opacity: The background opacity to use for the song name. + :param background_color: The background color to use for the video. + :param background_opacity: The background opacity to use for the video. + :param generate_audio_visualizer: Whether to generate an audio visualizer for the video. + :param audio_visualizer_color: The color to use for the audio visualizer. + :param audio_visualizer_opacity: The opacity to use for the audio visualizer. + :param visualizer_drawing: The path to the image to use for the audio visualizer. If None, uses a circle. + :param audio_visualizer_num_rows: The number of rows to use for the audio visualizer's drawings. + :param audio_visualizer_num_columns: The number of columns to use for the audio visualizer's drawings. + :param audio_visualizer_min_size: The minimum size to use for the audio visualizer's drawings (silence). + :param audio_visualizer_max_size: The maximum size to use for the audio visualizer's drawings (peak loudness). + :return: The path to the generated video, or None if there was an error. 
+ """ + if image_path is None: print("No cover image for the video.") - return - if audio is None: + return None + if audio_path is None: print("No audio to add to the video.") - return + return None # Could probably expand to 4k, but unnecessary for this type of music video # Maybe in a future iteration it could be worth it width, height = 1920, 1080 # Set up cover - cover = cv2.imread(image, cv2.IMREAD_UNCHANGED) + cover = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) if cover.shape[2] == 3: cover = cv2.cvtColor(cover, cv2.COLOR_BGR2RGBA) else: @@ -84,10 +142,10 @@ def create_music_video( canvas[cover_pos[1]:cover_pos[1] + new_height, cover_pos[0]:cover_pos[0] + new_width] = cover # Load song / audio - audio_clip = AudioFileClip(audio) + audio_clip = AudioFileClip(audio_path) # Add video background - background = cv2.imread(image) + background = cv2.imread(image_path) background = cv2.resize(background, (width, height)) background = cv2.GaussianBlur(background, (49, 49), 0) if background.shape[2] == 3: @@ -117,7 +175,7 @@ def create_music_video( if generate_audio_visualizer: print("Generating audio visualizer...") - frequency_loudness, times = analyze_audio(audio, fps) + frequency_loudness, times = analyze_audio(audio_path, fps) frame_cache = np.zeros((height, width, 4), dtype=np.uint8) total_iterations = len(times) @@ -161,16 +219,16 @@ def create_music_video( song_background_opacity)) artist_pos = (song_pos[0], song_pos[1] - song_height - 5) text_canvas, (_, _) = image_processing.add_text(text_canvas, artist, artist_pos, - font_families[artist_font_type][artist_font_style], - font_size=artist_font_size, - font_color=image_utils.get_rgba(artist_font_color, - artist_font_opacity), - show_shadow=artist_shadow_enabled, - shadow_radius=artist_shadow_radius, - shadow_color=image_utils.get_rgba(artist_shadow_color, - artist_shadow_opacity), - show_background=artist_background_enabled, - background_color=image_utils.get_rgba( + font_families[artist_font_type][artist_font_style], + font_size=artist_font_size, + font_color=image_utils.get_rgba(artist_font_color, + artist_font_opacity), + show_shadow=artist_shadow_enabled, + shadow_radius=artist_shadow_radius, + shadow_color=image_utils.get_rgba(artist_shadow_color, + artist_shadow_opacity), + show_background=artist_background_enabled, + background_color=image_utils.get_rgba( artist_background_color, artist_background_opacity)) text_np = np.array(text_canvas) @@ -216,7 +274,7 @@ def create_music_video( ffmpeg_commands.extend([ "-framerate", str(fps), "-i", temp_canvas_image_path, - "-i", audio, + "-i", audio_path, "-filter_complex", filter_complex, "-map", audio_input_map, "-c:v", "libx264", @@ -268,7 +326,14 @@ def create_music_video( return temp_final_video_path -def generate_cover_image(api_key, api_model, prompt): +def generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[str]: + """ + Generates a cover image using the OpenAI API based on a given prompt and specified parameters. + :param api_key: The API key to use for the OpenAI API. + :param api_model: The model to use for image generation (e.g., 'dall-e-3'). + :param prompt: The text prompt based on which the image is generated. + :return: The URL of the generated image, or None if no image was generated or if there was an error. 
+ """ client = chatgpt_api.get_openai_client(api_key) image_url = chatgpt_api.get_image_response(client, api_model, prompt, portrait=False) if image_url is None or image_url == "": @@ -277,12 +342,46 @@ def generate_cover_image(api_key, api_model, prompt): return chatgpt_api.url_to_gradio_image_name(image_url) -def process(image_path, artist, song, - af_family, af_style, afs, afc, afo, ase, asc, aso, asr, abe, abc, abo, - sf_family, sf_style, sfs, sfc, sfo, sse, ssc, sso, ssr, sbe, sbc, sbo): +def process(image_path: str, artist: str, song: str, + af_family: str, af_style: str, afs: int, afc: tuple[int, int, int], afo: int, ase: bool, + asc: tuple[int, int, int], aso: int, asr: Optional[int], abe: bool, abc: tuple[int, int, int], abo: int, + sf_family: str, sf_style: str, sfs: int, sfc: tuple[int, int, int], sfo: int, sse: bool, + ssc: tuple[int, int, int], sso: int, ssr: Optional[int], sbe: bool, sbc: tuple[int, int, int], sbo: int) \ + -> Optional[np.ndarray]: + """ + Processes the image at the given path (by adding the requested text) and returns the processed image. + :param image_path: The path to the image to process. + :param artist: The artist name to add to the image. + :param song: The song name to add to the image. + :param af_family: The font family to use for the artist name. + :param af_style: The font style to use for the artist name. + :param afs: The font size to use for the artist name. + :param afc: The font color to use for the artist name. + :param afo: The font opacity to use for the artist name. + :param ase: Whether to show a shadow for the artist name. + :param asc: The shadow color to use for the artist name. + :param aso: The shadow opacity to use for the artist name. + :param asr: The shadow radius to use for the artist name. + :param abe: Whether to show a background for the artist name. + :param abc: The background color to use for the artist name. + :param abo: The background opacity to use for the artist name. + :param sf_family: The font family to use for the song name. + :param sf_style: The font style to use for the song name. + :param sfs: The font size to use for the song name. + :param sfc: The font color to use for the song name. + :param sfo: The font opacity to use for the song name. + :param sse: Whether to show a shadow for the song name. + :param ssc: The shadow color to use for the song name. + :param sso: The shadow opacity to use for the song name. + :param ssr: The shadow radius to use for the song name. + :param sbe: Whether to show a background for the song name. + :param sbc: The background color to use for the song name. + :param sbo: The background opacity to use for the song name. + :return: The processed image as a numpy array. If there was no image to process, returns None. + """ if image_path is None: print("No image to modify.") - return + return None font_families = font_manager.get_fonts() aff = font_families[af_family][af_style] diff --git a/utils/image.py b/utils/image.py index 1bbe984..0204e57 100644 --- a/utils/image.py +++ b/utils/image.py @@ -1,9 +1,26 @@ -def get_alpha_from_opacity(opacity): +""" +This file contains functions for image processing. +""" +from typing import Tuple, Union + + +def get_alpha_from_opacity(opacity: int) -> int: + """ + Converts an opacity value from 0-100 to 0-255. + :param opacity: The opacity value from 0-100. + :return: The opacity value from 0-255. 
+ """ # Opacity should be 0 -> 0, 100 -> 255 return int(opacity * 255 / 100) -def get_rgba(color, opacity): +def get_rgba(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: + """ + Gets the RGBA value for a given color and opacity. + :param color: The color to use. Either a hex string or a tuple of RGB values. + :param opacity: The opacity to use, from 0 to 100. + :return: The RGBA value. + """ # if color is hex, convert to rgb if not isinstance(color, tuple) and color.startswith("#"): color = color.lstrip("#") @@ -12,7 +29,13 @@ def get_rgba(color, opacity): return color[0], color[1], color[2], get_alpha_from_opacity(opacity) -def get_bgra(color, opacity): +def get_bgra(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: + """ + Gets the BGRA value for a given color and opacity. + :param color: The color to use. Either a hex string or a tuple of BGR values. + :param opacity: The opacity to use, from 0 to 100. + :return: The BGRA value. + """ # if color is hex, convert to rgb if not isinstance(color, tuple) and color.startswith("#"): color = color.lstrip("#") From 17640764821d5f272511af531884a84eb3438bbe Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Sun, 14 Jan 2024 22:26:12 -0500 Subject: [PATCH 09/13] More linting - 95% done --- ui/listicles/interface.py | 22 ++++++++++++++--- ui/listicles/utils.py | 52 +++++++++++++++++++++++++++++++++------ ui/music/utils.py | 9 +++---- utils/progress.py | 9 ++++--- utils/visualizer.py | 24 ++++++++++++++++-- 5 files changed, 93 insertions(+), 23 deletions(-) diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index b27d214..664cd72 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -1,12 +1,18 @@ +""" +The interface for the Listicles section of the web app. +""" +import json import gradio as gr import processing.image as image_processing -import json import ui.listicles.utils as listicle_utils import ui.components.openai as openai_components import utils.gradio as gru -def render_listicles_section(): +def render_listicles_section() -> None: + """ + Renders the Listicles section of the web app. + """ gru.render_tool_description("Create images in the style of those 'Your birth month is your ___' TikToks.") with gr.Tab("Generate Artifacts"): send_artifacts_to_batch_button, listicle_image_output, listicle_json_output = render_generate_section() @@ -20,7 +26,11 @@ def render_listicles_section(): ) -def render_batch_section(): +def render_batch_section() -> (gr.File, gr.Code): + """ + Renders the Batch Image Processing section of the web app. + :return: The input images and input json components. + """ with gr.Column(): gr.Markdown("# Input") with gr.Row(equal_height=False): @@ -106,7 +116,11 @@ def set_json(json_file): return input_batch_images, input_batch_json -def render_generate_section(): +def render_generate_section() -> (gr.Button, gr.Gallery, gr.Code): + """ + Renders the Generate Artifacts section of the web app. + :return: The send artifacts to batch button, the listicle image output gallery, and the listicle json output. + """ api_key, api_text_model, api_image_model = openai_components.render_openai_setup() with gr.Row(equal_height=False): with gr.Group(): diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index d43eeb5..b4c75b3 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -1,7 +1,11 @@ -import gradio as gr +""" +This file contains the functions that are used by the Gradio UI to generate listicles. 
+""" +import os import json +from typing import Optional +import gradio as gr import processing.image as image_processing -import os from utils import font_manager, image as image_utils import api.chatgpt as chatgpt_api @@ -93,7 +97,12 @@ def process(image_files, json_data, return images -def validate_json(json_file): +def validate_json(json_file: str) -> None: + """ + Validates the JSON file to make sure it has the required fields. + :param json_file: The JSON file to validate. + :return: None + """ if not json_file or len(json_file) == 0: gr.Warning("No JSON in the code block.") return @@ -117,7 +126,13 @@ def validate_json(json_file): gr.Info("JSON is valid!") -def send_artifacts_to_batch(listicle_images, json_data): +def send_artifacts_to_batch(listicle_images: gr.data_classes.RootModel, json_data: str) -> (list, str): + """ + Sends the artifacts to the batch processing section. + :param listicle_images: The list of images to send. This is a Gradio Gallery. + :param json_data: The JSON data to send. + :return: The list of images and the JSON data sent. + """ if not listicle_images or len(listicle_images.root) == 0: gr.Warning("No images to send.") return @@ -130,10 +145,17 @@ def send_artifacts_to_batch(listicle_images, json_data): return listicle_images, json_data -def save_artifacts(listicle_images, image_type, json_data): +def save_artifacts(listicle_images: gr.data_classes.RootModel, image_type: gr.Dropdown, json_data: str) -> None: + """ + Saves the artifacts to disk. + :param listicle_images: The list of images to save. This is a Gradio Gallery. + :param image_type: The type of image to save. + :param json_data: The JSON data to save. + :return: None + """ if not json_data or len(json_data) == 0: gr.Warning("No JSON data to save.") - return + return None # Save the images save_dir = image_processing.save_images_to_disk(listicle_images, image_type) @@ -148,8 +170,22 @@ def save_artifacts(listicle_images, image_type, json_data): gr.Info(f"Saved generated artifacts to {save_dir}.") -def generate_listicle(api_key, api_text_model, api_image_model, number_of_items, topic, association, - rating_type, details="", generate_artifacts=False): +def generate_listicle(api_key: str, api_text_model: str, api_image_model: str, number_of_items: int, topic: str, + association: str, rating_type: str, details: str = "", generate_artifacts: bool = False) \ + -> (Optional[str], Optional[str], Optional[list[str]]): + """ + Generates a listicle using the OpenAI API. + :param api_key: The OpenAI API key to use. + :param api_text_model: The OpenAI API text model to use (e.g. 'gpt-4'). + :param api_image_model: The OpenAI API image model to use (e.g. 'dall-e-3'). + :param number_of_items: The number of items to generate. + :param topic: The topic of the listicle. + :param association: What each item is associated with. + :param rating_type: What the rating represents. + :param details: Additional details about the listicle you want to generate. + :param generate_artifacts: Whether to generate artifacts (images and JSON) for the listicle. + :return: The listicle content, the listicle JSON, and the listicle images. + """ openai = chatgpt_api.get_openai_client(api_key) if openai is None: gr.Warning("No OpenAI client. 
Cannot generate listicle.") diff --git a/ui/music/utils.py b/ui/music/utils.py index 7caebca..eb6dd98 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -5,18 +5,17 @@ import subprocess import re import time -import cv2 +import tempfile from typing import List, Dict, Optional +import cv2 from moviepy.editor import AudioFileClip +import numpy as np +import librosa from utils import font_manager import utils.image as image_utils -import numpy as np -import tempfile from api import chatgpt as chatgpt_api from processing import image as image_processing -import librosa from utils import progress, visualizer -import cProfile def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): diff --git a/utils/progress.py b/utils/progress.py index c5a14b5..52569ba 100644 --- a/utils/progress.py +++ b/utils/progress.py @@ -6,9 +6,11 @@ def print_progress_bar(current_iteration: int, total_iterations: int, bar_length start_time: Optional[float] = None, end: str = ''): progress_percentage = (current_iteration / total_iterations) * 100 completed_length = int(bar_length * current_iteration // total_iterations) - bar = 'â–ˆ' * completed_length + 'â–‘' * (bar_length - completed_length) + progress_bar = 'â–ˆ' * completed_length + 'â–‘' * (bar_length - completed_length) elapsed_time = None + estimated_remaining_time = None + iterations_per_sec = None if start_time is not None: elapsed_time = time.time() - start_time if current_iteration > 0: @@ -19,7 +21,6 @@ def print_progress_bar(current_iteration: int, total_iterations: int, bar_length estimated_remaining_time = None time_string = '' - if estimated_remaining_time is not None: + if estimated_remaining_time is not None and iterations_per_sec is not None: time_string = f'[{elapsed_time:.2f}s/{estimated_remaining_time:.2f}s, {iterations_per_sec:.2f}it/s]' - print(f'\r{progress_percentage:3.0f}%|{bar}| {current_iteration}/{total_iterations} {time_string}', end=end, flush=True) - + print(f'\r{progress_percentage:3.0f}%|{progress_bar}| {current_iteration}/{total_iterations} {time_string}', end=end, flush=True) diff --git a/utils/visualizer.py b/utils/visualizer.py index 8739a70..e32e7bf 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -1,8 +1,16 @@ +""" +This module defines the Visualizer class, which is used to draw the visualizer on the canvas. +""" +from typing import Dict, Optional import numpy as np import cv2 class Visualizer: + """ + This class is used to draw the visualizer on the canvas. + Will be replaced with a more general solution in the future to allow for more customization. + """ def __init__(self, base_size, max_size, color, dot_count, width, height): self.base_size = base_size self.max_size = max_size @@ -13,7 +21,11 @@ def __init__(self, base_size, max_size, color, dot_count, width, height): self.cached_dot_positions = None self.cached_resized_drawing = {} - def initialize_static_values(self): + def initialize_static_values(self: "Visualizer") -> None: + """ + Initializes static values for the visualizer. + :return: None. 
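
# Editor's note: a compact sketch of the precomputation documented above and
# implemented just below -- dot centers on an evenly spaced grid, cached once
# instead of recomputed per frame (9x5 dots on 1920x1080 as placeholder numbers):
import numpy as np

width, height, nx, ny = 1920, 1080, 9, 5
xs = (width / nx) * np.arange(nx) + width / nx / 2    # column centers
ys = (height / ny) * np.arange(ny) + height / ny / 2  # row centers
grid_x, grid_y = np.meshgrid(xs, ys)
dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(nx) for y in range(ny)]
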
+ """ # Calculate and store dot positions x_positions = (self.width / self.dot_count[0]) * np.arange(self.dot_count[0]) + ( self.width / self.dot_count[0] / 2) @@ -23,7 +35,15 @@ def initialize_static_values(self): self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count[0]) for y in range(self.dot_count[1])] - def draw_visualizer(self, canvas, frequency_data, custom_drawing=None): + def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], + custom_drawing: Optional[np.ndarray] = None) -> None: + """ + Draws the visualizer on the canvas (a single frame). + :param canvas: The canvas to draw on. + :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency. + :param custom_drawing: A custom drawing to use instead of the default circle. + :return: None. + """ # Calculate and store dot positions dot_count_x = self.dot_count[0] dot_count_y = self.dot_count[1] From 2a3fa120d33a30cdbe516afb422b32f873f1c72a Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Mon, 15 Jan 2024 00:11:01 -0500 Subject: [PATCH 10/13] More linting - 98% done --- .pylintrc | 4 ++ processing/image.py | 23 +++++----- ui/listicles/interface.py | 2 +- ui/listicles/utils.py | 92 ++++++++++++++++++++++++++++++++------- ui/music/utils.py | 28 ++++++------ utils/dataclasses.py | 28 ++++++++++++ utils/font_manager.py | 8 ++-- utils/image.py | 7 +-- utils/path_handler.py | 2 +- utils/progress.py | 18 +++++++- utils/visualizer.py | 2 +- 11 files changed, 161 insertions(+), 53 deletions(-) create mode 100644 utils/dataclasses.py diff --git a/.pylintrc b/.pylintrc index 59c5a5c..2083c87 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,5 +1,9 @@ [MAIN] max-line-length=120 +max-attributes=10 +# Currently, this is added because gradio Inputs don't support passing tuples/dataclasses/etc. as arguments, meaning I +# can't shorten some methods that take a lot of arguments. 
+disable=too-many-arguments [TYPECHECK] generated-members=gradio.components.dropdown.*,gradio.components.button.*,cv2.* \ No newline at end of file diff --git a/processing/image.py b/processing/image.py index fd2cec9..9fe7628 100644 --- a/processing/image.py +++ b/processing/image.py @@ -11,10 +11,10 @@ import numpy as np import gradio as gr import cv2 -from utils import path_handler -import utils.gradio as gru +from utils import gradio as gru, path_handler, dataclasses IMAGE_FOLDER = "images" + default_path = os.path.join(path_handler.get_default_path(), IMAGE_FOLDER) @@ -82,17 +82,19 @@ def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] text_height = bbox[3] - bbox[1] - left = position[0] - padding[0] - top = position[1] - padding[1] - right = left + text_width + 2 * padding[0] - bottom = top + text_height + 2 * padding[1] + rect_pos = dataclasses.FourEdges(left=position[0] - padding[0], + top=position[1] - padding[1], + right=(position[0] - padding[0]) + text_width + 2 * padding[0], + bottom=(position[1] - padding[1]) + text_height + 2 * padding[1]) rect_img = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) rect_draw = ImageDraw.Draw(rect_img) - rect_draw.rounded_rectangle([left, top, right, bottom], fill=fill_color, radius=border_radius) + rect_draw.rounded_rectangle([rect_pos.left, rect_pos.top, rect_pos.right, rect_pos.bottom], fill=fill_color, + radius=border_radius) image_pil.paste(rect_img, (0, 0), rect_img) - return (left + padding[0], top + padding[1]), (right - left, bottom - top) + return ((rect_pos.left + padding[0], rect_pos.top + padding[1]), + (rect_pos.right - rect_pos.left, rect_pos.bottom - rect_pos.top)) def add_blurred_shadow(image_pil: Image, text: str, position: tuple[float, float], font: ImageFont, @@ -263,7 +265,6 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i lines = wrapped_text.split('\n') - x_pos, y_pos = position y_offset = 0 max_line_width = 0 # Keep track of the widest line total_height = 0 # Accumulate total height of text block @@ -274,10 +275,10 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i max_line_width = max(max_line_width, line_width) total_height += line_height - text_x = x_pos # Adjusted to use numpy width + text_x = position[0] if x_center: text_x = (img_width - line_width) / 2 - line_y = y_pos + y_offset + line_y = position[1] + y_offset y_offset += (line_height + 6) if show_background: diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index 664cd72..5dd1ab6 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -48,7 +48,7 @@ def set_json(json_file): if not json_file: gr.Warning("No JSON file uploaded. 
Reverting to default.")
             return input_batch_json.value
-        with open(json_file.name, "r") as file:
+        with open(json_file.name, "r", encoding="utf-8") as file:
             json_data = json.load(file)
             json_data = json.dumps(json_data, indent=4)

diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py
index b4c75b3..13a6e2b 100644
--- a/ui/listicles/utils.py
+++ b/ui/listicles/utils.py
@@ -3,24 +3,84 @@
 """
 import os
 import json
-from typing import Optional
+from typing import Optional, Any, List
 import gradio as gr
+import numpy as np
 import processing.image as image_processing
-from utils import font_manager, image as image_utils
+from utils import font_manager, image as image_utils, dataclasses
 import api.chatgpt as chatgpt_api


-def process(image_files, json_data,
-            nf_family, nf_style, nfs, nfc, nfo, nse, nsc, nso, nsr, nbe, nbc, nbo,
-            df_family, df_style, dfs, dfc, dfo, dse, dsc, dso, dsr, dbe, dbc, dbo,
-            mf_family, mf_style, mfs, mfc, mfo, mse, msc, mso, msr, mbe, mbc, mbo,
-            rf_family, rf_style, rfs, rfc, rfo, rse, rsc, rso, rsr, rbe, rbc, rbo):
+def process(image_files: list[Any], json_data: str,
+            nf_family: str, nf_style: str, nfs: int, nfc: dataclasses.RGBColor, nfo: int, nse: bool,
+            nsc: dataclasses.RGBColor, nso: int, nsr, nbe: bool, nbc: dataclasses.RGBColor, nbo: int,
+            df_family: str, df_style: str, dfs: int, dfc: dataclasses.RGBColor, dfo: int, dse: bool,
+            dsc: dataclasses.RGBColor, dso: int, dsr, dbe: bool, dbc: dataclasses.RGBColor, dbo: int,
+            mf_family: str, mf_style: str, mfs: int, mfc: dataclasses.RGBColor, mfo: int, mse: bool,
+            msc: dataclasses.RGBColor, mso: int, msr, mbe: bool, mbc: dataclasses.RGBColor, mbo: int,
+            rf_family: str, rf_style: str, rfs: int, rfc: dataclasses.RGBColor, rfo: int, rse: bool,
+            rsc: dataclasses.RGBColor, rso: int, rsr, rbe: bool, rbc: dataclasses.RGBColor, rbo: int) \
+        -> Optional[List[np.ndarray]]:
+    """
+    Processes the images and JSON data to generate the listicle images.
+    :param image_files: The list of images to process. This is a gradio File.
+    :param json_data: The JSON data to process.
+    :param nf_family: The font family for the name.
+    :param nf_style: The font style for the name.
+    :param nfs: The font size for the name.
+    :param nfc: The font color for the name.
+    :param nfo: The font opacity for the name.
+    :param nse: Whether to show the shadow for the name.
+    :param nsc: The shadow color for the name.
+    :param nso: The shadow opacity for the name.
+    :param nsr: The shadow radius for the name.
+    :param nbe: Whether to show the background for the name.
+    :param nbc: The background color for the name.
+    :param nbo: The background opacity for the name.
+    :param df_family: The font family for the description.
+    :param df_style: The font style for the description.
+    :param dfs: The font size for the description.
+    :param dfc: The font color for the description.
+    :param dfo: The font opacity for the description.
+    :param dse: Whether to show the shadow for the description.
+    :param dsc: The shadow color for the description.
+    :param dso: The shadow opacity for the description.
+    :param dsr: The shadow radius for the description.
+    :param dbe: Whether to show the background for the description.
+    :param dbc: The background color for the description.
+    :param dbo: The background opacity for the description.
+    :param mf_family: The font family for the association.
+    :param mf_style: The font style for the association.
+    :param mfs: The font size for the association.
+    :param mfc: The font color for the association.
+ :param mfo: The font opacity for the association. + :param mse: Whether to show the shadow for the association. + :param msc: The shadow color for the association. + :param mso: The shadow opacity for the association. + :param msr: The shadow radius for the association. + :param mbe: Whether to show the background for the association. + :param mbc: The background color for the association. + :param mbo: The background opacity for the association. + :param rf_family: The font family for the rating. + :param rf_style: The font style for the rating. + :param rfs: The font size for the rating. + :param rfc: The font color for the rating. + :param rfo: The font opacity for the rating. + :param rse: Whether to show the shadow for the rating. + :param rsc: The shadow color for the rating. + :param rso: The shadow opacity for the rating. + :param rsr: The shadow radius for the rating. + :param rbe: Whether to show the background for the rating. + :param rbc: The background color for the rating. + :param rbo: The background opacity for the rating. + :return: The list of processed images as numpy arrays. If there was an error, returns None. + """ if not json_data: print("No JSON file uploaded.") - return + return None if not image_files: print("No images uploaded.") - return + return None font_families = font_manager.get_fonts() nff = font_families[nf_family][nf_style] @@ -126,7 +186,8 @@ def validate_json(json_file: str) -> None: gr.Info("JSON is valid!") -def send_artifacts_to_batch(listicle_images: gr.data_classes.RootModel, json_data: str) -> (list, str): +def send_artifacts_to_batch(listicle_images: gr.data_classes.RootModel, json_data: str) \ + -> (Optional[list], Optional[str]): """ Sends the artifacts to the batch processing section. :param listicle_images: The list of images to send. This is a Gradio Gallery. @@ -135,10 +196,10 @@ def send_artifacts_to_batch(listicle_images: gr.data_classes.RootModel, json_dat """ if not listicle_images or len(listicle_images.root) == 0: gr.Warning("No images to send.") - return + return None, None if not json_data or len(json_data) == 0: gr.Warning("No JSON data to send.") - return + return None, None # Parse the listicle_images GalleryData to get file paths listicle_images = listicle_images.root listicle_images = [image.image.path for image in listicle_images] @@ -163,12 +224,14 @@ def save_artifacts(listicle_images: gr.data_classes.RootModel, image_type: gr.Dr # Save the JSON data if save_dir is not None and save_dir != "": json_filepath = os.path.join(save_dir, "data.json") - with open(json_filepath, "w") as file: + with open(json_filepath, "w", encoding="utf-8") as file: json_data = json.loads(json_data) json.dump(json_data, file, indent=4) gr.Info(f"Saved generated artifacts to {save_dir}.") + return None + def generate_listicle(api_key: str, api_text_model: str, api_image_model: str, number_of_items: int, topic: str, association: str, rating_type: str, details: str = "", generate_artifacts: bool = False) \ @@ -226,8 +289,7 @@ def generate_listicle(api_key: str, api_text_model: str, api_image_model: str, n message = (f"Format the listicle into JSON. For the items, store as a list named 'items' with the content " f"format: {json_format}.") if rating_type is not None and rating_type != "": - message += (f"Include a top-level field `rating_type: ` with what the rating " - f"represents.") + message += "Include a top-level field `rating_type: ` with what the rating represents." 
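
# Editor's note: a minimal sketch of the context-chaining pattern used by the
# call just below -- the first completion is fed back as assistant history so
# the model reformats its own listicle instead of inventing a new one. The exact
# shape of listicle_json_context is assumed from the OpenAI chat message format:
assumed_context = [
    {"role": "user", "content": "Write a listicle about ..."},  # original prompt
    {"role": "assistant", "content": "1. ..."},                 # first response
]
# chatgpt_api.get_chat_response(openai, json_model, role, prompt=message,
#                               context=assumed_context, as_json=True)
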
listicle_json = chatgpt_api.get_chat_response(openai, json_model, role, prompt=message, context=listicle_json_context, as_json=True) diff --git a/ui/music/utils.py b/ui/music/utils.py index eb6dd98..7bd5105 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -11,11 +11,9 @@ from moviepy.editor import AudioFileClip import numpy as np import librosa -from utils import font_manager -import utils.image as image_utils from api import chatgpt as chatgpt_api from processing import image as image_processing -from utils import progress, visualizer +from utils import progress, visualizer, font_manager, image as image_utils, dataclasses def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): @@ -50,15 +48,15 @@ def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]] def create_music_video( image_path: str, audio_path: str, fps: int, artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int, - artist_font_color: tuple[int, int, int], artist_font_opacity: int, artist_shadow_enabled: bool, - artist_shadow_color: tuple[int, int, int], artist_shadow_opacity: int, artist_shadow_radius: int, - artist_background_enabled: bool, artist_background_color: tuple[int, int, int], artist_background_opacity: int, + artist_font_color: dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool, + artist_shadow_color: dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int, + artist_background_enabled: bool, artist_background_color: dataclasses.RGBColor, artist_background_opacity: int, song: str, song_font_type: str, song_font_style: str, song_font_size: int, - song_font_color: tuple[int, int, int], song_font_opacity: int, song_shadow_enabled: bool, - song_shadow_color: tuple[int, int, int], song_shadow_opacity: int, song_shadow_radius: int, - song_background_enabled: bool, song_background_color: tuple[int, int, int], song_background_opacity: int, - background_color: tuple[int, int, int] = (0, 0, 0), background_opacity: int = 66, - generate_audio_visualizer: bool = False, audio_visualizer_color: tuple[int, int, int] =(255, 255, 255), + song_font_color: dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool, + song_shadow_color: dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int, + song_background_enabled: bool, song_background_color: dataclasses.RGBColor, song_background_opacity: int, + background_color: dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66, + generate_audio_visualizer: bool = False, audio_visualizer_color: dataclasses.RGBColor = (255, 255, 255), audio_visualizer_opacity: int = 100, visualizer_drawing: Optional[str] = None, audio_visualizer_num_rows: int = 90, audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1, audio_visualizer_max_size: int = 7) -> Optional[str]: @@ -342,10 +340,10 @@ def generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[ def process(image_path: str, artist: str, song: str, - af_family: str, af_style: str, afs: int, afc: tuple[int, int, int], afo: int, ase: bool, - asc: tuple[int, int, int], aso: int, asr: Optional[int], abe: bool, abc: tuple[int, int, int], abo: int, - sf_family: str, sf_style: str, sfs: int, sfc: tuple[int, int, int], sfo: int, sse: bool, - ssc: tuple[int, int, int], sso: int, ssr: Optional[int], sbe: bool, sbc: tuple[int, int, int], sbo: int) \ + af_family: str, af_style: str, afs: int, afc: dataclasses.RGBColor, afo: int, ase: bool, + asc: 
dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: dataclasses.RGBColor, abo: int, + sf_family: str, sf_style: str, sfs: int, sfc: dataclasses.RGBColor, sfo: int, sse: bool, + ssc: dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: dataclasses.RGBColor, sbo: int) \ -> Optional[np.ndarray]: """ Processes the image at the given path (by adding the requested text) and returns the processed image. diff --git a/utils/dataclasses.py b/utils/dataclasses.py new file mode 100644 index 0000000..655e7dd --- /dev/null +++ b/utils/dataclasses.py @@ -0,0 +1,28 @@ +""" +This module contains dataclasses and type aliases used in the project. +""" +from typing import Union +from dataclasses import dataclass + + +@dataclass +class FourEdges: + """ + A dataclass representing the four edges of a rectangle. + """ + top: int + bottom: int + left: int + right: int + + +@dataclass +class Position: + """ + A dataclass representing a position on a 2d plane. + """ + x: int + y: int + + +RGBColor = Union[str, tuple[int, int, int]] diff --git a/utils/font_manager.py b/utils/font_manager.py index 4074ec1..f6dcbce 100644 --- a/utils/font_manager.py +++ b/utils/font_manager.py @@ -27,7 +27,7 @@ def initialize_inflect() -> inflect.engine: Initializes the inflect engine. :return: The inflect engine. """ - global P + global P # pylint: disable=global-statement if P is None: P = inflect.engine() @@ -39,7 +39,7 @@ def initialize_fonts() -> NestedDict: Initializes the font families from the global FONTS_DIRS. :return: The font families and their paths. They are called by map[font_family][font_style]. """ - global FONT_FAMILIES + global FONT_FAMILIES # pylint: disable=global-statement font_files = [] # Add TrendGenie fonts @@ -76,7 +76,7 @@ def get_fonts() -> NestedDict: Gets the font families. If they are not initialized, it initializes them. :return: The font families and their paths. They are called by map[font_family][font_style]. """ - global FONT_FAMILIES + global FONT_FAMILIES # pylint: disable=global-statement if FONT_FAMILIES is None: FONT_FAMILIES = initialize_fonts() @@ -88,7 +88,7 @@ def get_inflect() -> inflect.engine: Gets the inflect engine. If it is not initialized, it initializes it. :return: The inflect engine. """ - global P + global P # pylint: disable=global-statement if P is None: P = initialize_inflect() diff --git a/utils/image.py b/utils/image.py index 0204e57..b070a04 100644 --- a/utils/image.py +++ b/utils/image.py @@ -1,7 +1,8 @@ """ This file contains functions for image processing. """ -from typing import Tuple, Union +from typing import Tuple +from utils import dataclasses def get_alpha_from_opacity(opacity: int) -> int: @@ -14,7 +15,7 @@ def get_alpha_from_opacity(opacity: int) -> int: return int(opacity * 255 / 100) -def get_rgba(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: +def get_rgba(color: dataclasses.RGBColor, opacity: int) -> Tuple[int, int, int, int]: """ Gets the RGBA value for a given color and opacity. :param color: The color to use. Either a hex string or a tuple of RGB values. @@ -29,7 +30,7 @@ def get_rgba(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int return color[0], color[1], color[2], get_alpha_from_opacity(opacity) -def get_bgra(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: +def get_bgra(color: dataclasses.RGBColor, opacity: int) -> Tuple[int, int, int, int]: """ Gets the BGRA value for a given color and opacity. :param color: The color to use. 
Either a hex string or a tuple of BGR values. diff --git a/utils/path_handler.py b/utils/path_handler.py index 58251cb..7bf6228 100644 --- a/utils/path_handler.py +++ b/utils/path_handler.py @@ -12,7 +12,7 @@ def get_default_path() -> str: Gets the default path for saving files, which is the user's home directory under a folder called "trendgenie". :return: The default path. """ - global DEFAULT_PATH + global DEFAULT_PATH # pylint: disable=global-statement if DEFAULT_PATH is None: homepath = Path.home() DEFAULT_PATH = os.path.join(homepath, "trendgenie") diff --git a/utils/progress.py b/utils/progress.py index 52569ba..5cb5cd8 100644 --- a/utils/progress.py +++ b/utils/progress.py @@ -1,9 +1,22 @@ +""" +This module contains a function to print a progress bar to the console. +""" + import time from typing import Optional def print_progress_bar(current_iteration: int, total_iterations: int, bar_length: int = 50, - start_time: Optional[float] = None, end: str = ''): + start_time: Optional[float] = None, end: str = '') -> None: + """ + Prints a progress bar to the console. + :param current_iteration: The current iteration. + :param total_iterations: The total number of iterations. Used to calculate the percentage. + :param bar_length: The length of the progress bar. + :param start_time: The time the process started. Used to calculate the elapsed time. + :param end: The end character to use. Defaults to '' which prints all in the same line. + :return: None + """ progress_percentage = (current_iteration / total_iterations) * 100 completed_length = int(bar_length * current_iteration // total_iterations) progress_bar = 'â–ˆ' * completed_length + 'â–‘' * (bar_length - completed_length) @@ -23,4 +36,5 @@ def print_progress_bar(current_iteration: int, total_iterations: int, bar_length time_string = '' if estimated_remaining_time is not None and iterations_per_sec is not None: time_string = f'[{elapsed_time:.2f}s/{estimated_remaining_time:.2f}s, {iterations_per_sec:.2f}it/s]' - print(f'\r{progress_percentage:3.0f}%|{progress_bar}| {current_iteration}/{total_iterations} {time_string}', end=end, flush=True) + print(f'\r{progress_percentage:3.0f}%|{progress_bar}| {current_iteration}/{total_iterations} {time_string}', + end=end, flush=True) diff --git a/utils/visualizer.py b/utils/visualizer.py index e32e7bf..88619ba 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -67,7 +67,7 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict upper_bound = log_freqs[x + 1] if x < dot_count_x - 1 else end_freq + 1 band_freqs = [freq for freq in freq_keys if lower_bound <= freq < upper_bound] if not band_freqs: - closest_freq = min(freq_keys, key=lambda f: abs(f - lower_bound)) + closest_freq = min(freq_keys, key=lambda f, lb=lower_bound: abs(f - lb)) band_freqs = [closest_freq] band_loudness = [frequency_data[freq] for freq in band_freqs] From 6593f0ee6813c89f7debee1380525de3d974aac6 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Mon, 15 Jan 2024 00:20:51 -0500 Subject: [PATCH 11/13] Add requirements installation when linting --- .github/workflows/pylint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 2d4b561..aa949d3 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -15,6 +15,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + pip install -r requirements.txt pip install pylint - name: Analysing the code with pylint run: | From 
72d3eb88658343c38f4e552355b1ec72eba48c05 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Mon, 15 Jan 2024 11:39:07 -0500 Subject: [PATCH 12/13] Flip row/col in visualizer --- processing/image.py | 61 ++++++++++---------- ui/components/openai.py | 7 ++- ui/listicles/interface.py | 48 ++++++++++------ ui/listicles/utils.py | 38 +++++------- ui/music/interface.py | 49 ++++++++++------ ui/music/utils.py | 118 +++++++++++++++++++++----------------- utils/dataclasses.py | 82 +++++++++++++++++++++++++- utils/gradio.py | 7 +-- utils/visualizer.py | 37 ++++++------ 9 files changed, 273 insertions(+), 174 deletions(-) diff --git a/processing/image.py b/processing/image.py index 9fe7628..eb97bc1 100644 --- a/processing/image.py +++ b/processing/image.py @@ -35,32 +35,32 @@ def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name: str) -> ((gr.Dropdown, gr.Dropdown, gr.ColorPicker, gr.Slider, gr.Number), - (gr.Checkbox, gr.ColorPicker, gr.Slider, gr.Number), - (gr.Checkbox, gr.ColorPicker, gr.Slider)): +def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponents, + dataclasses.FontDropShadowGradioComponents, + dataclasses.FontBackgroundGradioComponents): """ Renders the text editor parameters. :param name: The name of the text editor parameters. This is used as the label for the accordion. - :return: A tuple containing the font, drop shadow, and background components. + :return: Classes containing the font, drop shadow, and background components. """ with gr.Accordion(label=name): with gr.Column(): - font_family, font_style, font_color, font_opacity, font_size = gru.render_font_picker() + font_data = gru.render_font_picker() with gr.Group(): - drop_shadow_checkbox = gr.Checkbox(False, label="Enable Drop Shadow", interactive=True) - with gr.Group(visible=drop_shadow_checkbox.value) as additional_options: + drop_shadow_enabled = gr.Checkbox(False, label="Enable Drop Shadow", interactive=True) + with gr.Group(visible=drop_shadow_enabled.value) as additional_options: drop_shadow_color, drop_shadow_opacity = gru.render_color_opacity_picker() drop_shadow_radius = gr.Number(0, label="Shadow Radius") - gru.bind_checkbox_to_visibility(drop_shadow_checkbox, additional_options) + gru.bind_checkbox_to_visibility(drop_shadow_enabled, additional_options) with gr.Group(): - background_checkbox = gr.Checkbox(False, label="Enable Background", interactive=True) - with gr.Group(visible=background_checkbox.value) as additional_options: + background_enabled = gr.Checkbox(False, label="Enable Background", interactive=True) + with gr.Group(visible=background_enabled.value) as additional_options: background_color, background_opacity = gru.render_color_opacity_picker() - gru.bind_checkbox_to_visibility(background_checkbox, additional_options) + gru.bind_checkbox_to_visibility(background_enabled, additional_options) - return ((font_family, font_style, font_size, font_color, font_opacity), - (drop_shadow_checkbox, drop_shadow_color, drop_shadow_opacity, drop_shadow_radius), - (background_checkbox, background_color, background_opacity)) + return (font_data, dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color, + drop_shadow_opacity, drop_shadow_radius), + dataclasses.FontBackgroundGradioComponents(background_enabled, background_color, background_opacity)) def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], text: str, font: 
ImageFont, @@ -256,8 +256,6 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - img_width, _ = image_pil.size - if max_width: # Prepare for text wrapping if max_width is provided wrapped_text = textwrap.fill(text, width=max_width) else: @@ -266,31 +264,32 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i lines = wrapped_text.split('\n') y_offset = 0 - max_line_width = 0 # Keep track of the widest line - total_height = 0 # Accumulate total height of text block + # max_line_width = 0 # Keep track of the widest line + # total_height = 0 # Accumulate total height of text block + text_container = dataclasses.Size(width=0, height=0) for line in lines: bbox = draw.textbbox((0, 0), line, font=font) - line_width = bbox[2] - bbox[0] - line_height = bbox[3] - bbox[1] - max_line_width = max(max_line_width, line_width) - total_height += line_height + line_size = dataclasses.Size(width=bbox[2] - bbox[0], height=bbox[3] - bbox[1]) + text_container.width = max(text_container.width, line_size.width) + text_container.height += line_size.height - text_x = position[0] + pos = dataclasses.Position + pos.x = position[0] if x_center: - text_x = (img_width - line_width) / 2 - line_y = position[1] + y_offset - y_offset += (line_height + 6) + pos.x = (image_pil.width - line_size.width) / 2 + pos.y = position[1] + y_offset + y_offset += (line_size.height + 6) if show_background: - (text_x, line_y), _ = add_background(image_pil, draw, (text_x, line_y), line, font, - fill_color=background_color, border_radius=10) + (pos.x, pos.y), _ = add_background(image_pil, draw, (pos.x, pos.y), line, font, + fill_color=background_color, border_radius=10) if show_shadow: - shadow_position = (text_x, line_y) + shadow_position = (pos.x, pos.y) add_blurred_shadow(image_pil, line, shadow_position, font, shadow_color=shadow_color, blur_radius=shadow_radius) - draw.text((text_x, line_y), line, font=font, fill=font_color) + draw.text((pos.x, pos.y), line, font=font, fill=font_color) image_pil = Image.alpha_composite(image_pil, txt_layer) - return np.array(image_pil), (max_line_width, total_height) + return np.array(image_pil), (text_container.width, text_container.height) diff --git a/ui/components/openai.py b/ui/components/openai.py index 4fec6bf..8470d54 100644 --- a/ui/components/openai.py +++ b/ui/components/openai.py @@ -2,15 +2,16 @@ This module contains ui components for the OpenAI API. """ import gradio as gr +from utils import dataclasses def render_openai_setup(show_text_model: bool = True, show_image_model: bool = True) \ - -> (gr.Textbox, gr.Dropdown, gr.Dropdown): + -> dataclasses.OpenAIGradioComponents: """ Renders the OpenAI API setup components. :param show_text_model: Whether to show the text model dropdown. :param show_image_model: Whether to show the image model dropdown. - :return: A tuple containing the API key, text model, and image model components. + :return: A class containing the API key, text model, and image model components. 
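    Example (a rough usage sketch, not from this patch; assumes a surrounding gr.Blocks context):

        import gradio as gr
        from ui.components import openai as openai_components

        with gr.Blocks():
            oai = openai_components.render_openai_setup(show_image_model=False)
            # Components are now addressed by field name rather than tuple position:
            # oai.api_key, oai.api_text_model, oai.api_image_model (None when hidden)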
""" api_text_model = None api_image_model = None @@ -25,4 +26,4 @@ def render_openai_setup(show_text_model: bool = True, show_image_model: bool = T api_image_model = gr.Dropdown(["dall-e-2", "dall-e-3"], label="API Image Model", value="dall-e-2", interactive=True) - return api_key, api_text_model, api_image_model + return dataclasses.OpenAIGradioComponents(api_key, api_text_model, api_image_model) diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index 5dd1ab6..bb15613 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -81,15 +81,11 @@ def set_json(json_file): with gr.Column(scale=3): gr.Markdown("# Parameters") with gr.Row(equal_height=False): - (nf_family, nf_style, nfs, nfc, nfo), (nse, nsc, nso, nsr), ( - nbe, nbc, nbo) = image_processing.render_text_editor_parameters("Name") - (df_family, df_style, dfs, dfc, dfo), (dse, dsc, dso, dsr), ( - dbe, dbc, dbo) = image_processing.render_text_editor_parameters("Description") + name_font, name_shadow, name_background = image_processing.render_text_editor_parameters("Name") + desc_font, desc_shadow, desc_background = image_processing.render_text_editor_parameters("Description") with gr.Row(equal_height=False): - (mf_family, mf_style, mfs, mfc, mfo), (mse, msc, mso, msr), ( - mbe, mbc, mbo) = image_processing.render_text_editor_parameters("Association") - (rf_family, rf_style, rfs, rfc, rfo), (rse, rsc, rso, rsr), ( - rbe, rbc, rbo) = image_processing.render_text_editor_parameters("Rating") + asc_font, asc_shadow, asc_background = image_processing.render_text_editor_parameters("Association") + rate_font, rate_shadow, rate_background = image_processing.render_text_editor_parameters("Rating") with gr.Column(scale=1): gr.Markdown("# Output") @@ -103,14 +99,26 @@ def set_json(json_file): save_button.click(image_processing.save_images_to_disk, inputs=[output_preview, image_type], outputs=[]) process_button.click(listicle_utils.process, inputs=[input_batch_images, input_batch_json, - nf_family, nf_style, nfs, nfc, nfo, nse, nsc, nso, nsr, nbe, - nbc, nbo, - df_family, df_style, dfs, dfc, dfo, dse, dsc, dso, dsr, dbe, - dbc, dbo, - mf_family, mf_style, mfs, mfc, mfo, mse, msc, mso, msr, mbe, - mbc, mbo, - rf_family, rf_style, rfs, rfc, rfo, rse, rsc, rso, rsr, rbe, - rbc, rbo + name_font.family, name_font.style, name_font.size, + name_font.color, name_font.opacity, name_shadow.enabled, + name_shadow.color, name_shadow.opacity, name_shadow.radius, + name_background.enabled, name_background.color, + name_background.opacity, + desc_font.family, desc_font.style, desc_font.size, + desc_font.color, desc_font.opacity, desc_shadow.enabled, + desc_shadow.color, desc_shadow.opacity, desc_shadow.radius, + desc_background.enabled, desc_background.color, + desc_background.opacity, + asc_font.family, asc_font.style, asc_font.size, + asc_font.color, asc_font.opacity, asc_shadow.enabled, + asc_shadow.color, asc_shadow.opacity, asc_shadow.radius, + asc_background.enabled, asc_background.color, + asc_background.opacity, + rate_font.family, rate_font.style, rate_font.size, + rate_font.color, rate_font.opacity, rate_shadow.enabled, + rate_shadow.color, rate_shadow.opacity, rate_shadow.radius, + rate_background.enabled, rate_background.color, + rate_background.opacity, ], outputs=[output_preview]) return input_batch_images, input_batch_json @@ -121,7 +129,8 @@ def render_generate_section() -> (gr.Button, gr.Gallery, gr.Code): Renders the Generate Artifacts section of the web app. 
:return: The send artifacts to batch button, the listicle image output gallery, and the listicle json output. """ - api_key, api_text_model, api_image_model = openai_components.render_openai_setup() + # api_key, api_text_model, api_image_model = openai_components.render_openai_setup() + open_ai_components = openai_components.render_openai_setup() with gr.Row(equal_height=False): with gr.Group(): with gr.Group(): @@ -159,8 +168,9 @@ def render_generate_section() -> (gr.Button, gr.Gallery, gr.Code): send_artifacts_to_batch_button = gr.Button("Send Artifacts to 'Batch Processing'", variant="secondary") generate_listicle_button.click(listicle_utils.generate_listicle, - inputs=[api_key, api_text_model, api_image_model, num_items, topic, - association, rating_type, details, generate_artifacts], + inputs=[open_ai_components.api_key, open_ai_components.api_text_model, + open_ai_components.api_image_model, num_items, topic, association, + rating_type, details, generate_artifacts], outputs=[listicle_output, listicle_json_output, listicle_image_output]) download_artifacts_button.click( listicle_utils.save_artifacts, diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index 13a6e2b..747c6c4 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -83,24 +83,16 @@ def process(image_files: list[Any], json_data: str, return None font_families = font_manager.get_fonts() - nff = font_families[nf_family][nf_style] - dff = font_families[df_family][df_style] - mff = font_families[mf_family][mf_style] - rff = font_families[rf_family][rf_style] images = [] - rating_offset = 34 - text_offset = 49 json_data = json.loads(json_data) - if len(image_files) != len(json_data["items"]): gr.Warning( f"Number of images ({len(image_files)}) does not match the number of items in the JSON ({len(json_data)}).") # We skip any entries that don't have an image field. 
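A tiny illustration of the lookup that the replacement line below builds (sample data assumed):

    items = [{"image": "a.png", "name": "A"}, {"name": "B"}]  # second item has no image and is skipped
    json_dict = {item["image"]: item for item in items if "image" in item}
    assert list(json_dict) == ["a.png"]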
- json_data_items = json_data["items"] - json_dict = {item["image"]: item for item in json_data_items if "image" in item} + json_dict = {item["image"]: item for item in json_data["items"] if "image" in item} for image_file in image_files: img_name = os.path.basename(image_file.name) @@ -112,31 +104,31 @@ def process(image_files: list[Any], json_data: str, img = image_processing.read_image_from_disk(image_file.name, size=(1080, 1920)) item = json_dict[img_name] - # Calculate positions for the text + # Calculate y-positions for the text top_center = (0, int(img.shape[0] * 0.13)) bottom_center = (0, int(img.shape[0] * 0.70)) # Add association and rating at the top center, one above the other - img, (_, association_height) = image_processing.add_text(img, item["association"], top_center, mff, - font_size=mfs, + img, (_, association_height) = image_processing.add_text(img, item["association"], top_center, + font_families[mf_family][mf_style], font_size=mfs, font_color=image_utils.get_rgba(mfc, mfo), - show_shadow=mse, - shadow_radius=msr, + show_shadow=mse, shadow_radius=msr, shadow_color=image_utils.get_rgba(msc, mso), show_background=mbe, background_color=image_utils.get_rgba(mbc, mbo), x_center=True) img, (_, _) = image_processing.add_text(img, f'{json_data["rating_type"]}: {item["rating"]}%', - (0, top_center[1] + association_height + rating_offset), - rff, font_size=rfs, font_color=image_utils.get_rgba(rfc, rfo), - show_shadow=rse, shadow_radius=rsr, - shadow_color=image_utils.get_rgba(rsc, rso), + (0, top_center[1] + association_height + 34), + font_families[rf_family][rf_style], font_size=rfs, + font_color=image_utils.get_rgba(rfc, rfo), show_shadow=rse, + shadow_radius=rsr, shadow_color=image_utils.get_rgba(rsc, rso), show_background=rbe, background_color=image_utils.get_rgba(rbc, rbo), x_center=True) # Add name and description at the bottom center, one above the other - img, (_, name_height) = image_processing.add_text(img, item["name"], bottom_center, nff, font_size=nfs, + img, (_, name_height) = image_processing.add_text(img, item["name"], bottom_center, + font_families[nf_family][nf_style], font_size=nfs, font_color=image_utils.get_rgba(nfc, nfo), max_width=15, show_shadow=nse, shadow_radius=nsr, @@ -145,10 +137,10 @@ def process(image_files: list[Any], json_data: str, background_color=image_utils.get_rgba(nbc, nbo), x_center=True) img, (_, _) = image_processing.add_text(img, f'"{item["description"]}"', - (0, bottom_center[1] + name_height + text_offset), dff, - font_size=dfs, font_color=image_utils.get_rgba(dfc, dfo), - show_shadow=dse, shadow_radius=dsr, - shadow_color=image_utils.get_rgba(dsc, dso), + (0, bottom_center[1] + name_height + 49), + font_families[df_family][df_style], font_size=dfs, + font_color=image_utils.get_rgba(dfc, dfo), show_shadow=dse, + shadow_radius=dsr, shadow_color=image_utils.get_rgba(dsc, dso), show_background=dbe, background_color=image_utils.get_rgba(dbc, dbo), max_width=43, x_center=True) diff --git a/ui/music/interface.py b/ui/music/interface.py index 3361495..8808022 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -41,7 +41,7 @@ def render_generate_cover() -> (gr.Button, gr.Button, gr.Image): sending the generated cover image to the "Add Text to Image" section, and an image display component for displaying the generated cover image. 
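    For scale: the listicle layout above anchors text at fixed fractions of the frame height, so with the
    1080x1920 canvases used here the two anchors work out to:

        height = 1920
        top_center = (0, int(height * 0.13))     # y = 249
        bottom_center = (0, int(height * 0.70))  # y = 1344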
""" - api_key, _, api_image_model = openai_components.render_openai_setup(show_text_model=False) + open_ai_components = openai_components.render_openai_setup(show_text_model=False) with gr.Row(equal_height=False): with gr.Group(): image_prompt = gr.Textbox(label="Image Prompt", lines=6, max_lines=10) @@ -54,7 +54,8 @@ def render_generate_cover() -> (gr.Button, gr.Button, gr.Image): send_to_process_button = gr.Button("Send Image to 'Add Text to Image'", variant="secondary") send_to_create_video_button = gr.Button("Send Image to 'Create Music Video'", variant="secondary") - generate_image_button.click(generate_cover_image, inputs=[api_key, api_image_model, image_prompt], + generate_image_button.click(generate_cover_image, inputs=[open_ai_components.api_key, + open_ai_components.api_image_model, image_prompt], outputs=[image_output]) save_image_button.click(image_processing.save_image_to_disk, inputs=[image_output, image_name, image_suffix], outputs=[]) @@ -70,7 +71,7 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): display component for displaying the cover image before processing, and an image display component for displaying the cover image after processing. """ - with gr.Column(): + with (gr.Column()): gr.Markdown("## Input") with gr.Group(): input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", @@ -80,13 +81,13 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): with gr.Row(equal_height=False): with gr.Group(): artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - (af_family, af_style, afs, afc, afo), (ase, asc, aso, asr), ( - abe, abc, abo) = image_processing.render_text_editor_parameters("Artist Text Parameters") + artist_font, artist_shadow, artist_background = image_processing.render_text_editor_parameters( + "Artist Text Parameters") with gr.Group(): song_name = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - (sf_family, sf_style, sfs, sfc, sfo), (sse, ssc, sso, ssr), ( - sbe, sbc, sbo) = image_processing.render_text_editor_parameters("Song Text Parameters") + song_font, song_shadow, song_background = \ + image_processing.render_text_editor_parameters("Song Text Parameters") process_button = gr.Button("Process", variant="primary") @@ -97,8 +98,14 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): send_to_create_video_button = gr.Button("Send Image to 'Create Music Video'", variant="secondary") process_button.click(process, inputs=[input_image, artist_name, song_name, - af_family, af_style, afs, afc, afo, ase, asc, aso, asr, abe, abc, abo, - sf_family, sf_style, sfs, sfc, sfo, sse, ssc, sso, ssr, sbe, sbc, sbo], + artist_font.family, artist_font.style, artist_font.size, artist_font.color, + artist_font.opacity, artist_shadow.enabled, artist_shadow.color, + artist_shadow.opacity, artist_shadow.radius, artist_background.enabled, + artist_background.color, artist_background.opacity, song_font.family, + song_font.style, song_font.size, song_font.color, song_font.opacity, + song_shadow.enabled, song_shadow.color, song_shadow.opacity, + song_shadow.radius, song_background.enabled, song_background.color, + song_background.opacity], outputs=[image_output]) save_image_button.click(image_processing.save_image_to_disk, inputs=[image_output, image_name, image_suffix], outputs=[]) @@ -120,12 +127,11 @@ def render_music_video_creation() -> gr.Image: background_color, background_opacity = gru.render_color_opacity_picker(default_name_label="Background") with gr.Group(): artist_name 
= gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - (artist_ffamily, artist_fstyle, artist_fsize, artist_fcolor, artist_fopacity), (ase, asc, aso, asr), ( - abe, abc, abo) = image_processing.render_text_editor_parameters("Text Parameters") + artist_font, artist_shadow, artist_background = \ + image_processing.render_text_editor_parameters("Text Parameters") with gr.Group(): song_title = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - (song_ffamily, song_fstyle, song_fsize, song_fcolor, song_fopacity), (sse, ssc, sso, ssr), ( - sbe, sbc, sbo) = image_processing.render_text_editor_parameters("Text Parameters") + song_font, song_shadow, song_background = image_processing.render_text_editor_parameters("Text Parameters") with gr.Column(): # Defaulting to 1. It's a still image, but may expand by adding some effects (grain, and not sure what else) fps = gr.Number(value=1, label="FPS", minimum=1, maximum=144) @@ -159,12 +165,17 @@ def render_music_video_creation() -> gr.Image: with gr.Group(): video_output, video_name, video_suffix, save_video_button = video_processing.render_video_output() - create_video_button.click(create_music_video, inputs=[cover_image, audio_filepath, fps, - artist_name, artist_ffamily, artist_fstyle, artist_fsize, - artist_fcolor, artist_fopacity, ase, asc, aso, asr, abe, abc, - abo, song_title, song_ffamily, song_fstyle, song_fsize, - song_fcolor, song_fopacity, sse, ssc, sso, ssr, sbe, sbc, sbo, - background_color, background_opacity, + create_video_button.click(create_music_video, inputs=[cover_image, audio_filepath, fps, artist_name, + artist_font.family, artist_font.style, artist_font.size, + artist_font.color, artist_font.opacity, artist_shadow.enabled, + artist_shadow.color, artist_shadow.opacity, + artist_shadow.radius, artist_background.enabled, + artist_background.color, artist_background.opacity, + song_title, song_font.family, song_font.style, song_font.size, + song_font.color, song_font.opacity, song_shadow.enabled, + song_shadow.color, song_shadow.opacity, song_shadow.radius, + song_background.enabled, song_background.color, + song_background.opacity, background_color, background_opacity, generate_audio_visualizer_button, audio_visualizer_color, audio_visualizer_opacity, audio_visualizer_drawing, audio_visualizer_num_rows, audio_visualizer_num_columns, diff --git a/ui/music/utils.py b/ui/music/utils.py index 7bd5105..10726b9 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -45,6 +45,48 @@ def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]] return downsampled_frequency_loudness, downsampled_times +def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, audio_length: int, fps: int, + audio_visualizer: dataclasses.RGBOpacity, dot_size: dataclasses.MinMax, + dot_count: dataclasses.RowCol, visualizer_drawing: Optional[str] = None) -> str: + print("Generating audio visualizer...") + + audio_visualizer_color_and_opacity = image_utils.get_rgba(audio_visualizer.color, audio_visualizer.opacity) + + custom_drawing = None + if visualizer_drawing is not None and visualizer_drawing != "": + custom_drawing = cv2.imread(visualizer_drawing, cv2.IMREAD_UNCHANGED) + if custom_drawing.shape[2] == 3: + custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGR2RGBA) + else: + custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGRA2RGBA) + + frequency_loudness, times = analyze_audio(audio_path, fps) + frame_cache = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) + + 
total_iterations = len(times) + start_time = time.time() + vis = visualizer.Visualizer(size=dataclasses.Size(frame_size.width, frame_size.height), + dot_size=dot_size, color=audio_visualizer_color_and_opacity, + dot_count=dataclasses.RowCol(dot_count.row, dot_count.col)) + vis.initialize_static_values() + temp_visualizer_images_dir = tempfile.mkdtemp() + os.makedirs(temp_visualizer_images_dir, exist_ok=True) + for i, time_point in enumerate(times): + if time_point > audio_length: + break + frame = frame_cache.copy() + vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing) + frame_np = np.array(frame) + frame_np = cv2.cvtColor(frame_np, cv2.COLOR_RGBA2BGRA) + frame_filename = f'{temp_visualizer_images_dir}/frame_{i:05d}.png' + cv2.imwrite(frame_filename, frame_np) + + progress.print_progress_bar(i, total_iterations, start_time=start_time) + progress.print_progress_bar(total_iterations, total_iterations, end='\n', start_time=start_time) + + return temp_visualizer_images_dir + + def create_music_video( image_path: str, audio_path: str, fps: int, artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int, @@ -112,7 +154,7 @@ def create_music_video( # Could probably expand to 4k, but unnecessary for this type of music video # Maybe in a future iteration it could be worth it - width, height = 1920, 1080 + frame_size = dataclasses.Size(1920, 1080) # Set up cover cover = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) @@ -122,33 +164,32 @@ def create_music_video( cover = cv2.cvtColor(cover, cv2.COLOR_BGRA2RGBA) # Create canvas with 4 channels (RGBA) - canvas = np.zeros((height, width, 4), dtype=np.uint8) + canvas = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) # Calculate dimensions for resizing the cover to fit within the canvas while maintaining its aspect ratio - cover_width, cover_height = cover.shape[1], cover.shape[0] - canvas_width, canvas_height = width, height - resize_factor = min(canvas_width / cover_width, canvas_height / cover_height) + cover_size = dataclasses.Size(cover.shape[1], cover.shape[0]) + resize_factor = min(frame_size.width / cover_size.width, frame_size.height / cover_size.height) resize_factor *= (7 / 10) - new_width = int(cover_width * resize_factor) - new_height = int(cover_height * resize_factor) + cover_size.width = int(cover_size.width * resize_factor) + cover_size.height = int(cover_size.height * resize_factor) # Calculate cover position to center it on the canvas - cover_pos = ((canvas_width - new_width) // 2, (canvas_height - new_height) // 2) - cover = cv2.resize(cover, (new_width, new_height)) + cover_pos = ((frame_size.width - cover_size.width) // 2, (frame_size.height - cover_size.height) // 2) + cover = cv2.resize(cover, (cover_size.width, cover_size.height)) - canvas[cover_pos[1]:cover_pos[1] + new_height, cover_pos[0]:cover_pos[0] + new_width] = cover + canvas[cover_pos[1]:cover_pos[1] + cover_size.height, cover_pos[0]:cover_pos[0] + cover_size.width] = cover # Load song / audio audio_clip = AudioFileClip(audio_path) # Add video background background = cv2.imread(image_path) - background = cv2.resize(background, (width, height)) + background = cv2.resize(background, (frame_size.width, frame_size.height)) background = cv2.GaussianBlur(background, (49, 49), 0) if background.shape[2] == 3: background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA) background_color_overlay = image_utils.get_bgra(background_color, background_opacity) - overlay = np.full((height, width, 4), 
background_color_overlay, dtype=np.uint8) + overlay = np.full((frame_size.height, frame_size.width, 4), background_color_overlay, dtype=np.uint8) alpha_overlay = overlay[:, :, 3] / 255.0 alpha_background = background[:, :, 3] / 255.0 for c in range(0, 3): @@ -159,48 +200,21 @@ def create_music_video( tmp_background_image_path = tempfile.mktemp(suffix=".png") cv2.imwrite(tmp_background_image_path, background_bgr) - audio_visualizer_color_and_opacity = image_utils.get_rgba(audio_visualizer_color, audio_visualizer_opacity) - - # Add audio visualizer - custom_drawing = None - if visualizer_drawing is not None and visualizer_drawing != "": - custom_drawing = cv2.imread(visualizer_drawing, cv2.IMREAD_UNCHANGED) - if custom_drawing.shape[2] == 3: - custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGR2RGBA) - else: - custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGRA2RGBA) - if generate_audio_visualizer: - print("Generating audio visualizer...") - frequency_loudness, times = analyze_audio(audio_path, fps) - frame_cache = np.zeros((height, width, 4), dtype=np.uint8) - - total_iterations = len(times) - start_time = time.time() - vis = visualizer.Visualizer(width=width, height=height, base_size=audio_visualizer_min_size, - max_size=audio_visualizer_max_size, color=audio_visualizer_color_and_opacity, - dot_count=(audio_visualizer_num_rows, audio_visualizer_num_columns)) - vis.initialize_static_values() - temp_visualizer_images_dir = tempfile.mkdtemp() - os.makedirs(temp_visualizer_images_dir, exist_ok=True) - for i, time_point in enumerate(times): - if time_point > audio_clip.duration: - break - frame = frame_cache.copy() - vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing) - frame_np = np.array(frame) - frame_np = cv2.cvtColor(frame_np, cv2.COLOR_RGBA2BGRA) - frame_filename = f'{temp_visualizer_images_dir}/frame_{i:05d}.png' - cv2.imwrite(frame_filename, frame_np) - - progress.print_progress_bar(i, total_iterations, start_time=start_time) - progress.print_progress_bar(total_iterations, total_iterations, end='\n', start_time=start_time) + temp_visualizer_images_dir = _audio_visualizer_generator(frame_size, audio_path, audio_clip.duration, fps, + dataclasses.RGBOpacity(audio_visualizer_color, + audio_visualizer_opacity), + dataclasses.MinMax(audio_visualizer_min_size, + audio_visualizer_max_size), + dataclasses.RowCol(audio_visualizer_num_rows, + audio_visualizer_num_columns), + visualizer_drawing=visualizer_drawing) # Add text font_families = font_manager.get_fonts() - text_canvas = np.zeros((height, width, 4), dtype=np.uint8) + text_canvas = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) - song_pos = (20, int(height * 0.925)) + song_pos = (20, int(frame_size.height * 0.925)) text_canvas, (_, song_height) = image_processing.add_text(text_canvas, song, song_pos, font_families[song_font_type][song_font_style], font_size=song_font_size, @@ -381,8 +395,6 @@ def process(image_path: str, artist: str, song: str, return None font_families = font_manager.get_fonts() - aff = font_families[af_family][af_style] - sff = font_families[sf_family][sf_style] img = image_processing.read_image_from_disk(image_path) @@ -390,7 +402,7 @@ def process(image_path: str, artist: str, song: str, top_center = (0, int(img.shape[0] * 0.13)) bottom_center = (0, int(img.shape[0] * 0.87)) - img, (_, _) = image_processing.add_text(img, artist, top_center, aff, + img, (_, _) = image_processing.add_text(img, artist, top_center, font_families[af_family][af_style], 
font_size=afs, font_color=image_utils.get_rgba(afc, afo), show_shadow=ase, @@ -400,7 +412,7 @@ def process(image_path: str, artist: str, song: str, background_color=image_utils.get_rgba(abc, abo), x_center=True) - img, (_, _) = image_processing.add_text(img, song, bottom_center, sff, font_size=sfs, + img, (_, _) = image_processing.add_text(img, song, bottom_center, font_families[sf_family][sf_style], font_size=sfs, font_color=image_utils.get_rgba(sfc, sfo), max_width=15, show_shadow=sse, shadow_radius=ssr, diff --git a/utils/dataclasses.py b/utils/dataclasses.py index 655e7dd..77c3531 100644 --- a/utils/dataclasses.py +++ b/utils/dataclasses.py @@ -1,8 +1,9 @@ """ This module contains dataclasses and type aliases used in the project. """ -from typing import Union +from typing import Union, Optional from dataclasses import dataclass +import gradio as gr @dataclass @@ -25,4 +26,83 @@ class Position: y: int +@dataclass +class Size: + """ + A dataclass representing a size on a 2d plane. + """ + width: int + height: int + + +@dataclass +class OpenAIGradioComponents: + """ + A dataclass representing the components of the OpenAI API. + """ + api_key: gr.Textbox + api_text_model: Optional[gr.Dropdown] + api_image_model: Optional[gr.Dropdown] + + +@dataclass +class RGBOpacity: + """ + A dataclass representing an RGB color with an opacity value. + """ + color: tuple[int, int, int] + opacity: int + + +@dataclass +class MinMax: + """ + A dataclass representing a minimum and maximum value. + """ + min: int + max: int + + +@dataclass +class RowCol: + """ + A dataclass representing a row and column. + """ + row: int + col: int + + +@dataclass +class FontGradioComponents: + """ + A dataclass representing the components of the font editor. + """ + family: gr.Dropdown + style: gr.Dropdown + color: gr.ColorPicker + opacity: gr.Slider + size: gr.Number + + +@dataclass +class FontDropShadowGradioComponents: + """ + A dataclass representing the components of the drop shadow editor. + """ + enabled: gr.Checkbox + color: gr.ColorPicker + opacity: gr.Slider + radius: gr.Number + + +@dataclass +class FontBackgroundGradioComponents: + """ + A dataclass representing the components of the background editor. + """ + enabled: gr.Checkbox + color: gr.ColorPicker + opacity: gr.Slider + + RGBColor = Union[str, tuple[int, int, int]] diff --git a/utils/gradio.py b/utils/gradio.py index f086830..54353ff 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -2,7 +2,7 @@ This module contains utility functions for rendering widely-used Gradio components. """ import gradio as gr -from utils import font_manager +from utils import font_manager, dataclasses def render_color_opacity_picker(default_name_label: str = "Font") -> tuple[gr.ColorPicker, gr.Slider]: @@ -33,8 +33,7 @@ def bind_checkbox_to_visibility(checkbox: gr.Checkbox, group: gr.Group): ) -def render_font_picker(default_font_size: int = 55) \ - -> tuple[gr.Dropdown, gr.Dropdown, gr.ColorPicker, gr.Slider, gr.Number]: +def render_font_picker(default_font_size: int = 55) -> dataclasses.FontGradioComponents: """ Renders a font picker with the appropriate styling. :param default_font_size: The default font size to use. 
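    The containers added in utils/dataclasses.py above group what were previously loose tuples; a quick
    sketch of their use (values arbitrary):

        from utils import dataclasses

        size = dataclasses.Size(width=1920, height=1080)
        dots = dataclasses.RowCol(row=90, col=65)
        dot_size = dataclasses.MinMax(min=1, max=7)
        color: dataclasses.RGBColor = (255, 255, 255)  # a hex string such as "#ffffff" is also valid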
@@ -62,7 +61,7 @@ def update_font_styles(selected_font_family): font_color, font_opacity = render_color_opacity_picker() font_size = gr.Number(default_font_size, label="Font Size", interactive=True) - return font_family, font_style, font_color, font_opacity, font_size + return dataclasses.FontGradioComponents(font_family, font_style, font_color, font_opacity, font_size) def render_tool_description(description: str): diff --git a/utils/visualizer.py b/utils/visualizer.py index 88619ba..8e53285 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -4,6 +4,7 @@ from typing import Dict, Optional import numpy as np import cv2 +from utils import dataclasses class Visualizer: @@ -11,13 +12,11 @@ class Visualizer: This class is used to draw the visualizer on the canvas. Will be replaced with a more general solution in the future to allow for more customization. """ - def __init__(self, base_size, max_size, color, dot_count, width, height): - self.base_size = base_size - self.max_size = max_size + def __init__(self, dot_size: dataclasses.MinMax, color, dot_count: dataclasses.RowCol, size: dataclasses.Size): + self.dot_size = dot_size self.color = color self.dot_count = dot_count - self.width = width - self.height = height + self.size = size self.cached_dot_positions = None self.cached_resized_drawing = {} @@ -27,13 +26,13 @@ def initialize_static_values(self: "Visualizer") -> None: :return: None. """ # Calculate and store dot positions - x_positions = (self.width / self.dot_count[0]) * np.arange(self.dot_count[0]) + ( - self.width / self.dot_count[0] / 2) - y_positions = (self.height / self.dot_count[1]) * np.arange(self.dot_count[1]) + ( - self.height / self.dot_count[1] / 2) + x_positions = (self.size.width / self.dot_count.col) * np.arange(self.dot_count.col) + ( + self.size.width / self.dot_count.col / 2) + y_positions = (self.size.height / self.dot_count.row) * np.arange(self.dot_count.row) + ( + self.size.height / self.dot_count.row / 2) grid_x, grid_y = np.meshgrid(x_positions, y_positions) - self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count[0]) for y in - range(self.dot_count[1])] + self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count.col) for y in + range(self.dot_count.row)] def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], custom_drawing: Optional[np.ndarray] = None) -> None: @@ -44,15 +43,11 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict :param custom_drawing: A custom drawing to use instead of the default circle. :return: None. 
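    For reference, the grid cached by initialize_static_values above places one dot per cell, centred within
    the cell; a reduced numpy sketch with assumed dimensions:

        import numpy as np

        width, height, cols, rows = 8, 6, 4, 3
        x = (width / cols) * np.arange(cols) + width / cols / 2    # [1., 3., 5., 7.]
        y = (height / rows) * np.arange(rows) + height / rows / 2  # [1., 3., 5.]
        grid_x, grid_y = np.meshgrid(x, y)
        positions = [(grid_x[r, c], grid_y[r, c]) for c in range(cols) for r in range(rows)]
        assert len(positions) == rows * cols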
""" - # Calculate and store dot positions - dot_count_x = self.dot_count[0] - dot_count_y = self.dot_count[1] - # Precompute log frequencies freq_keys = np.array(list(frequency_data.keys())) start_freq = freq_keys[freq_keys > 0][0] if freq_keys[freq_keys > 0].size > 0 else 1.0 end_freq = freq_keys[-1] - log_freqs = np.logspace(np.log10(start_freq), np.log10(end_freq), dot_count_x) + log_freqs = np.logspace(np.log10(start_freq), np.log10(end_freq), self.dot_count.col) # Find the maximum and minimum loudness values, ignoring -80 dB freq_bands = np.array([frequency_data[key] for key in freq_keys if key > 0]) # Ignore 0 Hz @@ -62,9 +57,9 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict # Precompute loudness values loudness_values = {} - for x in range(dot_count_x): + for x in range(self.dot_count.col): lower_bound = log_freqs[x] - upper_bound = log_freqs[x + 1] if x < dot_count_x - 1 else end_freq + 1 + upper_bound = log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 band_freqs = [freq for freq in freq_keys if lower_bound <= freq < upper_bound] if not band_freqs: closest_freq = min(freq_keys, key=lambda f, lb=lower_bound: abs(f - lb)) @@ -76,15 +71,15 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict cached_dot_sizes = {} for i, (pos_x, pos_y) in enumerate(self.cached_dot_positions): - column = i // dot_count_y # Ensure the correct column is computed + column = i // self.dot_count.row # Ensure the correct column is computed if column not in cached_dot_sizes: avg_loudness = loudness_values[column] # Scale the loudness to the dot size scaled_loudness = (avg_loudness - min_loudness) / ( max_loudness - min_loudness) if max_loudness != min_loudness else 0 - dot_size = self.base_size + scaled_loudness * (self.max_size - self.base_size) - dot_size = min(max(dot_size, self.base_size), self.max_size) + dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min) + dot_size = min(max(dot_size, self.dot_size.min), self.dot_size.max) cached_dot_sizes[column] = dot_size else: From 04a622b36de81be339d76bd7aa060f2a2649b722 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Tue, 16 Jan 2024 19:42:27 -0500 Subject: [PATCH 13/13] 100% Linter --- .github/workflows/pylint.yml | 4 +- .pylintrc | 1 + processing/image.py | 42 +++-- processing/video.py | 6 +- requirements.txt | 4 +- ui/listicles/interface.py | 68 +++++--- ui/listicles/utils.py | 1 + ui/music/interface.py | 116 ++++++++----- ui/music/utils.py | 317 +++++++++++++++++++---------------- utils/dataclasses.py | 68 +++++++- utils/gradio.py | 11 +- utils/image.py | 44 +++++ utils/visualizer.py | 131 ++++++++++----- 13 files changed, 530 insertions(+), 283 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index aa949d3..e6dde2a 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -1,11 +1,11 @@ name: Python linter on: [push] jobs: - build: + lint: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"] + python-version: ["3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/.pylintrc b/.pylintrc index 2083c87..b390428 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,6 +1,7 @@ [MAIN] max-line-length=120 max-attributes=10 +max-locals=20 # Currently, this is added because gradio Inputs don't support passing tuples/dataclasses/etc. as arguments, meaning I # can't shorten some methods that take a lot of arguments. 
disable=too-many-arguments diff --git a/processing/image.py b/processing/image.py index eb97bc1..2c35dfb 100644 --- a/processing/image.py +++ b/processing/image.py @@ -35,9 +35,7 @@ def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponents, - dataclasses.FontDropShadowGradioComponents, - dataclasses.FontBackgroundGradioComponents): +def render_text_editor_parameters(name: str) -> dataclasses.FontDisplayGradioComponents: """ Renders the text editor parameters. :param name: The name of the text editor parameters. This is used as the label for the accordion. @@ -49,18 +47,21 @@ def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponent with gr.Group(): drop_shadow_enabled = gr.Checkbox(False, label="Enable Drop Shadow", interactive=True) with gr.Group(visible=drop_shadow_enabled.value) as additional_options: - drop_shadow_color, drop_shadow_opacity = gru.render_color_opacity_picker() + drop_shadow_color_opacity = gru.render_color_opacity_picker() drop_shadow_radius = gr.Number(0, label="Shadow Radius") gru.bind_checkbox_to_visibility(drop_shadow_enabled, additional_options) with gr.Group(): background_enabled = gr.Checkbox(False, label="Enable Background", interactive=True) with gr.Group(visible=background_enabled.value) as additional_options: - background_color, background_opacity = gru.render_color_opacity_picker() + background_color_opacity = gru.render_color_opacity_picker() gru.bind_checkbox_to_visibility(background_enabled, additional_options) - return (font_data, dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color, - drop_shadow_opacity, drop_shadow_radius), - dataclasses.FontBackgroundGradioComponents(background_enabled, background_color, background_opacity)) + drop_shadow_data = dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color_opacity.color, + drop_shadow_color_opacity.opacity, drop_shadow_radius) + background_data = dataclasses.FontBackgroundGradioComponents(background_enabled, background_color_opacity.color, + background_color_opacity.opacity) + + return dataclasses.FontDisplayGradioComponents(font_data, drop_shadow_data, background_data) def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], text: str, font: ImageFont, @@ -216,7 +217,23 @@ def save_image_to_disk(image_path: str, name: Optional[str] = None, save_dir: st return save_dir -# Function to add text to an image with custom font, size, and wrapping +def _get_lines(text: str, max_width: Optional[int] = None) -> list[str]: + """ + Gets the lines of text from a string. + :param text: The text to get the lines from. + :param max_width: The maximum width of the text before wrapping. + :return: A list of lines. 
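    Example behaviour of this helper (wrap width assumed; textwrap is from the standard library):

        import textwrap

        lines = textwrap.fill("The quick brown fox jumps over", width=15).split('\n')
        # -> ['The quick brown', 'fox jumps over']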
+ """ + if max_width: # Prepare for text wrapping if max_width is provided + wrapped_text = textwrap.fill(text, width=max_width) + else: + wrapped_text = text + + return wrapped_text.split('\n') + + +# A lot of the reported variables come from the parameters +# pylint: disable=too-many-locals def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[int, int], font_path: str, font_size: int, font_color: Tuple[int, int, int, int] = (255, 255, 255, 255), shadow_color: Tuple[int, int, int, int] = (255, 255, 255, 255), @@ -256,12 +273,7 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - if max_width: # Prepare for text wrapping if max_width is provided - wrapped_text = textwrap.fill(text, width=max_width) - else: - wrapped_text = text - - lines = wrapped_text.split('\n') + lines = _get_lines(text, max_width) y_offset = 0 # max_line_width = 0 # Keep track of the widest line diff --git a/processing/video.py b/processing/video.py index 3d19e59..cbb4036 100644 --- a/processing/video.py +++ b/processing/video.py @@ -8,13 +8,13 @@ from typing import Optional, Literal import gradio as gr from moviepy.editor import VideoFileClip -from utils import path_handler +from utils import path_handler, dataclasses VIDEO_FOLDER = "videos" default_path = os.path.join(path_handler.get_default_path(), VIDEO_FOLDER) -def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): +def render_video_output() -> dataclasses.VideoOutputGradioComponents: """ Creates and returns a set of Gradio interface components for video output. @@ -32,7 +32,7 @@ def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): video_suffix = gr.Dropdown([".mp4", ".mov"], value=".mp4", label="File Type", allow_custom_value=False) save_video_button = gr.Button("Save To Disk", variant="primary") - return video_output, video_name, video_suffix, save_video_button + return dataclasses.VideoOutputGradioComponents(video_output, video_name, video_suffix, save_video_button) def save_video_to_disk(video_path: str, name: Optional[str] = None, video_suffix: Literal[".mp4", ".mov"] = ".mp4", diff --git a/requirements.txt b/requirements.txt index 7922ce1..7614d6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ gradio~=4.12.0 inflect~=7.0.0 openai~=1.6.1 -numpy~=1.23.5 -Pillow~=8.4.0 +numpy~=1.26.3 +Pillow~=10.2.0 opencv-python-headless~=4.8.1.78 fonttools~=4.47.0 moviepy~=1.0.3 diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index bb15613..87d386a 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -81,11 +81,11 @@ def set_json(json_file): with gr.Column(scale=3): gr.Markdown("# Parameters") with gr.Row(equal_height=False): - name_font, name_shadow, name_background = image_processing.render_text_editor_parameters("Name") - desc_font, desc_shadow, desc_background = image_processing.render_text_editor_parameters("Description") + name_font_display = image_processing.render_text_editor_parameters("Name") + desc_font_display = image_processing.render_text_editor_parameters("Description") with gr.Row(equal_height=False): - asc_font, asc_shadow, asc_background = image_processing.render_text_editor_parameters("Association") - rate_font, rate_shadow, rate_background = image_processing.render_text_editor_parameters("Rating") + asc_font_display = image_processing.render_text_editor_parameters("Association") + rate_font_display = 
image_processing.render_text_editor_parameters("Rating") with gr.Column(scale=1): gr.Markdown("# Output") @@ -99,26 +99,46 @@ def set_json(json_file): save_button.click(image_processing.save_images_to_disk, inputs=[output_preview, image_type], outputs=[]) process_button.click(listicle_utils.process, inputs=[input_batch_images, input_batch_json, - name_font.family, name_font.style, name_font.size, - name_font.color, name_font.opacity, name_shadow.enabled, - name_shadow.color, name_shadow.opacity, name_shadow.radius, - name_background.enabled, name_background.color, - name_background.opacity, - desc_font.family, desc_font.style, desc_font.size, - desc_font.color, desc_font.opacity, desc_shadow.enabled, - desc_shadow.color, desc_shadow.opacity, desc_shadow.radius, - desc_background.enabled, desc_background.color, - desc_background.opacity, - asc_font.family, asc_font.style, asc_font.size, - asc_font.color, asc_font.opacity, asc_shadow.enabled, - asc_shadow.color, asc_shadow.opacity, asc_shadow.radius, - asc_background.enabled, asc_background.color, - asc_background.opacity, - rate_font.family, rate_font.style, rate_font.size, - rate_font.color, rate_font.opacity, rate_shadow.enabled, - rate_shadow.color, rate_shadow.opacity, rate_shadow.radius, - rate_background.enabled, rate_background.color, - rate_background.opacity, + name_font_display.font.family, name_font_display.font.style, + name_font_display.font.size, name_font_display.font.color, + name_font_display.font.opacity, + name_font_display.drop_shadow.enabled, + name_font_display.drop_shadow.color, + name_font_display.drop_shadow.opacity, + name_font_display.drop_shadow.radius, + name_font_display.background.enabled, + name_font_display.background.color, + name_font_display.background.opacity, + desc_font_display.font.family, desc_font_display.font.style, + desc_font_display.font.size, desc_font_display.font.color, + desc_font_display.font.opacity, + desc_font_display.drop_shadow.enabled, + desc_font_display.drop_shadow.color, + desc_font_display.drop_shadow.opacity, + desc_font_display.drop_shadow.radius, + desc_font_display.background.enabled, + desc_font_display.background.color, + desc_font_display.background.opacity, + asc_font_display.font.family, asc_font_display.font.style, + asc_font_display.font.size, asc_font_display.font.color, + asc_font_display.font.opacity, + asc_font_display.drop_shadow.enabled, + asc_font_display.drop_shadow.color, + asc_font_display.drop_shadow.opacity, + asc_font_display.drop_shadow.radius, + asc_font_display.background.enabled, + asc_font_display.background.color, + asc_font_display.background.opacity, + rate_font_display.font.family, rate_font_display.font.style, + rate_font_display.font.size, rate_font_display.font.color, + rate_font_display.font.opacity, + rate_font_display.drop_shadow.enabled, + rate_font_display.drop_shadow.color, + rate_font_display.drop_shadow.opacity, + rate_font_display.drop_shadow.radius, + rate_font_display.background.enabled, + rate_font_display.background.color, + rate_font_display.background.opacity, ], outputs=[output_preview]) return input_batch_images, input_batch_json diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index 747c6c4..01a7898 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -11,6 +11,7 @@ import api.chatgpt as chatgpt_api +# pylint: disable=too-many-locals def process(image_files: list[Any], json_data: str, nf_family: str, nf_style: str, nfs: int, nfc: dataclasses.RGBColor, nfo: int, nse: bool, nsc: dataclasses.RGBColor, nso: 
int, nsr, nbe: bool, nbc: dataclasses.RGBColor, nbo: int, diff --git a/ui/music/interface.py b/ui/music/interface.py index 8808022..c6e199b 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -7,6 +7,7 @@ import processing.video as video_processing import processing.image as image_processing import ui.components.openai as openai_components +from utils import dataclasses def render_music_section() -> None: @@ -71,7 +72,7 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): display component for displaying the cover image before processing, and an image display component for displaying the cover image after processing. """ - with (gr.Column()): + with gr.Column(): gr.Markdown("## Input") with gr.Group(): input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", @@ -81,13 +82,11 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): with gr.Row(equal_height=False): with gr.Group(): artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - artist_font, artist_shadow, artist_background = image_processing.render_text_editor_parameters( - "Artist Text Parameters") + artist_font_display = image_processing.render_text_editor_parameters("Artist Text Parameters") with gr.Group(): song_name = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - song_font, song_shadow, song_background = \ - image_processing.render_text_editor_parameters("Song Text Parameters") + song_font_display = image_processing.render_text_editor_parameters("Song Text Parameters") process_button = gr.Button("Process", variant="primary") @@ -98,14 +97,20 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): send_to_create_video_button = gr.Button("Send Image to 'Create Music Video'", variant="secondary") process_button.click(process, inputs=[input_image, artist_name, song_name, - artist_font.family, artist_font.style, artist_font.size, artist_font.color, - artist_font.opacity, artist_shadow.enabled, artist_shadow.color, - artist_shadow.opacity, artist_shadow.radius, artist_background.enabled, - artist_background.color, artist_background.opacity, song_font.family, - song_font.style, song_font.size, song_font.color, song_font.opacity, - song_shadow.enabled, song_shadow.color, song_shadow.opacity, - song_shadow.radius, song_background.enabled, song_background.color, - song_background.opacity], + artist_font_display.font.family, artist_font_display.font.style, + artist_font_display.font.size, artist_font_display.font.color, + artist_font_display.font.opacity, artist_font_display.drop_shadow.enabled, + artist_font_display.drop_shadow.color, + artist_font_display.drop_shadow.opacity, + artist_font_display.drop_shadow.radius, + artist_font_display.background.enabled, + artist_font_display.background.color, artist_font_display.background.opacity, + song_font_display.font.family, song_font_display.font.style, + song_font_display.font.size, song_font_display.font.color, + song_font_display.font.opacity, song_font_display.drop_shadow.enabled, + song_font_display.drop_shadow.color, song_font_display.drop_shadow.opacity, + song_font_display.drop_shadow.radius, song_font_display.background.enabled, + song_font_display.background.color, song_font_display.background.opacity], outputs=[image_output]) save_image_button.click(image_processing.save_image_to_disk, inputs=[image_output, image_name, image_suffix], outputs=[]) @@ -124,14 +129,13 @@ def render_music_video_creation() -> gr.Image: show_share_button=False, show_download_button=False, 
                                scale=2, image_mode="RGBA")
         audio_filepath = gr.File(label="Audio", file_types=["audio"], scale=1, height=100)
     with gr.Column():
-        background_color, background_opacity = gru.render_color_opacity_picker(default_name_label="Background")
+        background_color_opacity = gru.render_color_opacity_picker(default_name_label="Background")
         with gr.Group():
             artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1)
-            artist_font, artist_shadow, artist_background = \
-                image_processing.render_text_editor_parameters("Text Parameters")
+            artist_font_display = image_processing.render_text_editor_parameters("Text Parameters")
         with gr.Group():
             song_title = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2)
-            song_font, song_shadow, song_background = image_processing.render_text_editor_parameters("Text Parameters")
+            song_font_display = image_processing.render_text_editor_parameters("Text Parameters")
     with gr.Column():
         # Defaulting to 1. It's a still image, but may expand by adding some effects (grain, and not sure what else)
         fps = gr.Number(value=1, label="FPS", minimum=1, maximum=144)
@@ -140,48 +144,70 @@ def render_music_video_creation() -> gr.Image:
             generate_audio_visualizer_button = gr.Checkbox(value=False, label="Generate Audio Visualizer",
                                                            interactive=True)
             with gr.Group() as audio_visualizer_group:
-                audio_visualizer_color, audio_visualizer_opacity = \
-                    gru.render_color_opacity_picker("Audio Visualizer")
+                audio_visualizer_color_opacity = gru.render_color_opacity_picker("Audio Visualizer")
                 with gr.Group():
                     with gr.Row():
-                        audio_visualizer_num_rows = gr.Number(value=90, label="Number of Rows",
-                                                              minimum=1, maximum=100)
-                        audio_visualizer_num_columns = gr.Number(value=65, label="Number of Columns",
-                                                                 minimum=1, maximum=100)
+                        audio_visualizer_amount = dataclasses.RowColGradioComponents(
+                            row=gr.Number(value=90, label="Number of Rows", minimum=1,
+                                          maximum=100),
+                            col=gr.Number(value=65, label="Number of Columns", minimum=1,
+                                          maximum=100)
+                        )
                     with gr.Row():
-                        audio_visualizer_min_size = gr.Number(value=1, label="Minimum Size", minimum=1, maximum=100)
-                        audio_visualizer_max_size = gr.Number(value=7, label="Maximum Size", minimum=1, maximum=200)
+                        audio_visualizer_dot_size = dataclasses.MinMaxGradioComponents(
+                            min=gr.Number(value=1, label="Minimum Size", minimum=1, maximum=100),
+                            max=gr.Number(value=7, label="Maximum Size", minimum=1, maximum=200)
+                        )
                 audio_visualizer_drawing = gr.Image(label="Visualizer Drawing (png)", type="filepath",
                                                     sources=["upload"], show_share_button=False,
                                                     show_download_button=False, scale=2, height=150,
                                                     image_mode="RGBA")
+                visualizer_overlay_checkbox = gr.Checkbox(value=False, label="Overlay Visualizer on One-Another",
+                                                          info="If checked, alpha-blending will be applied, which "
+                                                               "is noticeable on larger pngs where each drawing "
+                                                               "overlaps. This is only important for transparent "
+                                                               "images and is very slow. 
If the image is not " + "transparent, leave this unchecked.") gru.bind_checkbox_to_visibility(generate_audio_visualizer_button, audio_visualizer_group) create_video_button = gr.Button("Create Music Video", variant="primary") gr.Markdown("## Output") with gr.Group(): - video_output, video_name, video_suffix, save_video_button = video_processing.render_video_output() + video_data = video_processing.render_video_output() create_video_button.click(create_music_video, inputs=[cover_image, audio_filepath, fps, artist_name, - artist_font.family, artist_font.style, artist_font.size, - artist_font.color, artist_font.opacity, artist_shadow.enabled, - artist_shadow.color, artist_shadow.opacity, - artist_shadow.radius, artist_background.enabled, - artist_background.color, artist_background.opacity, - song_title, song_font.family, song_font.style, song_font.size, - song_font.color, song_font.opacity, song_shadow.enabled, - song_shadow.color, song_shadow.opacity, song_shadow.radius, - song_background.enabled, song_background.color, - song_background.opacity, background_color, background_opacity, - generate_audio_visualizer_button, audio_visualizer_color, - audio_visualizer_opacity, audio_visualizer_drawing, - audio_visualizer_num_rows, audio_visualizer_num_columns, - audio_visualizer_min_size, audio_visualizer_max_size], - outputs=[video_output]) - save_video_button.click(video_processing.save_video_to_disk, - inputs=[video_output, video_name, video_suffix], outputs=[]) + artist_font_display.font.family, + artist_font_display.font.style, artist_font_display.font.size, + artist_font_display.font.color, + artist_font_display.font.opacity, + artist_font_display.drop_shadow.enabled, + artist_font_display.drop_shadow.color, + artist_font_display.drop_shadow.opacity, + artist_font_display.drop_shadow.radius, + artist_font_display.background.enabled, + artist_font_display.background.color, + artist_font_display.background.opacity, + song_title, song_font_display.font.family, + song_font_display.font.style, song_font_display.font.size, + song_font_display.font.color, song_font_display.font.opacity, + song_font_display.drop_shadow.enabled, + song_font_display.drop_shadow.color, + song_font_display.drop_shadow.opacity, + song_font_display.drop_shadow.radius, + song_font_display.background.enabled, + song_font_display.background.color, + song_font_display.background.opacity, + background_color_opacity.color, + background_color_opacity.opacity, + generate_audio_visualizer_button, + audio_visualizer_color_opacity.color, + audio_visualizer_color_opacity.opacity, + audio_visualizer_drawing, visualizer_overlay_checkbox, + audio_visualizer_amount.row, audio_visualizer_amount.col, + audio_visualizer_dot_size.min, audio_visualizer_dot_size.max], + outputs=[video_data.video]) + video_data.save.click(video_processing.save_video_to_disk, inputs=[video_data.video, video_data.name, + video_data.suffix], outputs=[]) return cover_image diff --git a/ui/music/utils.py b/ui/music/utils.py index 10726b9..1ffdb90 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -7,13 +7,14 @@ import time import tempfile from typing import List, Dict, Optional +from dataclasses import dataclass import cv2 from moviepy.editor import AudioFileClip import numpy as np import librosa from api import chatgpt as chatgpt_api from processing import image as image_processing -from utils import progress, visualizer, font_manager, image as image_utils, dataclasses +from utils import progress, visualizer, font_manager, image as image_utils, dataclasses as 
local_dataclasses


 def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray):
@@ -45,16 +46,25 @@ def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]]
     return downsampled_frequency_loudness, downsampled_times


-def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, audio_length: int, fps: int,
-                                audio_visualizer: dataclasses.RGBOpacity, dot_size: dataclasses.MinMax,
-                                dot_count: dataclasses.RowCol, visualizer_drawing: Optional[str] = None) -> str:
-    print("Generating audio visualizer...")
+@dataclass
+class AudioVisualizerDotData:
+    """
+    A dataclass representing the data for the audio visualizer's dots.
+    """
+    size: local_dataclasses.MinMax
+    count: local_dataclasses.RowCol
+    color: local_dataclasses.RGBColor
+    opacity: int
+    visualizer_drawing: Optional[str] = None
+    visualizer_drawing_overlap: bool = False

-    audio_visualizer_color_and_opacity = image_utils.get_rgba(audio_visualizer.color, audio_visualizer.opacity)

+def _audio_visualizer_generator(frame_size: local_dataclasses.Size, audio_path: str, audio_length: int, fps: int,
+                                dot_data: AudioVisualizerDotData) -> str:
+    print("Generating audio visualizer...")
     custom_drawing = None
-    if visualizer_drawing is not None and visualizer_drawing != "":
-        custom_drawing = cv2.imread(visualizer_drawing, cv2.IMREAD_UNCHANGED)
+    if dot_data.visualizer_drawing is not None and dot_data.visualizer_drawing != "":
+        custom_drawing = cv2.imread(dot_data.visualizer_drawing, cv2.IMREAD_UNCHANGED)
         if custom_drawing.shape[2] == 3:
             custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGR2RGBA)
         else:
@@ -65,9 +75,9 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a
     total_iterations = len(times)
     start_time = time.time()
-    vis = visualizer.Visualizer(size=dataclasses.Size(frame_size.width, frame_size.height),
-                                dot_size=dot_size, color=audio_visualizer_color_and_opacity,
-                                dot_count=dataclasses.RowCol(dot_count.row, dot_count.col))
+    vis = visualizer.Visualizer(size=local_dataclasses.Size(frame_size.width, frame_size.height),
+                                dot_size=dot_data.size, color=image_utils.get_rgba(dot_data.color, dot_data.opacity),
+                                dot_count=local_dataclasses.RowCol(dot_data.count.row, dot_data.count.col))
     vis.initialize_static_values()
     temp_visualizer_images_dir = tempfile.mkdtemp()
     os.makedirs(temp_visualizer_images_dir, exist_ok=True)
@@ -75,7 +85,8 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a
         if time_point > audio_length:
             break
         frame = frame_cache.copy()
-        vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing)
+        vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing,
+                            custom_drawing_overlap=dot_data.visualizer_drawing_overlap)
         frame_np = np.array(frame)
         frame_np = cv2.cvtColor(frame_np, cv2.COLOR_RGBA2BGRA)
         frame_filename = f'{temp_visualizer_images_dir}/frame_{i:05d}.png'
@@ -87,23 +98,133 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a
     return temp_visualizer_images_dir


-def create_music_video(
+def _get_video_background(image_path: str, frame_size: local_dataclasses.Size,
+                          background_overlay_color_opacity: local_dataclasses.RGBOpacity) -> np.ndarray:
+    """
+    Gets the background for the video, which is a Gaussian-blurred version of the cover image stretched with a color
+    overlay.
+    :param image_path: The path to the image to use as the background.
+    :param frame_size: The size of the frame to use for the background.
+    :param background_overlay_color_opacity: The color and opacity to use for the background overlay.
+    :return: The composited background as a BGRA numpy array.
+    """
+    background = cv2.imread(image_path)
+    background = cv2.resize(background, (frame_size.width, frame_size.height))
+    background = cv2.GaussianBlur(background, (49, 49), 0)
+    if background.shape[2] == 3:
+        background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA)
+    overlay = np.full((frame_size.height, frame_size.width, 4),
+                      image_utils.get_bgra(background_overlay_color_opacity.color,
+                                           background_overlay_color_opacity.opacity),
+                      dtype=np.uint8)
+    alpha_overlay = overlay[:, :, 3] / 255.0
+    alpha_background = background[:, :, 3] / 255.0
+    for c in range(0, 3):
+        background[:, :, c] = (alpha_overlay * overlay[:, :, c] +
+                               alpha_background * (1 - alpha_overlay) * background[:, :, c])
+    background[:, :, 3] = (alpha_overlay + alpha_background * (1 - alpha_overlay)) * 255
+    return background
+
+
+def _generate_final_video(background_image_path: str, visualizer_frames_dir: Optional[str], cover_image_path: str,
+                          audio_path: str, fps: int) -> str:
+    """
+    Generates the final video using the given parameters with ffmpeg.
+    :param background_image_path: The path to the background image to use for the video.
+    :param visualizer_frames_dir: The path to the directory containing the audio visualizer frames.
+    :param cover_image_path: The path to the cover image to use for the video.
+    :param audio_path: The path to the audio file to use for the video.
+    :param fps: The frames per second to use for the video.
+    :return: The path to the generated video file.
+    """
+    temp_final_video_path = tempfile.mktemp(suffix=".mp4")
+
+    audio_clip = AudioFileClip(audio_path)
+    ffmpeg_commands = [
+        "ffmpeg", "-y",
+        "-loop", "1",
+        "-i", background_image_path,
+    ]
+
+    if visualizer_frames_dir is not None:
+        ffmpeg_commands.extend([
+            "-framerate", str(fps),
+            "-i", f'{visualizer_frames_dir}/frame_%05d.png',
+        ])
+        filter_complex = "[0][1]overlay=format=auto[bg];[bg][2]overlay=format=auto"
+    else:
+        filter_complex = "[0][1]overlay=format=auto"
+
+    ffmpeg_commands.extend([
+        "-framerate", str(fps),
+        "-i", cover_image_path,
+        "-i", audio_path,
+        "-filter_complex", filter_complex,
+        "-map", "3:a" if visualizer_frames_dir is not None else "2:a",
+        "-c:v", "libx264",
+        "-c:a", "aac",
+        "-strict", "experimental",
+        "-t", str(audio_clip.duration),
+        "-hide_banner",
+        "-framerate", str(fps),
+        '-pix_fmt', 'yuv420p',
+        temp_final_video_path
+    ])
+    print("Generating final video...")

+    duration_regex = re.compile(r"Duration: (\d\d):(\d\d):(\d\d)\.\d\d")
+    time_regex = re.compile(r"time=(\d\d):(\d\d):(\d\d)\.\d\d")
+
+    ffmpeg_start_time = time.time()
+    curr_duration = local_dataclasses.Time(hours=0, minutes=0, seconds=0)  # in case "time=" precedes "Duration:"
+    with subprocess.Popen(ffmpeg_commands, stderr=subprocess.PIPE, text=True) as ffmpeg_process:
+        for line in ffmpeg_process.stderr:
+            # Extract total duration of the video
+            duration_match = duration_regex.search(line)
+            if duration_match:
+                duration_match_groups = duration_match.groups()
+                curr_duration = local_dataclasses.Time(
+                    hours=int(duration_match_groups[0]),
+                    minutes=int(duration_match_groups[1]),
+                    seconds=int(duration_match_groups[2])
+                )
+
+            # Extract current time of encoding
+            time_match = time_regex.search(line)
+            if time_match and int(curr_duration) > 0:
+                time_match_groups = time_match.groups()
+                curr_time = local_dataclasses.Time(
+                    hours=int(time_match_groups[0]),
+                    minutes=int(time_match_groups[1]),
+                    seconds=int(time_match_groups[2])
+                )
+                progress.print_progress_bar(int(curr_time), int(curr_duration), start_time=ffmpeg_start_time)
+
+    progress.print_progress_bar(100, 100, 
end='\n', start_time=ffmpeg_start_time)
+
+    return temp_final_video_path
+
+
+def create_music_video(  # pylint: disable=too-many-locals
         image_path: str, audio_path: str, fps: int,
         artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int,
-        artist_font_color: dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool,
-        artist_shadow_color: dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int,
-        artist_background_enabled: bool, artist_background_color: dataclasses.RGBColor, artist_background_opacity: int,
-        song: str, song_font_type: str, song_font_style: str, song_font_size: int,
-        song_font_color: dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool,
-        song_shadow_color: dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int,
-        song_background_enabled: bool, song_background_color: dataclasses.RGBColor, song_background_opacity: int,
-        background_color: dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66,
-        generate_audio_visualizer: bool = False, audio_visualizer_color: dataclasses.RGBColor = (255, 255, 255),
+        artist_font_color: local_dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool,
+        artist_shadow_color: local_dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int,
+        artist_background_enabled: bool, artist_background_color: local_dataclasses.RGBColor,
+        artist_background_opacity: int, song: str, song_font_type: str, song_font_style: str, song_font_size: int,
+        song_font_color: local_dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool,
+        song_shadow_color: local_dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int,
+        song_background_enabled: bool, song_background_color: local_dataclasses.RGBColor, song_background_opacity: int,
+        background_color: local_dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66,
+        generate_audio_visualizer: bool = False, audio_visualizer_color: local_dataclasses.RGBColor = (255, 255, 255),
         audio_visualizer_opacity: int = 100, visualizer_drawing: Optional[str] = None,
-        audio_visualizer_num_rows: int = 90, audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1,
+        visualizer_drawing_overlap: bool = False, audio_visualizer_num_rows: int = 90,
+        audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1,
         audio_visualizer_max_size: int = 7) -> Optional[str]:
     """
     Creates a music video using the given parameters.
+    :param visualizer_drawing_overlap: Whether overlapping visualizer drawings should be alpha-blended onto one
+        another. This is only noticeable on images with transparency and is a slow process, so if your visualizer
+        drawings are not transparent, it is recommended to leave this False.
     :param image_path: The path to the image to use as the cover + background for the video.
     :param audio_path: The path to the audio file to use for the video.
     :param fps: The frames per second to use for the video.
@@ -145,29 +266,22 @@ def create_music_video(
     :param audio_visualizer_max_size: The maximum size to use for the audio visualizer's drawings (peak loudness).
     :return: The path to the generated video, or None if there was an error. 
""" - if image_path is None: - print("No cover image for the video.") - return None - if audio_path is None: - print("No audio to add to the video.") + if image_path is None or audio_path is None: + print("No cover image and/or audio for the video.") return None # Could probably expand to 4k, but unnecessary for this type of music video # Maybe in a future iteration it could be worth it - frame_size = dataclasses.Size(1920, 1080) + frame_size = local_dataclasses.Size(1920, 1080) # Set up cover - cover = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) - if cover.shape[2] == 3: - cover = cv2.cvtColor(cover, cv2.COLOR_BGR2RGBA) - else: - cover = cv2.cvtColor(cover, cv2.COLOR_BGRA2RGBA) + cover = image_utils.open_image_as_rgba(image_path) # Create canvas with 4 channels (RGBA) canvas = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) # Calculate dimensions for resizing the cover to fit within the canvas while maintaining its aspect ratio - cover_size = dataclasses.Size(cover.shape[1], cover.shape[0]) + cover_size = local_dataclasses.Size(cover.shape[1], cover.shape[0]) resize_factor = min(frame_size.width / cover_size.width, frame_size.height / cover_size.height) resize_factor *= (7 / 10) cover_size.width = int(cover_size.width * resize_factor) @@ -183,32 +297,28 @@ def create_music_video( audio_clip = AudioFileClip(audio_path) # Add video background - background = cv2.imread(image_path) - background = cv2.resize(background, (frame_size.width, frame_size.height)) - background = cv2.GaussianBlur(background, (49, 49), 0) - if background.shape[2] == 3: - background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA) - background_color_overlay = image_utils.get_bgra(background_color, background_opacity) - overlay = np.full((frame_size.height, frame_size.width, 4), background_color_overlay, dtype=np.uint8) - alpha_overlay = overlay[:, :, 3] / 255.0 - alpha_background = background[:, :, 3] / 255.0 - for c in range(0, 3): - background[:, :, c] = (alpha_overlay * overlay[:, :, c] + - alpha_background * (1 - alpha_overlay) * background[:, :, c]) - background[:, :, 3] = (alpha_overlay + alpha_background * (1 - alpha_overlay)) * 255 + background = _get_video_background(image_path, frame_size, + local_dataclasses.RGBOpacity(background_color, background_opacity)) background_bgr = cv2.cvtColor(background, cv2.COLOR_BGRA2BGR) tmp_background_image_path = tempfile.mktemp(suffix=".png") cv2.imwrite(tmp_background_image_path, background_bgr) + temp_visualizer_images_dir = None if generate_audio_visualizer: temp_visualizer_images_dir = _audio_visualizer_generator(frame_size, audio_path, audio_clip.duration, fps, - dataclasses.RGBOpacity(audio_visualizer_color, - audio_visualizer_opacity), - dataclasses.MinMax(audio_visualizer_min_size, - audio_visualizer_max_size), - dataclasses.RowCol(audio_visualizer_num_rows, - audio_visualizer_num_columns), - visualizer_drawing=visualizer_drawing) + AudioVisualizerDotData( + size=local_dataclasses.MinMax( + audio_visualizer_min_size, + audio_visualizer_max_size), + color=audio_visualizer_color, + opacity=audio_visualizer_opacity, + count=local_dataclasses.RowCol( + audio_visualizer_num_rows, + audio_visualizer_num_columns), + visualizer_drawing=visualizer_drawing, + visualizer_drawing_overlap=\ + visualizer_drawing_overlap) + ) # Add text font_families = font_manager.get_fonts() @@ -240,96 +350,19 @@ def create_music_video( artist_shadow_opacity), show_background=artist_background_enabled, background_color=image_utils.get_rgba( - artist_background_color, 
artist_background_opacity)) - - text_np = np.array(text_canvas) - np_canvas = np.array(canvas) - # Normalize the alpha channels - alpha_text = text_np[:, :, 3] / 255.0 - alpha_canvas = np_canvas[:, :, 3] / 255.0 - alpha_final = alpha_text + alpha_canvas * (1 - alpha_text) - - canvas_final = np.zeros_like(np_canvas) - # alpha blend - for c in range(3): # Loop over color (non-alpha) channels - canvas_final[:, :, c] = (alpha_text * text_np[:, :, c] + alpha_canvas * (1 - alpha_text) * - np_canvas[:, :, c]) / alpha_final - canvas_final[:, :, 3] = alpha_final * 255 - canvas_final[:, :, :3][alpha_final == 0] = 0 + artist_background_color, artist_background_opacity)) + canvas_final = image_utils.blend_alphas(np.array(text_canvas), np.array(canvas)) temp_canvas_image_path = tempfile.mktemp(suffix=".png") # Convert to BGR for OpenCV canvas_final = cv2.cvtColor(canvas_final, cv2.COLOR_RGBA2BGRA) cv2.imwrite(temp_canvas_image_path, canvas_final) - temp_final_video_path = tempfile.mktemp(suffix=".mp4") - - # set up the background video commands - ffmpeg_commands = [ - "ffmpeg", "-y", - "-loop", "1", - "-i", tmp_background_image_path, - ] + temp_final_video_path = _generate_final_video(tmp_background_image_path, temp_visualizer_images_dir, + temp_canvas_image_path, audio_path, fps) - if generate_audio_visualizer: - ffmpeg_commands.extend([ - "-framerate", str(fps), - "-i", f'{temp_visualizer_images_dir}/frame_%05d.png', - ]) - filter_complex = "[0][1]overlay=format=auto[bg];[bg][2]overlay=format=auto" - audio_input_map = "3:a" - else: - filter_complex = "[0][1]overlay=format=auto" - audio_input_map = "2:a" - - ffmpeg_commands.extend([ - "-framerate", str(fps), - "-i", temp_canvas_image_path, - "-i", audio_path, - "-filter_complex", filter_complex, - "-map", audio_input_map, - "-c:v", "libx264", - "-c:a", "aac", - "-strict", "experimental", - "-t", str(audio_clip.duration), - "-hide_banner", - "-framerate", str(fps), - '-pix_fmt', 'yuv420p', - temp_final_video_path - ]) - print("Generating final video...") - ffmpeg_process = subprocess.Popen(ffmpeg_commands, stderr=subprocess.PIPE, text=True) - - duration_regex = re.compile(r"Duration: (\d\d):(\d\d):(\d\d)\.\d\d") - time_regex = re.compile(r"time=(\d\d):(\d\d):(\d\d)\.\d\d") - total_duration_in_seconds = 0 - - ffmpeg_start_time = time.time() - while True: - line = ffmpeg_process.stderr.readline() - if not line: - break - - # Extract total duration of the video - duration_match = duration_regex.search(line) - if duration_match: - hours, minutes, seconds = map(int, duration_match.groups()) - total_duration_in_seconds = hours * 3600 + minutes * 60 + seconds - - # Extract current time of encoding - time_match = time_regex.search(line) - if time_match and total_duration_in_seconds > 0: - hours, minutes, seconds = map(int, time_match.groups()) - current_time = hours * 3600 + minutes * 60 + seconds - progress.print_progress_bar(current_time, total_duration_in_seconds, start_time=ffmpeg_start_time) - - ffmpeg_process.wait() - if ffmpeg_process.returncode != 0: - raise subprocess.CalledProcessError(ffmpeg_process.returncode, ffmpeg_commands) - progress.print_progress_bar(100, 100, end='\n', start_time=ffmpeg_start_time) - print("Done generating final video!\n") # clean up the original frames - if generate_audio_visualizer: + if temp_visualizer_images_dir is not None: for file in os.listdir(temp_visualizer_images_dir): os.remove(os.path.join(temp_visualizer_images_dir, file)) os.rmdir(temp_visualizer_images_dir) @@ -353,11 +386,13 @@ def 
generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[ return chatgpt_api.url_to_gradio_image_name(image_url) +# pylint: disable=too-many-locals def process(image_path: str, artist: str, song: str, - af_family: str, af_style: str, afs: int, afc: dataclasses.RGBColor, afo: int, ase: bool, - asc: dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: dataclasses.RGBColor, abo: int, - sf_family: str, sf_style: str, sfs: int, sfc: dataclasses.RGBColor, sfo: int, sse: bool, - ssc: dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: dataclasses.RGBColor, sbo: int) \ + af_family: str, af_style: str, afs: int, afc: local_dataclasses.RGBColor, afo: int, ase: bool, + asc: local_dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: local_dataclasses.RGBColor, + abo: int, sf_family: str, sf_style: str, sfs: int, sfc: local_dataclasses.RGBColor, sfo: int, sse: bool, + ssc: local_dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: local_dataclasses.RGBColor, + sbo: int) \ -> Optional[np.ndarray]: """ Processes the image at the given path (by adding the requested text) and returns the processed image. diff --git a/utils/dataclasses.py b/utils/dataclasses.py index 77c3531..0755858 100644 --- a/utils/dataclasses.py +++ b/utils/dataclasses.py @@ -59,8 +59,17 @@ class MinMax: """ A dataclass representing a minimum and maximum value. """ - min: int - max: int + min: Union[int, float] + max: Union[int, float] + + +@dataclass +class MinMaxGradioComponents: + """ + A dataclass representing the components of a minimum and maximum value editor. + """ + min: gr.Number + max: gr.Number @dataclass @@ -72,6 +81,15 @@ class RowCol: col: int +@dataclass +class RowColGradioComponents: + """ + A dataclass representing the components of a row and column editor. + """ + row: gr.Number + col: gr.Number + + @dataclass class FontGradioComponents: """ @@ -105,4 +123,50 @@ class FontBackgroundGradioComponents: opacity: gr.Slider +@dataclass +class FontDisplayGradioComponents: + """ + A dataclass representing the components of how to display the font. + """ + font: FontGradioComponents + drop_shadow: FontDropShadowGradioComponents + background: FontBackgroundGradioComponents + + +@dataclass +class ColorOpacityGradioComponents: + """ + A dataclass representing the components of the color and opacity editor. + """ + color: gr.ColorPicker + opacity: gr.Slider + + +@dataclass +class VideoOutputGradioComponents: + """ + A dataclass representing the components of the video output. + """ + video: gr.Video + name: gr.Textbox + suffix: gr.Dropdown + save: gr.Button + + +@dataclass +class Time: + """ + A dataclass representing a time. + """ + hours: int + minutes: int + seconds: int + + def __int__(self) -> int: + """ + Returns the time in seconds. + """ + return self.hours * 3600 + self.minutes * 60 + self.seconds + + RGBColor = Union[str, tuple[int, int, int]] diff --git a/utils/gradio.py b/utils/gradio.py index 54353ff..fae5a02 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -5,18 +5,18 @@ from utils import font_manager, dataclasses -def render_color_opacity_picker(default_name_label: str = "Font") -> tuple[gr.ColorPicker, gr.Slider]: +def render_color_opacity_picker(default_name_label: str = "Font") -> dataclasses.ColorOpacityGradioComponents: """ Renders a color picker with the appropriate styling. :param default_name_label: The default name label to use. - :return: A tuple containing the color and opacity components. 
+ :return: A class containing the color and opacity components. """ with gr.Group(): with gr.Row(): color = gr.ColorPicker(label=f"{default_name_label} Color", scale=1, interactive=True) opacity = gr.Slider(0, 100, value=100, label="Opacity", scale=2, interactive=True) - return color, opacity + return dataclasses.ColorOpacityGradioComponents(color, opacity) def bind_checkbox_to_visibility(checkbox: gr.Checkbox, group: gr.Group): @@ -58,10 +58,11 @@ def update_font_styles(selected_font_family): font_family.change(update_font_styles, inputs=[font_family], outputs=[font_style]) with gr.Group(): - font_color, font_opacity = render_color_opacity_picker() + font_color_opacity = render_color_opacity_picker() font_size = gr.Number(default_font_size, label="Font Size", interactive=True) - return dataclasses.FontGradioComponents(font_family, font_style, font_color, font_opacity, font_size) + return dataclasses.FontGradioComponents(font_family, font_style, font_color_opacity.color, + font_color_opacity.opacity, font_size) def render_tool_description(description: str): diff --git a/utils/image.py b/utils/image.py index b070a04..42f59e4 100644 --- a/utils/image.py +++ b/utils/image.py @@ -2,6 +2,8 @@ This file contains functions for image processing. """ from typing import Tuple +import cv2 +import numpy as np from utils import dataclasses @@ -43,3 +45,45 @@ def get_bgra(color: dataclasses.RGBColor, opacity: int) -> Tuple[int, int, int, color = tuple(int(color[i:i + 2], 16) for i in (0, 2, 4)) return color[2], color[1], color[0], get_alpha_from_opacity(opacity) + + +def open_image_as_rgba(image_path: str) -> np.ndarray: + """ + Opens an image as RGBA. + :param image_path: The path to the image. + :return: The image as RGBA. + """ + img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) + if img.shape[2] == 3: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA) + else: + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) + + return img + + +def blend_alphas(a: np.ndarray, b: np.ndarray) -> np.ndarray: + """ + Blends two images together using alpha blending. + :param a: The first image. + :param b: The second image. + :return: The blended image. + """ + if a.shape != b.shape: + raise ValueError("both images must have the same shape to blend alphas") + if a.shape[2] != 4 or b.shape[2] != 4: + raise ValueError("both images must have 4 channels to blend alphas") + + alpha_text = a[:, :, 3] / 255.0 + alpha_canvas = b[:, :, 3] / 255.0 + alpha_final = alpha_text + alpha_canvas * (1 - alpha_text) + + final = np.zeros_like(b) + # alpha blend + for c in range(3): # Loop over color (non-alpha) channels + final[:, :, c] = (alpha_text * a[:, :, c] + alpha_canvas * (1 - alpha_text) * + b[:, :, c]) / alpha_final + final[:, :, 3] = alpha_final * 255 + final[:, :, :3][alpha_final == 0] = 0 + + return final diff --git a/utils/visualizer.py b/utils/visualizer.py index 8e53285..2beb839 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -4,7 +4,7 @@ from typing import Dict, Optional import numpy as np import cv2 -from utils import dataclasses +from utils import dataclasses, image as image_utils class Visualizer: @@ -12,6 +12,7 @@ class Visualizer: This class is used to draw the visualizer on the canvas. Will be replaced with a more general solution in the future to allow for more customization. 
""" + def __init__(self, dot_size: dataclasses.MinMax, color, dot_count: dataclasses.RowCol, size: dataclasses.Size): self.dot_size = dot_size self.color = color @@ -34,14 +35,11 @@ def initialize_static_values(self: "Visualizer") -> None: self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count.col) for y in range(self.dot_count.row)] - def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], - custom_drawing: Optional[np.ndarray] = None) -> None: + def _get_loudness(self, frequency_data: Dict[float, float]) -> (dataclasses.MinMax, Dict[int, int]): """ - Draws the visualizer on the canvas (a single frame). - :param canvas: The canvas to draw on. + Calculates the loudness values for each column. :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency. - :param custom_drawing: A custom drawing to use instead of the default circle. - :return: None. + :return: A tuple containing the loudness min/max and the loudness values for each column. """ # Precompute log frequencies freq_keys = np.array(list(frequency_data.keys())) @@ -51,66 +49,111 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict # Find the maximum and minimum loudness values, ignoring -80 dB freq_bands = np.array([frequency_data[key] for key in freq_keys if key > 0]) # Ignore 0 Hz - max_loudness = np.max(freq_bands) filtered_loudness = freq_bands[freq_bands > -80] - min_loudness = np.min(filtered_loudness) if filtered_loudness.size > 0 else -80 + loudness_min_max = dataclasses.MinMax(np.min(filtered_loudness) if filtered_loudness.size > 0 else -80, + np.max(freq_bands)) # Precompute loudness values loudness_values = {} for x in range(self.dot_count.col): - lower_bound = log_freqs[x] - upper_bound = log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 - band_freqs = [freq for freq in freq_keys if lower_bound <= freq < upper_bound] + bounds = { + "lower": log_freqs[x], + "upper": log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 + } + band_freqs = [freq for freq in freq_keys if bounds.get("lower") <= freq < bounds.get("upper")] if not band_freqs: - closest_freq = min(freq_keys, key=lambda f, lb=lower_bound: abs(f - lb)) + closest_freq = min(freq_keys, key=lambda f, lb=bounds.get("lower"): abs(f - lb)) band_freqs = [closest_freq] band_loudness = [frequency_data[freq] for freq in band_freqs] avg_loudness = np.mean(band_loudness) if band_loudness else -80 loudness_values[x] = avg_loudness + return loudness_min_max, loudness_values + + def _calculate_dot_size(self: "Visualizer", column: int, loudness: dataclasses.MinMax, + loudness_values: Dict[int, int]) -> int: + """ + Calculates the dot size for a given column. + :param loudness: The loudness min/max. + :param loudness_values: The loudness values for each column. + :return: The dot size. 
+        """
+        # Scale the loudness to the dot size
+        scaled_loudness = (loudness_values[column] - loudness.min) / (
+            loudness.max - loudness.min) if loudness.max != loudness.min else 0
+        dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min)
+        return min(max(dot_size, self.dot_size.min), self.dot_size.max)
+
+    def _draw_custom_drawing(self: "Visualizer", canvas: np.ndarray, start_pos: dataclasses.Position,
+                             end_pos: dataclasses.Position, img_start_pos: dataclasses.Position,
+                             img_end_pos: dataclasses.Position, dot_size: int,
+                             custom_drawing_overlap: bool) -> np.ndarray:
+        """
+        Computes the slice of the custom drawing to place onto the canvas.
+        :param canvas: The canvas to draw on.
+        :param start_pos: The start position on the canvas.
+        :param end_pos: The end position on the canvas.
+        :param img_start_pos: The start position on the resized image.
+        :param img_end_pos: The end position on the resized image.
+        :param dot_size: The dot size.
+        :param custom_drawing_overlap: Whether overlapped custom drawings should alpha blend.
+        :return: The image slice to write into the canvas region.
+        """
+        drawing_slice = self.cached_resized_drawing[dot_size][img_start_pos.y:img_end_pos.y,
+                                                              img_start_pos.x:img_end_pos.x]
+
+        if custom_drawing_overlap:
+            canvas_slice = canvas[start_pos.y:end_pos.y, start_pos.x:end_pos.x]
+            return image_utils.blend_alphas(canvas_slice, drawing_slice)
+
+        return drawing_slice
+
+    def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float],
+                        custom_drawing: Optional[np.ndarray] = None, custom_drawing_overlap: bool = False) -> None:
+        """
+        Draws the visualizer on the canvas (a single frame).
+        :param custom_drawing_overlap: Whether overlapping custom drawings should be alpha-blended together.
+        :param canvas: The canvas to draw on.
+        :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency.
+        :param custom_drawing: A custom drawing to use instead of the default circle.
+        :return: None. 
+ """ + loudness, loudness_values = self._get_loudness(frequency_data) + cached_dot_sizes = {} for i, (pos_x, pos_y) in enumerate(self.cached_dot_positions): column = i // self.dot_count.row # Ensure the correct column is computed if column not in cached_dot_sizes: - avg_loudness = loudness_values[column] - # Scale the loudness to the dot size - scaled_loudness = (avg_loudness - min_loudness) / ( - max_loudness - min_loudness) if max_loudness != min_loudness else 0 - dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min) - dot_size = min(max(dot_size, self.dot_size.min), self.dot_size.max) - - cached_dot_sizes[column] = dot_size - else: - dot_size = cached_dot_sizes[column] + cached_dot_sizes[column] = self._calculate_dot_size(column, loudness, loudness_values) - # Convert dot size to integer and calculate the center position - dot_size = int(dot_size) - center = (int(pos_x), int(pos_y)) + dot_size = int(cached_dot_sizes[column]) + center_pos = dataclasses.Position(int(pos_x), int(pos_y)) if custom_drawing is not None: if dot_size not in self.cached_resized_drawing: - self.cached_resized_drawing[dot_size] = cv2.resize(custom_drawing, (dot_size, dot_size), - interpolation=cv2.INTER_LANCZOS4) - resized_custom_drawing = self.cached_resized_drawing[dot_size] + if dot_size == 0: + self.cached_resized_drawing[dot_size] = np.zeros((1, 1, 4), dtype=np.uint8) + else: + self.cached_resized_drawing[dot_size] = cv2.resize(custom_drawing, (dot_size, dot_size), + interpolation=cv2.INTER_LANCZOS4) - center_x, center_y = int(pos_x), int(pos_y) half_dot_size = dot_size // 2 - # Calculate bounds on the canvas - start_x = max(center_x - half_dot_size, 0) - end_x = min(center_x + half_dot_size, canvas.shape[1]) - start_y = max(center_y - half_dot_size, 0) - end_y = min(center_y + half_dot_size, canvas.shape[0]) + start_pos = dataclasses.Position(max(center_pos.x - half_dot_size, 0), + max(center_pos.y - half_dot_size, 0)) + end_pos = dataclasses.Position(min(center_pos.x + half_dot_size, canvas.shape[1]), min( + center_pos.y + half_dot_size, canvas.shape[0])) # Calculate corresponding bounds on the resized image - img_start_x = max(half_dot_size - (center_x - start_x), 0) - img_end_x = img_start_x + (end_x - start_x) - img_start_y = max(half_dot_size - (center_y - start_y), 0) - img_end_y = img_start_y + (end_y - start_y) - - # Place the image slice onto the canvas - canvas[start_y:end_y, start_x:end_x] = resized_custom_drawing[img_start_y:img_end_y, - img_start_x:img_end_x] + img_start_pos = dataclasses.Position(max(half_dot_size - (center_pos.x - start_pos.x), 0), + max(half_dot_size - (center_pos.y - start_pos.y), 0)) + img_end_pos = dataclasses.Position(img_start_pos.x + (end_pos.x - start_pos.x), + img_start_pos.y + (end_pos.y - start_pos.y)) + + canvas[start_pos.y:end_pos.y, start_pos.x:end_pos.x] = self._draw_custom_drawing(canvas, start_pos, + end_pos, img_start_pos, + img_end_pos, dot_size, + custom_drawing_overlap) else: - cv2.circle(canvas, center, dot_size // 2, self.color, -1) + cv2.circle(canvas, (center_pos.x, center_pos.y), dot_size // 2, self.color, -1)
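
Note on the compositing used above: the helper introduced in utils/image.py (blend_alphas) is the core of both the visualizer overlap mode and the video background overlay. The following is a minimal, self-contained sketch of the same "over" operator for anyone reviewing the math outside the app; the np.where guard against division by zero is this sketch's own addition (blend_alphas instead zeroes fully transparent pixels after dividing), and the name blend_over is hypothetical.

    import numpy as np

    def blend_over(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Composite RGBA image `a` over RGBA image `b` (Porter-Duff "over")."""
        alpha_a = a[:, :, 3] / 255.0
        alpha_b = b[:, :, 3] / 255.0
        alpha_out = alpha_a + alpha_b * (1 - alpha_a)
        out = np.zeros_like(b)
        with np.errstate(invalid="ignore", divide="ignore"):
            for c in range(3):  # color channels; alpha is handled separately below
                out[:, :, c] = np.where(
                    alpha_out > 0,
                    (alpha_a * a[:, :, c] + alpha_b * (1 - alpha_a) * b[:, :, c]) / alpha_out,
                    0)
        out[:, :, 3] = alpha_out * 255
        return out

    # An opaque red pixel composited over a fully transparent canvas stays opaque red:
    red = np.zeros((1, 1, 4), dtype=np.uint8)
    red[..., 0], red[..., 3] = 255, 255
    clear = np.zeros((1, 1, 4), dtype=np.uint8)
    assert tuple(blend_over(red, clear)[0, 0]) == (255, 0, 0, 255)

This per-channel division runs for every dot of every frame when the new overlap checkbox is enabled, which is why the patch warns that the mode is slow and gates it off by default.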