From ca2f6fcaee765076133871af9f61c9fa8dffc697 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Wed, 10 Jan 2024 23:05:55 -0500 Subject: [PATCH 01/13] Add pylint workflow --- .github/workflows/pylint.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/pylint.yml diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml new file mode 100644 index 0000000..5730b67 --- /dev/null +++ b/.github/workflows/pylint.yml @@ -0,0 +1,21 @@ +name: Python linter +on: [push] +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') From fb276eba57a4337597b515b08cf222958603e3d3 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Thu, 11 Jan 2024 00:27:16 -0500 Subject: [PATCH 02/13] Begin fixing all linter issues --- .pylintrc | 2 + api/chatgpt.py | 82 +++++++++++++++++---- main.py | 4 +- processing/image.py | 166 ++++++++++++++++++++++++++++++++++-------- processing/video.py | 51 ++++++++++--- ui/listicles/utils.py | 3 +- ui/music/interface.py | 6 +- ui/music/utils.py | 58 +++++++++------ ui/ui.py | 5 +- utils/gradio.py | 3 +- 10 files changed, 296 insertions(+), 84 deletions(-) create mode 100644 .pylintrc diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..d047969 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,2 @@ +[MAIN] +max-line-length=120 \ No newline at end of file diff --git a/api/chatgpt.py b/api/chatgpt.py index 39d37ea..8aeac86 100644 --- a/api/chatgpt.py +++ b/api/chatgpt.py @@ -1,12 +1,25 @@ +""" +This module provides utility functions for interacting with the OpenAI API and Gradio interfaces. +""" import openai from openai import OpenAI import os import gradio as gr +from typing import Optional -# The actual gradio image name (+ orig_name) is <>.png, but the tmp file created and sent to -# batch is based on the portion after the last `/` in the url without the '%' (looks url encoded). -def url_to_gradio_image_name(url): +def url_to_gradio_image_name(url: str) -> str: + """ + Converts an OpenAI generated URL to a Gradio-compatible image name. + + This function extracts the portion of the URL after the last forward slash ('/'). It removes special characters + often found in URLs such as '%', '&', and '='. The resulting string is truncated to a maximum length of 200 + characters to prevent issues with file name length limitations. + + :param url: The URL containing the image name. + :returns: A cleaned and truncated version of the image name suitable for use with Gradio. + """ + # Get the part after the final `/` in the URL image_name = url.rsplit('/', 1)[-1] @@ -21,7 +34,18 @@ def url_to_gradio_image_name(url): return image_name -def get_openai_client(api_key): +def get_openai_client(api_key: Optional[str] = None) -> Optional[OpenAI]: + """ + Creates and returns an OpenAI client object configured with the given API key. + + This function initializes an OpenAI client using the provided API key. If the provided API key is None or empty, + it attempts to retrieve the API key from the environment variable 'OPENAI_API_KEY'. If the environment variable is + also not set, it raises a warning and returns None. 
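+
+    A minimal usage sketch of the fallback behaviour described above:
+
+        client = get_openai_client(None)  # None/empty falls back to OPENAI_API_KEY
+        if client is not None:
+            ...  # safe to make API calls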
+ + :param api_key: The API key for OpenAI. If not provided, the function will try to use the API key from the + environment variable. + :returns: An instance of the OpenAI client configured with the API key, or None if no valid API key is provided. + """ if api_key is None or api_key == "": api_key = os.environ.get("OPENAI_API_KEY") if api_key is None or api_key == "": @@ -31,7 +55,24 @@ def get_openai_client(api_key): return OpenAI(api_key=api_key) -def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: str, context: list = None, as_json: bool= False): +def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: str, context: Optional[list] = None, + as_json: bool = False) -> Optional[str]: + """ + Generates a chat response using the OpenAI API based on the provided parameters. + + This function sends a message to the OpenAI API using the specified client and model. It constructs a message with + a role (system or user) and the provided prompt. It also optionally includes previous chat context. The response + can be returned in JSON format if specified. + + :param client: The OpenAI client to use for making the API call. + :param api_model: The model to use for the chat completion (e.g., 'davinci-codex'). + :param role: The role the AI should assume. + :param prompt: The message prompt to send to the chat model. + :param context: A list of previous chat messages to provide context. Default is None. + :param as_json: A flag to specify if the response should be in JSON format. Default is False. + + :returns: The chat response as a string, or None if there was an error or no response generated. + """ message = [ {"role": "system", "content": role}, @@ -59,29 +100,42 @@ def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: messages=message, ) - - response = response.choices[0] - if response.finish_reason != "stop": if response.finish_reason == "length": gr.Warning( - f"finish_reason: {response.finish_reason}. The maximum number of tokens specified in the request was reached.") - return None, None, None + f"finish_reason: {response.finish_reason}. The maximum number of tokens specified in the request was " + f"reached.") + return None elif response.finish_reason == "content_filter": gr.Warning( - f"finish_reason: {response.finish_reason}. The content was omitted due to a flag from OpenAI's content filters.") - return None, None, None + f"finish_reason: {response.finish_reason}. The content was omitted due to a flag from OpenAI's content " + f"filters.") + return None content = response.message.content if content is None or content == "": gr.Warning("No content was generated.") - return None, None + return None return content -def get_image_response(client: openai.Client, api_model: str, prompt: str, portrait=False): +def get_image_response(client: openai.Client, api_model: str, prompt: str, portrait=False) -> Optional[str]: + """ + Generates an image response using the OpenAI API based on a given prompt and specified parameters. + + This function requests the OpenAI API to generate an image based on the provided text prompt. It allows specification + of the model to use and whether the generated image should be in a portrait format. For 'dall-e-3' model, + it supports high-definition (HD) quality image generation. + + :param client: The OpenAI client to use for making the API call. + :param api_model: The model to use for image generation (e.g., 'dall-e-3'). 
+ :param prompt: The text prompt based on which the image is generated. + :param portrait: A flag to specify if the generated image should be in portrait orientation. Default is False. + + :returns: The URL of the generated image, or None if no image was generated or if there was an error. + """ image_size = "1024x1024" if portrait and api_model == "dall-e-3": image_size = "1024x1792" diff --git a/main.py b/main.py index 43d422b..1ebec69 100755 --- a/main.py +++ b/main.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -* -import ui.ui as ui -import utils.font_manager as font_manager +from ui import ui +from utils import font_manager if __name__ == '__main__': # Initialize fonts, and svg file grabber at start diff --git a/processing/image.py b/processing/image.py index 52c9667..9e793ba 100644 --- a/processing/image.py +++ b/processing/image.py @@ -1,3 +1,6 @@ +""" +Module for handling image-related operations in a Gradio interface. +""" import PIL from PIL import ImageFont, ImageDraw, Image, ImageFilter import numpy as np @@ -8,14 +11,29 @@ import os import cv2 from pathlib import Path -import utils.path_handler as path_handler +from utils import path_handler import utils.gradio as gru +from typing import Tuple, Optional, Union, Any, Literal image_folder = "images" default_path = os.path.join(path_handler.get_default_path(), image_folder) -def render_image_output(): +def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): + """ + Creates and returns a set of Gradio interface components for image output. + + This function sets up an image display component along with associated controls for naming the image file, + selecting its file type, and a button for saving the image to disk. It leverages Gradio's UI components to + create an interactive and user-friendly interface for image handling. + + Returns: + - Tuple[gr.Image, gr.Textbox, gr.Dropdown, gr.Button]: A tuple containing Gradio UI components: + - gr.Image: An image display component for showing image output. + - gr.Textbox: A textbox for inputting the name of the image file. + - gr.Dropdown: A dropdown menu for selecting the image file type. + - gr.Button: A button that triggers the action to save the image to disk. + """ image_output = gr.Image(elem_classes=["single-image-output"], label="Image Output", interactive=False, show_download_button=False, type="filepath") @@ -28,7 +46,23 @@ def render_image_output(): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name): +def render_text_editor_parameters(name: str) -> ((gr.Dropdown, gr.Dropdown, gr.Number, gr.ColorPicker, gr.Slider), + (gr.Checkbox, gr.ColorPicker, gr.Slider, gr.Number), + (gr.Checkbox, gr.ColorPicker, gr.Slider)): + """ + Creates and returns a set of Gradio interface components for text editor parameters. + + This function sets up a set of Gradio UI components for configuring the text editor parameters. It includes + controls for font family, font style, font size, font color, font opacity, drop shadow, drop shadow color, + drop shadow opacity, drop shadow radius, background, background color, and background opacity. + + :param name: The name of the text editor parameters section. + :return: A tuple of tuples containing Gradio UI components: A tuple containing Gradio UI + components for configuring the font family, font style, font size, font color, and font opacity. 
A tuple + containing Gradio UI components for configuring the drop shadow, drop shadow color, drop shadow opacity, + and drop shadow radius. A tuple containing Gradio UI components for configuring the background, background + color, and background opacity. + """ with gr.Accordion(label=name): with gr.Column(): font_family, font_style, font_color, font_opacity, font_size = gru.render_font_picker() @@ -49,7 +83,23 @@ def render_text_editor_parameters(name): (background_checkbox, background_color, background_opacity)) -def add_background(image_pil, draw, position, text, font, padding=(15, 5), fill_color=(0, 0, 0, 255), border_radius=0): +def add_background(image_pil: PIL.Image, draw: PIL.ImageDraw, position: Tuple[int, int], text: str, font: PIL.ImageFont, + padding: Tuple[int, int] = (15, 5), fill_color: Tuple[int, int, int, int] = (0, 0, 0, 255), + border_radius: int = 0) -> (Tuple[int, int], Tuple[int, int]): + """ + Adds a background to text on an image. + + :param image_pil: The image to get the size of for text placement. + :param draw: The image draw object to use for drawing the background. + :param position: The position of the text on the image. + :param text: The text to add a background to. + :param font: The font used for the text. + :param padding: The padding to add between the text and the background. + :param fill_color: The RGBA color to fill the background with. + :param border_radius: The radius of the border. + + :return: A tuple containing the position of the text and the size of the background. + """ # Calculate width and height of text with padding bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] @@ -67,8 +117,19 @@ def add_background(image_pil, draw, position, text, font, padding=(15, 5), fill_ return (x1 + padding[0], y1 + padding[1]), (x2 - x1, y2 - y1) -def add_blurred_shadow(image_pil, text, position, font, shadow_color=(0, 0, 0), shadow_offset=(0, 0), - blur_radius=1): +def add_blurred_shadow(image_pil: PIL.Image, text: str, position: Tuple[int, int], font: PIL.ImageFont, + shadow_color: Tuple[int, int, int, int] = (0, 0, 0), shadow_offset: Tuple[int, int] = (0, 0), + blur_radius: int = 1): + """ + Adds a blurred shadow or highlight to text on an image. + :param image_pil: The image to place the shadow on. + :param text: The text to add a shadow to. + :param position: The position of the text on the image. + :param font: The font used for the text. + :param shadow_color: The RGBA color of the shadow. + :param shadow_offset: The offset of the shadow. + :param blur_radius: The radius of the blur. + """ # Create an image for the shadow shadow_image = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) shadow_draw = ImageDraw.Draw(shadow_image) @@ -84,7 +145,15 @@ def add_blurred_shadow(image_pil, text, position, font, shadow_color=(0, 0, 0), image_pil.paste(blurred_shadow, (0, 0), blurred_shadow) -def read_image_from_disk(filepath, size=None): +def read_image_from_disk(filepath: str, size: Optional[Tuple[int, int]] = None) \ + -> Union[cv2.Mat, np.ndarray[Any, np.dtype[np.generic]], np.ndarray]: + """ + Reads an image from disk and returns it as a NumPy array for use with PIL. + :param filepath: The path to the image file. + :param size: The size to resize the image to. + + :return: A NumPy array containing the image. 
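+
+    Illustrative call (the file path and target size are placeholders):
+
+        img = read_image_from_disk("cover.png", size=(1024, 1024))  # RGBA ndarray
+        pil_image = Image.fromarray(img)  # hand the array to PIL for drawing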
+ """ img = cv2.imread(filepath, cv2.IMREAD_UNCHANGED) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA) # Convert to RGBA for PIL usage if size: @@ -92,20 +161,27 @@ def read_image_from_disk(filepath, size=None): return img -# This assumes the images are from a gallery, which is why it checks for the 'root' attribute. -def save_images_to_disk(images, image_type, dir=default_path): +def save_images_to_disk(images: gr.data_classes.GradioRootModel, image_type: Literal["png", "jpg", "webp"], + save_dir: str = default_path) -> Optional[str]: + """ + Saves a list of images to disk. + :param images: The list of images to save from Gradio's Gallery. + :param image_type: The type of image to save. + :param save_dir: The directory to save the images to. + :return: The directory the images were saved to. + """ if not images or len(images.root) == 0: gr.Warning("No images to save.") return - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") unique_id = uuid.uuid4() - dir = f"{base_dir}/{date}/{unique_id}" + save_dir = f"{base_dir}/{date}/{unique_id}" - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) for index, image_container in enumerate(images.root): image = image_container.image @@ -114,25 +190,35 @@ def save_images_to_disk(images, image_type, dir=default_path): continue filename = f"{index}.{image_type}" - filepath = os.path.join(dir, filename) + filepath = os.path.join(save_dir, filename) img = cv2.imread(image.path, cv2.IMREAD_UNCHANGED) cv2.imwrite(filepath, img) - gr.Info(f"Saved generated images to {dir}.") - return dir - - -def save_image_to_disk(image_path, name, image_suffix=".png", dir=default_path): + gr.Info(f"Saved generated images to {save_dir}.") + return save_dir + + +def save_image_to_disk(image_path: str, name: Optional[str] = None, + image_suffix: Literal[".png", ".jpg", ".webp"] = ".png", save_dir: str = default_path) \ + -> Optional[str]: + """ + Saves an image to disk. + :param image_path: The path to the image to save. (from a temporary directory from Gradio) + :param name: The name of the image file. If not provided, a generated name will be used. + :param image_suffix: The suffix of the image file denoting its type. + :param save_dir: The directory to save the image to. + :return: The directory the image was saved to. 
+ """ if image_path is None: gr.Warning("No image to save.") - return + return None - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") unique_id = uuid.uuid4() - dir = f"{base_dir}/{date}/{unique_id}" + save_dir = f"{base_dir}/{date}/{unique_id}" if name is None or name == "": unique_id = uuid.uuid4() @@ -142,21 +228,41 @@ def save_image_to_disk(image_path, name, image_suffix=".png", dir=default_path): name = Path(name).stem name = f"{name}{image_suffix}" - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) - filepath = os.path.join(dir, name) + filepath = os.path.join(save_dir, name) img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) cv2.imwrite(filepath, img) - gr.Info(f"Saved generated image to {dir}.") - return dir + gr.Info(f"Saved generated image to {save_dir}.") + return save_dir # Function to add text to an image with custom font, size, and wrapping -def add_text(image, text, position, font_path, font_size, font_color=(255, 255, 255, 255), shadow_color=(255, 255, 255), - shadow_radius=None, max_width=None, show_background=False, show_shadow=False, - background_color=(0, 0, 0, 255), x_center=False): +def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[int, int], font_path: str, + font_size: int, font_color: Tuple[int, int, int, int] = (255, 255, 255, 255), + shadow_color: Tuple[int, int, int, int] = (255, 255, 255, 255), + shadow_radius: Optional[int] = None, max_width: Optional[int] = None, show_background: bool = False, + show_shadow: bool = False, background_color: Tuple[int, int, int, int] = (0, 0, 0, 255), + x_center: bool = False) -> (np.ndarray, Tuple[int, int]): + """ + Adds text to an image with custom font, size, and wrapping. + :param image: The image to add text to. + :param text: The text to add to the image. + :param position: The (x, y) position of the text on the image. + :param font_path: The path to the font to use. + :param font_size: The size of the font. + :param font_color: The color of the font. + :param shadow_color: The color of the shadow. + :param shadow_radius: The radius of the shadow. + :param max_width: The maximum width of the text before wrapping. + :param show_background: Whether to show a background behind the text. + :param show_shadow: Whether to show a shadow behind the text. + :param background_color: The color of the background. + :param x_center: Whether to center the text on the x-axis. This ignores the positional x parameter. + :return: A tuple containing the image with text added and the size of the text block. + """ if not isinstance(position, tuple): raise TypeError("Position must be a 2-tuple.", type(position)) diff --git a/processing/video.py b/processing/video.py index b972727..96f8663 100644 --- a/processing/video.py +++ b/processing/video.py @@ -1,16 +1,31 @@ +""" +Module for handling video-related operations in a Gradio interface. 
+""" import uuid from datetime import datetime -import gradio as gr from pathlib import Path import os +import gradio as gr from moviepy.editor import VideoFileClip -import utils.path_handler as path_handler +from utils import path_handler +from typing import Optional, Literal video_folder = "videos" default_path = os.path.join(path_handler.get_default_path(), video_folder) -def render_video_output(): +def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): + """ + Creates and returns a set of Gradio interface components for video output. + + This function sets up a video display component along with associated controls for naming the video file, + selecting its file type, and a button for saving the video to disk. It leverages Gradio's UI components to + create an interactive and user-friendly interface for video handling. + + :returns: A tuple containing the following Gradio UI components: A video display component for showing video output, + a textbox for inputting the name of the video file, a dropdown menu for selecting the video file type, and a + button that triggers the action to save the video to disk. + """ video_output = gr.Video(elem_classes=["video-output"], label="Video Output", interactive=False) with gr.Row(): video_name = gr.Textbox(label="Name", lines=1, max_lines=1, scale=2) @@ -20,14 +35,28 @@ def render_video_output(): return video_output, video_name, video_suffix, save_video_button -def save_video_to_disk(video, name, video_suffix=".mp4", dir=default_path): - if not video: +def save_video_to_disk(video_path: str, name: Optional[str] = None, video_suffix: Literal[".mp4", ".mov"] = ".mp4", + save_dir: str = default_path) -> None: + """ + Saves a video file to the specified directory with a given name and file suffix. + + This function handles saving a video file to disk. It constructs a file path using the provided directory, + current date, and a unique name or the specified name. It supports saving in either .mp4 or .mov format. + If no name is provided, it generates a unique identifier for the file name. The function creates the necessary + directory structure if it does not exist and then saves the video using moviepy. + + :param video_path: The path to the video file to be saved. + :param name: The desired name for the saved video file. If not provided, a unique name is generated. + :param video_suffix: The file extension for the video. Defaults to ".mp4". + :param save_dir: The directory where the video will be saved. Defaults to the default path defined globally. 
+ """ + if not video_path or video_path == "": gr.Warning("No video to save.") return - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") - dir = f"{base_dir}/{date}" + save_dir = f"{base_dir}/{date}" if name is None or name == "": unique_id = uuid.uuid4() @@ -37,12 +66,12 @@ def save_video_to_disk(video, name, video_suffix=".mp4", dir=default_path): name = Path(name).stem name = f"{name}{video_suffix}" - video_clip = VideoFileClip(video) + video_clip = VideoFileClip(video_path) - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) - video_fqn = os.path.join(dir, name) + video_fqn = os.path.join(save_dir, name) video_clip.write_videofile(video_fqn, codec="libx264", fps=video_clip.fps) gr.Info(f"Saved video to {video_fqn}.") diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index 5b75969..dbea344 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -188,7 +188,8 @@ def generate_listicle(api_key, api_text_model, api_image_model, number_of_items, if association is not None and association != "": # Add association field if provided json_format += ", association: " json_format += "}" - message = f"Format the listicle into JSON. For the items, store as a list named 'items' with the content format: {json_format}." + message = (f"Format the listicle into JSON. For the items, store as a list named 'items' with the content " + f"format: {json_format}.") if rating_type is not None and rating_type != "": message += (f"Include a top-level field `rating_type: ` with what the rating " f"represents.") diff --git a/ui/music/interface.py b/ui/music/interface.py index 0057eed..3bac1a5 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -11,7 +11,8 @@ def render_music_section(): with gr.Tab("Generate Cover"): send_cover_to_process_button, send_cover_to_video_button, generated_image_output_path = render_generate_cover() with gr.Tab("Add Text To Image"): - send_processed_cover_to_video_button, processed_image_input, processed_image_output_path = render_process_cover() + send_processed_cover_to_video_button, processed_image_input, processed_image_output_path = \ + render_process_cover() with gr.Tab("Create Music Video"): music_video_cover_image = render_music_video_creation() @@ -111,7 +112,8 @@ def render_music_video_creation(): generate_audio_visualizer_button = gr.Checkbox(value=False, label="Generate Audio Visualizer", interactive=True) with gr.Group() as audio_visualizer_group: - audio_visualizer_color, audio_visualizer_opacity = gru.render_color_opacity_picker("Audio Visualizer") + audio_visualizer_color, audio_visualizer_opacity = \ + gru.render_color_opacity_picker("Audio Visualizer") with gr.Group(): with gr.Row(): audio_visualizer_num_rows = gr.Number(value=90, label="Number of Rows", diff --git a/ui/music/utils.py b/ui/music/utils.py index 63e6fa8..9690e59 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -37,7 +37,7 @@ def analyze_audio(audio, target_fps): cached_visualizer_background = None -def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, 255, 255, 255), dot_count=(90, 65), # the more dots, the more drawings, meaning slower. 
+def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, 255, 255, 255), dot_count=(90, 65), alias_scale=1, custom_drawing=None): global cached_visualizer_dot_positions, cached_visualizer_background width, height = canvas.size[0] * alias_scale, canvas.size[1] * alias_scale @@ -91,7 +91,8 @@ def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, # avg_loudness = loudness_values.get(column, -80) < if anything breaks, do this # Scale the loudness to the dot size - scaled_loudness = (avg_loudness - min_loudness) / (max_loudness - min_loudness) if max_loudness != min_loudness else 0 + scaled_loudness = (avg_loudness - min_loudness) / (max_loudness - min_loudness) \ + if max_loudness != min_loudness else 0 dot_size = base_size + scaled_loudness * (max_size - base_size) dot_size = min(max(dot_size, base_size), max_size) * alias_scale @@ -101,9 +102,13 @@ def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, if custom_drawing is not None: custom_drawing = custom_drawing.resize((int(dot_size), int(dot_size)), Image.LANCZOS) - large_canvas.paste(custom_drawing, (int(pos_x - dot_size / 2), int(pos_y - dot_size / 2)), custom_drawing) + large_canvas.paste(custom_drawing, + (int(pos_x - dot_size / 2), int(pos_y - dot_size / 2)), + custom_drawing) else: - large_draw.ellipse([(pos_x - dot_size / 2, pos_y - dot_size / 2), (pos_x + dot_size / 2, pos_y + dot_size / 2)], fill=color, outline=color) + large_draw.ellipse([ + (pos_x - dot_size / 2, pos_y - dot_size / 2), (pos_x + dot_size / 2, pos_y + dot_size / 2) + ], fill=color, outline=color) canvas.paste(large_canvas.resize(canvas.size, Image.LANCZOS)) @@ -194,25 +199,34 @@ def create_music_video( song_pos = (20, int(height * 0.925)) text_canvas, (_, song_height) = image_processing.add_text(text_canvas, song, song_pos, - font_families[song_font_type][song_font_style], - font_size=song_font_size, - font_color=image_utils.get_rgba(song_font_color, song_font_opacity), - show_shadow=song_shadow_enabled, shadow_radius=song_shadow_radius, - shadow_color=image_utils.get_rgba(song_shadow_color, - song_shadow_opacity), - show_background=song_background_enabled, - background_color=image_utils.get_rgba(song_background_color, - song_background_opacity)) + font_families[song_font_type][song_font_style], + font_size=song_font_size, + font_color=image_utils.get_rgba(song_font_color, + song_font_opacity), + show_shadow=song_shadow_enabled, + shadow_radius=song_shadow_radius, + shadow_color=image_utils.get_rgba(song_shadow_color, + song_shadow_opacity), + show_background=song_background_enabled, + background_color=image_utils.get_rgba( + song_background_color, + song_background_opacity)) artist_pos = (song_pos[0], song_pos[1] - song_height - 5) text_canvas, (_, artist_height) = image_processing.add_text(text_canvas, artist, artist_pos, - font_families[artist_font_type][artist_font_style], - font_size=artist_font_size, - font_color=image_utils.get_rgba(artist_font_color, artist_font_opacity), - show_shadow=artist_shadow_enabled, - shadow_radius=artist_shadow_radius, - shadow_color=image_utils.get_rgba(artist_shadow_color, artist_shadow_opacity), - show_background=artist_background_enabled, - background_color=image_utils.get_rgba(artist_background_color, artist_background_opacity)) + font_families[artist_font_type][artist_font_style], + font_size=artist_font_size, + font_color=image_utils.get_rgba(artist_font_color, + artist_font_opacity), + show_shadow=artist_shadow_enabled, + 
shadow_radius=artist_shadow_radius, + shadow_color=image_utils.get_rgba(artist_shadow_color, + artist_shadow_opacity + ), + show_background=artist_background_enabled, + background_color=image_utils.get_rgba( + artist_background_color, + artist_background_opacity) + ) text_np = np.array(text_canvas) text_clip = ImageClip(text_np).set_duration(audio_clip.duration) @@ -240,7 +254,7 @@ def create_music_video( preset="medium", verbose=False, # add: logger=None logger=None, - ) + ) return temp_video_path diff --git a/ui/ui.py b/ui/ui.py index f3c82fc..f6093e1 100644 --- a/ui/ui.py +++ b/ui/ui.py @@ -1,5 +1,8 @@ -import gradio as gr +""" +This file contains the main UI code that runs the TrendGenie web app. +""" import os +import gradio as gr import ui.listicles.interface as listicle_interface import ui.music.interface as music_interface diff --git a/utils/gradio.py b/utils/gradio.py index f1f2b31..2add475 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -25,7 +25,8 @@ def render_font_picker(default_font_size=55): with gr.Row(): font_families_list = list(font_families.keys()) initial_font_family = font_families_list[0] if len(font_families_list) > 0 else "" - font_family = gr.Dropdown(font_families_list, value=initial_font_family, label="Font Family", interactive=True) + font_family = gr.Dropdown(font_families_list, value=initial_font_family, label="Font Family", + interactive=True) font_styles_list = list(font_families[initial_font_family].keys() if initial_font_family else []) initial_font_style = font_styles_list[0] if len(font_styles_list) > 0 else "" font_style = gr.Dropdown(font_styles_list, value=initial_font_style, label="Font Style", interactive=True) From 89071fb6bedfa87d2fc136335504afec1f8cd007 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Thu, 11 Jan 2024 00:40:21 -0500 Subject: [PATCH 03/13] Some more linter fixes --- api/chatgpt.py | 6 +++--- ui/components/openai.py | 12 +++++++++++- utils/gradio.py | 34 +++++++++++++++++++++++++++++----- utils/image.py | 13 ++++++++++++- utils/path_handler.py | 11 +++++++++-- 5 files changed, 64 insertions(+), 12 deletions(-) diff --git a/api/chatgpt.py b/api/chatgpt.py index 8aeac86..363ff16 100644 --- a/api/chatgpt.py +++ b/api/chatgpt.py @@ -125,9 +125,9 @@ def get_image_response(client: openai.Client, api_model: str, prompt: str, portr """ Generates an image response using the OpenAI API based on a given prompt and specified parameters. - This function requests the OpenAI API to generate an image based on the provided text prompt. It allows specification - of the model to use and whether the generated image should be in a portrait format. For 'dall-e-3' model, - it supports high-definition (HD) quality image generation. + This function requests the OpenAI API to generate an image based on the provided text prompt. It allows + specification of the model to use and whether the generated image should be in a portrait format. For 'dall-e-3' + model, it supports high-definition (HD) quality image generation. :param client: The OpenAI client to use for making the API call. :param api_model: The model to use for image generation (e.g., 'dall-e-3'). diff --git a/ui/components/openai.py b/ui/components/openai.py index c8ac4b8..4fec6bf 100644 --- a/ui/components/openai.py +++ b/ui/components/openai.py @@ -1,7 +1,17 @@ +""" +This module contains ui components for the OpenAI API. 
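+
+Sketch of the intended wiring inside a gr.Blocks context (variable names are illustrative):
+
+    api_key, text_model, image_model = render_openai_setup()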
+""" import gradio as gr -def render_openai_setup(show_text_model=True, show_image_model=True): +def render_openai_setup(show_text_model: bool = True, show_image_model: bool = True) \ + -> (gr.Textbox, gr.Dropdown, gr.Dropdown): + """ + Renders the OpenAI API setup components. + :param show_text_model: Whether to show the text model dropdown. + :param show_image_model: Whether to show the image model dropdown. + :return: A tuple containing the API key, text model, and image model components. + """ api_text_model = None api_image_model = None with gr.Row(): diff --git a/utils/gradio.py b/utils/gradio.py index 2add475..57a4018 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -1,8 +1,16 @@ +""" +This module contains utility functions for rendering widely-used Gradio components. +""" import gradio as gr -import utils.font_manager as font_manager +from utils import font_manager -def render_color_opacity_picker(default_name_label="Font"): +def render_color_opacity_picker(default_name_label: str = "Font") -> tuple[gr.ColorPicker, gr.Slider]: + """ + Renders a color picker with the appropriate styling. + :param default_name_label: The default name label to use. + :return: A tuple containing the color and opacity components. + """ with gr.Group(): with gr.Row(): color = gr.ColorPicker(label=f"{default_name_label} Color", scale=1, interactive=True) @@ -11,7 +19,13 @@ def render_color_opacity_picker(default_name_label="Font"): return color, opacity -def bind_checkbox_to_visibility(checkbox, group): +def bind_checkbox_to_visibility(checkbox: gr.Checkbox, group: gr.Group): + """ + Binds a checkbox to the visibility of a group. When the checkbox is checked, the group is visible, and when the + checkbox is unchecked, the group is hidden. + :param checkbox: The Checkbox component to bind. + :param group: The Group component to bind. + """ checkbox.change( lambda state: gr.Group(visible=state), inputs=checkbox, @@ -19,7 +33,13 @@ def bind_checkbox_to_visibility(checkbox, group): ) -def render_font_picker(default_font_size=55): +def render_font_picker(default_font_size: int = 55) \ + -> tuple[gr.Dropdown, gr.Dropdown, gr.ColorPicker, gr.Slider, gr.Number]: + """ + Renders a font picker with the appropriate styling. + :param default_font_size: The default font size to use. + :return: A tuple containing the font family, font style, font color, font opacity, and font size components. + """ font_families = font_manager.get_fonts() with gr.Group(): with gr.Row(): @@ -45,5 +65,9 @@ def update_font_styles(selected_font_family): return font_family, font_style, font_color, font_opacity, font_size -def render_tool_description(description): +def render_tool_description(description: str): + """ + Renders a description for a tool with the appropriate styling. + :param description: The description to render. + """ gr.Markdown(description, elem_classes=["tool-description"]) diff --git a/utils/image.py b/utils/image.py index a195c75..af47f1d 100644 --- a/utils/image.py +++ b/utils/image.py @@ -1,5 +1,16 @@ +""" +This file contains functions for image processing. +""" +from typing import Tuple, Union -def get_rgba(color, opacity): + +def get_rgba(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: + """ + Gets the RGBA value for a given color and opacity. + :param color: The color to use. Either a hex string or a tuple of RGB values. + :param opacity: The opacity to use, from 0 to 100. + :return: The RGBA value. 
+ """ # Opacity should be 0 -> 0, 100 -> 255 alpha = int(opacity * 255 / 100) diff --git a/utils/path_handler.py b/utils/path_handler.py index c79358c..706db3f 100644 --- a/utils/path_handler.py +++ b/utils/path_handler.py @@ -1,13 +1,20 @@ +""" +This module contains functions for handling paths. +""" import os from pathlib import Path default_path = None -def get_default_path(): + +def get_default_path() -> str: + """ + Gets the default path for saving files, which is the user's home directory under a folder called "trendgenie". + :return: + """ global default_path if default_path is None: homepath = Path.home() default_path = os.path.join(homepath, "trendgenie") return default_path - From cb6b712486fd8731596c666c2f873050a8c632e7 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Thu, 11 Jan 2024 00:41:11 -0500 Subject: [PATCH 04/13] Remove 3.8 from pylinter --- .github/workflows/pylint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 5730b67..7ea94c9 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} From 87ac5bbe19ba3bc13630d019c4d429180df3fd2b Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Fri, 12 Jan 2024 23:38:21 -0500 Subject: [PATCH 05/13] Some more minor pylint fixes --- api/chatgpt.py | 4 +-- main.py | 5 ++- processing/image.py | 34 +++++++++--------- processing/video.py | 6 ++-- ui/listicles/utils.py | 3 +- ui/music/interface.py | 34 +++++++++++++++--- ui/music/utils.py | 83 +++++++++++++++++++++++++++++-------------- ui/ui.py | 2 +- utils/font_manager.py | 82 ++++++++++++++++++++++++++---------------- utils/gradio.py | 4 +-- utils/path_handler.py | 12 +++---- 11 files changed, 174 insertions(+), 95 deletions(-) diff --git a/api/chatgpt.py b/api/chatgpt.py index 363ff16..debbbc0 100644 --- a/api/chatgpt.py +++ b/api/chatgpt.py @@ -1,11 +1,11 @@ """ This module provides utility functions for interacting with the OpenAI API and Gradio interfaces. """ +import os +from typing import Optional import openai from openai import OpenAI -import os import gradio as gr -from typing import Optional def url_to_gradio_image_name(url: str) -> str: diff --git a/main.py b/main.py index 1ebec69..d314cbe 100755 --- a/main.py +++ b/main.py @@ -1,4 +1,7 @@ -#!/usr/bin/env python3 +""" +This is the main file for the web app. It launches the web app and initializes the font manager and inflect engine. +""" +# !/usr/bin/env python3 # -*- coding: utf-8 -* from ui import ui diff --git a/processing/image.py b/processing/image.py index 9e793ba..995e89f 100644 --- a/processing/image.py +++ b/processing/image.py @@ -1,22 +1,22 @@ """ Module for handling image-related operations in a Gradio interface. 
""" -import PIL -from PIL import ImageFont, ImageDraw, Image, ImageFilter -import numpy as np import textwrap -import gradio as gr import uuid from datetime import datetime import os -import cv2 from pathlib import Path +from typing import Tuple, Optional, Union, Any, Literal +import PIL +from PIL import ImageFont, ImageDraw, Image, ImageFilter +import numpy as np +import gradio as gr +import cv2 from utils import path_handler import utils.gradio as gru -from typing import Tuple, Optional, Union, Any, Literal -image_folder = "images" -default_path = os.path.join(path_handler.get_default_path(), image_folder) +IMAGE_FOLDER = "images" +default_path = os.path.join(path_handler.get_default_path(), IMAGE_FOLDER) def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): @@ -104,17 +104,17 @@ def add_background(image_pil: PIL.Image, draw: PIL.ImageDraw, position: Tuple[in bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] text_height = bbox[3] - bbox[1] - x1 = position[0] - padding[0] # left - y1 = position[1] - padding[1] # top - x2 = x1 + text_width + 2 * padding[0] # right - y2 = y1 + text_height + 2 * padding[1] # bottom + left = position[0] - padding[0] + top = position[1] - padding[1] # top + right = left + text_width + 2 * padding[0] + bottom = top + text_height + 2 * padding[1] rect_img = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) rect_draw = ImageDraw.Draw(rect_img) - rect_draw.rounded_rectangle([x1, y1, x2, y2], fill=fill_color, radius=border_radius) + rect_draw.rounded_rectangle([left, top, right, bottom], fill=fill_color, radius=border_radius) image_pil.paste(rect_img, (0, 0), rect_img) - return (x1 + padding[0], y1 + padding[1]), (x2 - x1, y2 - y1) + return (left + padding[0], top + padding[1]), (right - left, bottom - top) def add_blurred_shadow(image_pil: PIL.Image, text: str, position: Tuple[int, int], font: PIL.ImageFont, @@ -172,7 +172,7 @@ def save_images_to_disk(images: gr.data_classes.GradioRootModel, image_type: Lit """ if not images or len(images.root) == 0: gr.Warning("No images to save.") - return + return None base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) @@ -279,7 +279,7 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - img_width, img_height = image_pil.size + img_width, _ = image_pil.size if max_width: # Prepare for text wrapping if max_width is provided wrapped_text = textwrap.fill(text, width=max_width) @@ -292,7 +292,7 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i y_offset = 0 max_line_width = 0 # Keep track of the widest line total_height = 0 # Accumulate total height of text block - for i, line in enumerate(lines): + for line in lines: bbox = draw.textbbox((0, 0), line, font=font) line_width = bbox[2] - bbox[0] line_height = bbox[3] - bbox[1] diff --git a/processing/video.py b/processing/video.py index 96f8663..3d19e59 100644 --- a/processing/video.py +++ b/processing/video.py @@ -5,13 +5,13 @@ from datetime import datetime from pathlib import Path import os +from typing import Optional, Literal import gradio as gr from moviepy.editor import VideoFileClip from utils import path_handler -from typing import Optional, Literal -video_folder = "videos" -default_path = os.path.join(path_handler.get_default_path(), video_folder) +VIDEO_FOLDER = "videos" +default_path = os.path.join(path_handler.get_default_path(), 
VIDEO_FOLDER) def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index dbea344..d43eeb5 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -1,9 +1,8 @@ import gradio as gr import json -import utils.image as image_utils import processing.image as image_processing import os -import utils.font_manager as font_manager +from utils import font_manager, image as image_utils import api.chatgpt as chatgpt_api diff --git a/ui/music/interface.py b/ui/music/interface.py index 3bac1a5..06f187c 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -1,12 +1,19 @@ +""" +Tbe interface for the music section of the UI. This is the main piece where we define the Gradio interface components. +""" import gradio as gr import utils.gradio as gru -from ui.music.utils import * +from ui.music.utils import generate_cover_image, process, create_music_video import processing.video as video_processing import processing.image as image_processing import ui.components.openai as openai_components -def render_music_section(): +def render_music_section() -> None: + """ + Renders the music cover video section of the UI. + :return: None + """ gru.render_tool_description("Create a cover and a simple video for your music!") with gr.Tab("Generate Cover"): send_cover_to_process_button, send_cover_to_video_button, generated_image_output_path = render_generate_cover() @@ -27,7 +34,13 @@ def render_music_section(): outputs=[music_video_cover_image]) -def render_generate_cover(): +def render_generate_cover() -> (gr.Button, gr.Button, gr.Image): + """ + Renders the cover generation interface component for the music cover creation section. + :return: A tuple containing the following Gradio UI components: A button for generating a cover image, a button for + sending the generated cover image to the "Add Text to Image" section, and an image display component for + displaying the generated cover image. + """ api_key, _, api_image_model = openai_components.render_openai_setup(show_text_model=False) with gr.Row(equal_height=False): with gr.Group(): @@ -49,7 +62,14 @@ def render_generate_cover(): return send_to_process_button, send_to_create_video_button, image_output -def render_process_cover(): +def render_process_cover() -> (gr.Button, gr.Image, gr.Image): + """ + Renders the cover processing interface component for the music cover creation section. This is where we add text to + the cover image. + :return: A tuple containing the following Gradio UI components: A button for processing a cover image, an image + display component for displaying the cover image before processing, and an image display component for + displaying the cover image after processing. + """ with gr.Column(): gr.Markdown("## Input") with gr.Group(): @@ -86,7 +106,11 @@ def render_process_cover(): return send_to_create_video_button, input_image, image_output -def render_music_video_creation(): +def render_music_video_creation() -> gr.Image: + """ + Renders the music video creation interface component for the music cover creation section. + :return: An image display component for displaying the cover image. + """ gr.Markdown("## Input") with gr.Row(equal_height=False): # Sadly we can't use RGBA here due to JPEG images not supporting alpha and breaking. 
It would be nice if Gradio diff --git a/ui/music/utils.py b/ui/music/utils.py index 9690e59..2909657 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -1,26 +1,36 @@ +""" +This file contains the functions and utilities used to generate the music video and cover image. +""" import math -from PIL import Image, ImageFilter, ImageDraw, ImageFont +from typing import Dict, List, Optional +from PIL import Image, ImageFilter, ImageDraw from moviepy.editor import AudioFileClip, ImageClip, CompositeVideoClip, concatenate_videoclips import multiprocessing -import utils.font_manager as font_manager -import utils.image as image_utils +from utils import font_manager, image as image_utils import numpy as np import tempfile -import api.chatgpt as chatgpt_api -import processing.image as image_processing +from api import chatgpt as chatgpt_api +from processing import image as image_processing import librosa -import cProfile -def analyze_audio(audio, target_fps): - y, sr = librosa.load(audio, sr=None) +def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): + """ + Analyzes the audio file at the given path and returns the frequency loudness and times relating to the frequency + loudness. + :param audio_path: The path to the audio file to analyze. + :param target_fps: The target frames per second for the audio visualizer. This is used to downsample the audio so + that it aligns with the video. + :return: A tuple containing the frequency loudness and times relating to the frequency loudness. + """ + y, sr = librosa.load(audio_path, sr=None) D = librosa.stft(y) D_db = librosa.amplitude_to_db(np.abs(D), ref=np.max) frequencies = librosa.fft_frequencies(sr=sr) times = librosa.frames_to_time(np.arange(D_db.shape[1]), sr=sr) - audio_clip = AudioFileClip(audio) + audio_clip = AudioFileClip(audio_path) audio_frames_per_video_frame = len(times) / (target_fps * audio_clip.duration) sample_indices = np.arange(0, len(times), audio_frames_per_video_frame) @@ -33,27 +43,43 @@ def analyze_audio(audio, target_fps): return downsampled_frequency_loudness, downsampled_times -cached_visualizer_dot_positions = None -cached_visualizer_background = None - - -def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, 255, 255, 255), dot_count=(90, 65), - alias_scale=1, custom_drawing=None): - global cached_visualizer_dot_positions, cached_visualizer_background +CACHED_VISUALIZER_DOT_POSITIONS = None +CACHED_VISUALIZER_BACKGROUND = None + + +def draw_visualizer(canvas: Image, frequency_data: Dict[float, float], base_size: int = 1, max_size: int = 7, + color: tuple[int, int, int, int] = (255, 255, 255, 255), dot_count: tuple[int, int] = (90, 65), + alias_scale: int = 1, custom_drawing: Optional[Image] = None) -> None: + """ + Draws a visualizer on the given canvas frame using the frequency data. + :param canvas: The canvas to draw the visualizer on. + :param frequency_data: The frequency data to use for drawing the visualizer. + :param base_size: The base size of the dots (silent). + :param max_size: The maximum size of the dots (loudest portion). + :param color: The color of the dots. + :param dot_count: The number of dots to use in the visualizer. The first value is the number of rows, and the second + value is the number of columns. + :param alias_scale: The alias scale to use for the visualizer. This is used to increase the resolution of the + visualizer. + :param custom_drawing: The custom drawing to use for the visualizer. 
This is used to replace the dots with a custom + image. + :return: + """ + global CACHED_VISUALIZER_DOT_POSITIONS, CACHED_VISUALIZER_BACKGROUND width, height = canvas.size[0] * alias_scale, canvas.size[1] * alias_scale - if cached_visualizer_background is None: - cached_visualizer_background = Image.new("RGBA", (width, height)) - large_canvas = cached_visualizer_background.copy() + if CACHED_VISUALIZER_BACKGROUND is None: + CACHED_VISUALIZER_BACKGROUND = Image.new("RGBA", (width, height)) + large_canvas = CACHED_VISUALIZER_BACKGROUND.copy() large_draw = ImageDraw.Draw(large_canvas) # In case the dot count changes, recalculate the dot positions - if cached_visualizer_dot_positions is None or len(cached_visualizer_dot_positions) != dot_count[0] * dot_count[1]: + if CACHED_VISUALIZER_DOT_POSITIONS is None or len(CACHED_VISUALIZER_DOT_POSITIONS) != dot_count[0] * dot_count[1]: # Calculate and store dot positions x_positions = (width / dot_count[0]) * np.arange(dot_count[0]) + (width / dot_count[0] / 2) y_positions = (height / dot_count[1]) * np.arange(dot_count[1]) + (height / dot_count[1] / 2) grid_x, grid_y = np.meshgrid(x_positions, y_positions) - cached_visualizer_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(dot_count[0]) for y in + CACHED_VISUALIZER_DOT_POSITIONS = [(grid_x[y, x], grid_y[y, x]) for x in range(dot_count[0]) for y in range(dot_count[1])] # Precompute log frequencies @@ -83,7 +109,7 @@ def draw_visualizer(canvas, frequency_data, base_size=1, max_size=7, color=(255, loudness_values[x] = avg_loudness cached_dot_sizes = {} - for i, (pos_x, pos_y) in enumerate(cached_visualizer_dot_positions): + for i, (pos_x, pos_y) in enumerate(CACHED_VISUALIZER_DOT_POSITIONS): column = i // dot_count[1] # Ensure the correct column is computed if column not in cached_dot_sizes: @@ -175,8 +201,6 @@ def create_music_video( if time_point > audio_clip.duration: break frame = frame_cache.copy() - # cProfile.runctx("draw_visualizer(frame, frequency_loudness[i], color=audio_visualizer_color_opacity)", - # locals=locals(), globals=globals()) draw_visualizer(frame, frequency_loudness[i], color=audio_visualizer_color_opacity, custom_drawing=custom_drawing, base_size=audio_visualizer_min_size, max_size=audio_visualizer_max_size, dot_count=(audio_visualizer_num_rows, @@ -252,14 +276,21 @@ def create_music_video( temp_audiofile=temp_audio_path, threads=threads, preset="medium", - verbose=False, # add: logger=None + verbose=False, logger=None, ) return temp_video_path -def generate_cover_image(api_key, api_model, prompt): +def generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[str]: + """ + Generates a cover image using the OpenAI API based on a given prompt and specified parameters. + :param api_key: The API key to use for the OpenAI API. + :param api_model: The model to use for image generation (e.g., 'dall-e-3'). + :param prompt: The text prompt based on which the image is generated. + :return: The URL of the generated image, or None if no image was generated or if there was an error. + """ client = chatgpt_api.get_openai_client(api_key) image_url = chatgpt_api.get_image_response(client, api_model, prompt, portrait=False) if image_url is None or image_url == "": diff --git a/ui/ui.py b/ui/ui.py index f6093e1..9d52763 100644 --- a/ui/ui.py +++ b/ui/ui.py @@ -8,7 +8,7 @@ # Read the styles.css file and add it to the page. 
css_file = os.path.join(os.path.dirname(__file__), "styles.css") -with open(css_file, "r") as file: +with open(css_file, "r", encoding="utf-8") as file: css = file.read() with gr.Blocks(theme=gr.themes.Soft(), css=css) as WebApp: diff --git a/utils/font_manager.py b/utils/font_manager.py index bb53315..6114b8d 100644 --- a/utils/font_manager.py +++ b/utils/font_manager.py @@ -1,12 +1,18 @@ +""" +This module manages the fonts and the inflect engine. +""" import glob import os +from typing import Optional, Dict, TypeAlias from fontTools.ttLib import TTFont import inflect -import utils.path_handler as path_handler +from utils import path_handler -font_families = None -p = None -fonts_dirs = [ +NestedDict: TypeAlias = Dict[str, Dict[str, str]] + +FONT_FAMILIES: Optional[NestedDict] = None +P: Optional[inflect.engine] = None +FONTS_DIRS = [ # MacOS "/Library/Fonts", "~/Library/Fonts", "System/Library/Fonts", # Linux @@ -16,31 +22,37 @@ ] -def initialize_inflect(): - global p - if p is None: - p = inflect.engine() +def initialize_inflect() -> inflect.engine: + """ + Initializes the inflect engine. + :return: The inflect engine. + """ + global P + if P is None: + P = inflect.engine() - return p + return P -def initialize_fonts(): - global font_families - if font_families is None: - font_families = font_families +def initialize_fonts() -> NestedDict: + """ + Initializes the font families from the global FONTS_DIRS. + :return: The font families and their paths. They are called by map[font_family][font_style]. + """ + global FONT_FAMILIES font_files = [] # Add TrendGenie fonts trendgenie_fonts_dir = os.path.join(path_handler.get_default_path(), "fonts") - fonts_dirs.append(trendgenie_fonts_dir) - for fonts_dir in fonts_dirs: + FONTS_DIRS.append(trendgenie_fonts_dir) + for fonts_dir in FONTS_DIRS: fonts_dir = os.path.expanduser(fonts_dir) if not os.path.exists(fonts_dir): continue font_files += glob.glob(os.path.join(fonts_dir, "**/*.ttf"), recursive=True) font_files += glob.glob(os.path.join(fonts_dir, "**/*.otf"), recursive=True) - font_families = {} + FONT_FAMILIES = {} for font_file in font_files: font = TTFont(font_file) name = font['name'] @@ -52,22 +64,32 @@ def initialize_fonts(): elif record.nameID == 2 and b'\000' in record.string: style_name = record.string.decode('utf-16-be').rstrip('\0') if family_name and style_name: - if family_name not in font_families: - font_families[family_name] = {} - font_families[family_name][style_name] = font_file + if family_name not in FONT_FAMILIES: + FONT_FAMILIES[family_name] = {} + FONT_FAMILIES[family_name][style_name] = font_file + + return FONT_FAMILIES + - return font_families +def get_fonts() -> NestedDict: + """ + Gets the font families. If they are not initialized, it initializes them. + :return: The font families and their paths. They are called by map[font_family][font_style]. + """ + global FONT_FAMILIES + if FONT_FAMILIES is None: + FONT_FAMILIES = initialize_fonts() -def get_fonts(): - global font_families - if font_families is None: - font_families = initialize_fonts() + return FONT_FAMILIES - return font_families -def get_inflect(): - global p - if p is None: - p = initialize_inflect() +def get_inflect() -> inflect.engine: + """ + Gets the inflect engine. If it is not initialized, it initializes it. + :return: The inflect engine. 
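+
+    Example (illustrative): get_inflect().number_to_words(3) returns "three".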
+ """ + global P + if P is None: + P = initialize_inflect() - return p \ No newline at end of file + return P diff --git a/utils/gradio.py b/utils/gradio.py index 57a4018..f086830 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -54,8 +54,8 @@ def render_font_picker(default_font_size: int = 55) \ def update_font_styles(selected_font_family): if selected_font_family is None or selected_font_family == "": return [] - font_syles = list(font_families[selected_font_family].keys()) - return gr.Dropdown(font_syles, value=font_syles[0], label="Font Style") + font_styles = list(font_families[selected_font_family].keys()) + return gr.Dropdown(font_styles, value=font_styles[0], label="Font Style") font_family.change(update_font_styles, inputs=[font_family], outputs=[font_style]) with gr.Group(): diff --git a/utils/path_handler.py b/utils/path_handler.py index 706db3f..58251cb 100644 --- a/utils/path_handler.py +++ b/utils/path_handler.py @@ -4,17 +4,17 @@ import os from pathlib import Path -default_path = None +DEFAULT_PATH = None def get_default_path() -> str: """ Gets the default path for saving files, which is the user's home directory under a folder called "trendgenie". - :return: + :return: The default path. """ - global default_path - if default_path is None: + global DEFAULT_PATH + if DEFAULT_PATH is None: homepath = Path.home() - default_path = os.path.join(homepath, "trendgenie") + DEFAULT_PATH = os.path.join(homepath, "trendgenie") - return default_path + return DEFAULT_PATH From 66b37ec03cdea8d427cce1ba181372ee594cf67c Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Sun, 14 Jan 2024 20:49:48 -0500 Subject: [PATCH 06/13] Add type checks to resolve dynamic members --- .pylintrc | 5 ++++- utils/font_manager.py | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.pylintrc b/.pylintrc index d047969..59c5a5c 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,2 +1,5 @@ [MAIN] -max-line-length=120 \ No newline at end of file +max-line-length=120 + +[TYPECHECK] +generated-members=gradio.components.dropdown.*,gradio.components.button.*,cv2.* \ No newline at end of file diff --git a/utils/font_manager.py b/utils/font_manager.py index 6114b8d..4074ec1 100644 --- a/utils/font_manager.py +++ b/utils/font_manager.py @@ -3,12 +3,12 @@ """ import glob import os -from typing import Optional, Dict, TypeAlias +from typing import Optional, Dict from fontTools.ttLib import TTFont import inflect from utils import path_handler -NestedDict: TypeAlias = Dict[str, Dict[str, str]] +NestedDict = Dict[str, Dict[str, str]] FONT_FAMILIES: Optional[NestedDict] = None P: Optional[inflect.engine] = None From 2d4cd614b36e4d4fa4c0e266ca584553c366ad8d Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Sun, 14 Jan 2024 21:11:28 -0500 Subject: [PATCH 07/13] Some more linter fixes - image.py --- processing/image.py | 125 ++++++++++++++++++++++++++++++++------------ 1 file changed, 92 insertions(+), 33 deletions(-) diff --git a/processing/image.py b/processing/image.py index ec6acd8..03f1f6a 100644 --- a/processing/image.py +++ b/processing/image.py @@ -1,3 +1,4 @@ +from typing import Optional, Literal from PIL import ImageFont, ImageDraw, Image, ImageFilter import numpy as np import textwrap @@ -14,7 +15,11 @@ default_path = os.path.join(path_handler.get_default_path(), image_folder) -def render_image_output(): +def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): + """ + Renders the image output components. 
+ :return: A tuple containing the image output, image name, image suffix, and save image button components. + """ image_output = gr.Image(elem_classes=["single-image-output"], label="Image Output", interactive=False, show_download_button=False, type="filepath") @@ -27,7 +32,14 @@ def render_image_output(): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name): +def render_text_editor_parameters(name: str) -> ((gr.Dropdown, gr.Dropdown, gr.ColorPicker, gr.Slider, gr.Number), + (gr.Checkbox, gr.ColorPicker, gr.Slider, gr.Number), + (gr.Checkbox, gr.ColorPicker, gr.Slider)): + """ + Renders the text editor parameters. + :param name: The name of the text editor parameters. This is used as the label for the accordion. + :return: A tuple containing the font, drop shadow, and background components. + """ with gr.Accordion(label=name): with gr.Column(): font_family, font_style, font_color, font_opacity, font_size = gru.render_font_picker() @@ -48,26 +60,51 @@ def render_text_editor_parameters(name): (background_checkbox, background_color, background_opacity)) -def add_background(image_pil, draw, position, text, font, padding=(15, 5), fill_color=(0, 0, 0, 255), border_radius=0): +def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], text: str, font: ImageFont, + padding: tuple[int, int] = (15, 5), fill_color: tuple[int, int, int, int] = (0, 0, 0, 255), + border_radius: int = 0) -> (tuple[int, int], tuple[int, int]): + """ + Adds a background to the text. + :param image_pil: The PIL image to add the background to. + :param draw: The PIL draw object to use. + :param position: The position of the text on the image. + :param text: The text to add the background to. + :param font: The font to use. + :param padding: The padding between the font and background. + :param fill_color: The color of the background. + :param border_radius: The border radius of the background. + :return: A tuple containing the position of the text and the size of the background. + """ # Calculate width and height of text with padding bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] text_height = bbox[3] - bbox[1] - x1 = position[0] - padding[0] # left - y1 = position[1] - padding[1] # top - x2 = x1 + text_width + 2 * padding[0] # right - y2 = y1 + text_height + 2 * padding[1] # bottom + left = position[0] - padding[0] + top = position[1] - padding[1] + right = left + text_width + 2 * padding[0] + bottom = top + text_height + 2 * padding[1] rect_img = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) rect_draw = ImageDraw.Draw(rect_img) - rect_draw.rounded_rectangle([x1, y1, x2, y2], fill=fill_color, radius=border_radius) + rect_draw.rounded_rectangle([left, top, right, bottom], fill=fill_color, radius=border_radius) image_pil.paste(rect_img, (0, 0), rect_img) - return (x1 + padding[0], y1 + padding[1]), (x2 - x1, y2 - y1) - - -def add_blurred_shadow(image_pil, text, position, font, shadow_color=(0, 0, 0), shadow_offset=(0, 0), - blur_radius=1): + return (left + padding[0], top + padding[1]), (right - left, bottom - top) + + +def add_blurred_shadow(image_pil: Image, text: str, position: tuple[float, float], font: ImageFont, + shadow_color: tuple[int, int, int, int] = (0, 0, 0, 0), shadow_offset: tuple[int, int] = (0, 0), + blur_radius: int = 1) -> None: + """ + Adds a blurred shadow (or highlight) to the text. + :param image_pil: The PIL image to add the shadow to. + :param text: The text to add the shadow to. 
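
# Editor's note: a self-contained usage sketch for add_background as documented
# above (not part of the patch). The canvas size, text, and font file are
# placeholder assumptions.
from PIL import Image, ImageDraw, ImageFont

canvas = Image.new("RGBA", (400, 200), (255, 255, 255, 255))
draw = ImageDraw.Draw(canvas)
font = ImageFont.truetype("DejaVuSans.ttf", 32)  # hypothetical font file
text_pos, bg_size = add_background(canvas, draw, (60, 80), "Hello", font,
                                   fill_color=(0, 0, 0, 200), border_radius=8)
draw.text(text_pos, "Hello", font=font, fill=(255, 255, 255, 255))
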
+ :param position: The position of the text on the image. + :param font: The font to use. + :param shadow_color: The color of the shadow. + :param shadow_offset: The offset of the shadow. + :param blur_radius: The blur radius of the shadow. + """ # Create an image for the shadow shadow_image = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) shadow_draw = ImageDraw.Draw(shadow_image) @@ -83,7 +120,13 @@ def add_blurred_shadow(image_pil, text, position, font, shadow_color=(0, 0, 0), image_pil.paste(blurred_shadow, (0, 0), blurred_shadow) -def read_image_from_disk(filepath, size=None): +def read_image_from_disk(filepath: str, size: Optional[cv2.typing.Size] = None) -> np.ndarray: + """ + Reads and returns an image from disk using CV2. + :param filepath: The path to the image. + :param size: The size to resize the image to. + :return: The image as a NumPy array. + """ img = cv2.imread(filepath, cv2.IMREAD_UNCHANGED) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA) # Convert to RGBA for PIL usage if size: @@ -91,20 +134,27 @@ def read_image_from_disk(filepath, size=None): return img -# This assumes the images are from a gallery, which is why it checks for the 'root' attribute. -def save_images_to_disk(images, image_type, dir=default_path): +def save_images_to_disk(images: gr.data_classes.RootModel, image_type: gr.Dropdown, save_dir: str = default_path) -> \ + Optional[str]: + """ + Saves a list of images to disk. + :param images: The list of images to save. Imported from a gradio.Gallery component. + :param image_type: The type of image to save. + :param save_dir: The directory to save the images to. + :return: The directory the images were saved to. None if there was an error. + """ if not images or len(images.root) == 0: gr.Warning("No images to save.") - return + return None - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") unique_id = uuid.uuid4() - dir = f"{base_dir}/{date}/{unique_id}" + save_dir = f"{base_dir}/{date}/{unique_id}" - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) for index, image_container in enumerate(images.root): image = image_container.image @@ -113,25 +163,34 @@ def save_images_to_disk(images, image_type, dir=default_path): continue filename = f"{index}.{image_type}" - filepath = os.path.join(dir, filename) + filepath = os.path.join(save_dir, filename) img = cv2.imread(image.path, cv2.IMREAD_UNCHANGED) cv2.imwrite(filepath, img) - gr.Info(f"Saved generated images to {dir}.") - return dir + gr.Info(f"Saved generated images to {save_dir}.") + return save_dir -def save_image_to_disk(image_path, name, image_suffix=".png", dir=default_path): +def save_image_to_disk(image_path: str, name: Optional[str] = None, save_dir: str = default_path, + image_suffix: Literal[".png", ".jpg", ".webp"] = ".png") -> Optional[str]: + """ + Saves an image to disk. + :param image_path: The path to the temporary image. + :param name: The name to give the saved image. + :param save_dir: The directory to save the image to. + :param image_suffix: The suffix to give the saved image. + :return: The directory the image was saved to. None if there was an error. 
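
# Editor's note: both save helpers above derive a collision-free output directory
# from the current date plus a random UUID; condensed, the scheme is:
import os
import uuid
from datetime import datetime
from pathlib import Path

base_dir = Path.home() / "trendgenie" / "images"
save_dir = os.path.join(base_dir, datetime.now().strftime("%m%d%Y"), str(uuid.uuid4()))
os.makedirs(save_dir, exist_ok=True)
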
+ """ if image_path is None: gr.Warning("No image to save.") - return + return None - base_dir = Path(dir) if Path(dir).is_absolute() else Path("/").joinpath(dir) + base_dir = Path(save_dir) if Path(save_dir).is_absolute() else Path("/").joinpath(save_dir) date = datetime.now().strftime("%m%d%Y") unique_id = uuid.uuid4() - dir = f"{base_dir}/{date}/{unique_id}" + save_dir = f"{base_dir}/{date}/{unique_id}" if name is None or name == "": unique_id = uuid.uuid4() @@ -141,15 +200,15 @@ def save_image_to_disk(image_path, name, image_suffix=".png", dir=default_path): name = Path(name).stem name = f"{name}{image_suffix}" - if not os.path.exists(dir): - os.makedirs(dir) + if not os.path.exists(save_dir): + os.makedirs(save_dir) - filepath = os.path.join(dir, name) + filepath = os.path.join(save_dir, name) img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) cv2.imwrite(filepath, img) - gr.Info(f"Saved generated image to {dir}.") - return dir + gr.Info(f"Saved generated image to {save_dir}.") + return save_dir # Function to add text to an image with custom font, size, and wrapping From fd7b6c383d74eeecb9b960223cd0ffa1161f371f Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Sun, 14 Jan 2024 22:03:26 -0500 Subject: [PATCH 08/13] More linting - ui/music/utils.py --- .github/workflows/pylint.yml | 2 +- api/chatgpt.py | 25 ++--- processing/image.py | 51 +++++++--- ui/music/interface.py | 5 +- ui/music/utils.py | 189 ++++++++++++++++++++++++++--------- utils/image.py | 29 +++++- 6 files changed, 224 insertions(+), 77 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 7ea94c9..2d4b561 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -5,7 +5,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/api/chatgpt.py b/api/chatgpt.py index debbbc0..e80d050 100644 --- a/api/chatgpt.py +++ b/api/chatgpt.py @@ -80,8 +80,8 @@ def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: # Give the model previous chat context if context is not None and len(context) > 0: - for c in context: - message.append(c) + for curr_context in context: + message.append(curr_context) message.append({ "role": "user", @@ -102,16 +102,17 @@ def get_chat_response(client: openai.Client, api_model: str, role: str, prompt: response = response.choices[0] if response.finish_reason != "stop": - if response.finish_reason == "length": - gr.Warning( - f"finish_reason: {response.finish_reason}. The maximum number of tokens specified in the request was " - f"reached.") - return None - elif response.finish_reason == "content_filter": - gr.Warning( - f"finish_reason: {response.finish_reason}. The content was omitted due to a flag from OpenAI's content " - f"filters.") - return None + match response.finish_reason: + case "length": + gr.Warning( + f"finish_reason: {response.finish_reason}. The maximum number of tokens specified in the request " + f"was reached.") + return None + case "content_filter": + gr.Warning( + f"finish_reason: {response.finish_reason}. 
The content was omitted due to a flag from OpenAI's " + f"content filters.") + return None content = response.message.content if content is None or content == "": diff --git a/processing/image.py b/processing/image.py index 03f1f6a..fd2cec9 100644 --- a/processing/image.py +++ b/processing/image.py @@ -1,18 +1,21 @@ -from typing import Optional, Literal -from PIL import ImageFont, ImageDraw, Image, ImageFilter -import numpy as np +""" +This module contains functions for processing images. +""" import textwrap -import gradio as gr import uuid -from datetime import datetime import os -import cv2 from pathlib import Path -import utils.path_handler as path_handler +from datetime import datetime +from typing import Optional, Literal, Union, Tuple +from PIL import ImageFont, ImageDraw, Image, ImageFilter +import numpy as np +import gradio as gr +import cv2 +from utils import path_handler import utils.gradio as gru -image_folder = "images" -default_path = os.path.join(path_handler.get_default_path(), image_folder) +IMAGE_FOLDER = "images" +default_path = os.path.join(path_handler.get_default_path(), IMAGE_FOLDER) def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): @@ -212,9 +215,29 @@ def save_image_to_disk(image_path: str, name: Optional[str] = None, save_dir: st # Function to add text to an image with custom font, size, and wrapping -def add_text(image, text, position, font_path, font_size, font_color=(255, 255, 255, 255), shadow_color=(255, 255, 255), - shadow_radius=None, max_width=None, show_background=False, show_shadow=False, - background_color=(0, 0, 0, 255), x_center=False): +def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[int, int], font_path: str, + font_size: int, font_color: Tuple[int, int, int, int] = (255, 255, 255, 255), + shadow_color: Tuple[int, int, int, int] = (255, 255, 255, 255), + shadow_radius: Optional[int] = None, max_width: Optional[int] = None, show_background: bool = False, + show_shadow: bool = False, background_color: Tuple[int, int, int, int] = (0, 0, 0, 255), + x_center: bool = False) -> (np.ndarray, Tuple[int, int]): + """ + Adds text to an image with custom font, size, and wrapping. + :param image: The image to add text to. + :param text: The text to add to the image. + :param position: The (x, y) position of the text on the image. + :param font_path: The path to the font to use. + :param font_size: The size of the font. + :param font_color: The color of the font. + :param shadow_color: The color of the shadow. + :param shadow_radius: The radius of the shadow. + :param max_width: The maximum width of the text before wrapping. + :param show_background: Whether to show a background behind the text. + :param show_shadow: Whether to show a shadow behind the text. + :param background_color: The color of the background. + :param x_center: Whether to center the text on the x-axis. This ignores the positional x parameter. + :return: A tuple containing the image with text added and the size of the text block. 
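
# Editor's note: the match/case rewrite in api/chatgpt.py above is also why this
# patch bumps the CI matrix from 3.9/3.10 to 3.10/3.11 -- structural pattern
# matching (PEP 634) only exists on Python >= 3.10. A minimal equivalent sketch,
# with a placeholder value:
finish_reason = "length"
match finish_reason:
    case "length":
        print("token limit reached")
    case "content_filter":
        print("flagged by content filters")
    case _:
        print("unhandled finish_reason")
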
+ """ if not isinstance(position, tuple): raise TypeError("Position must be a 2-tuple.", type(position)) @@ -231,7 +254,7 @@ def add_text(image, text, position, font_path, font_size, font_color=(255, 255, font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - img_width, img_height = image_pil.size + img_width, _ = image_pil.size if max_width: # Prepare for text wrapping if max_width is provided wrapped_text = textwrap.fill(text, width=max_width) @@ -244,7 +267,7 @@ def add_text(image, text, position, font_path, font_size, font_color=(255, 255, y_offset = 0 max_line_width = 0 # Keep track of the widest line total_height = 0 # Accumulate total height of text block - for i, line in enumerate(lines): + for line in lines: bbox = draw.textbbox((0, 0), line, font=font) line_width = bbox[2] - bbox[0] line_height = bbox[3] - bbox[1] diff --git a/ui/music/interface.py b/ui/music/interface.py index 1f13624..3361495 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -73,8 +73,9 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): with gr.Column(): gr.Markdown("## Input") with gr.Group(): - input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", show_download_button=False, - scale=2, elem_classes=["single-image-input"], image_mode="RGBA") + input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", + show_download_button=False, scale=2, elem_classes=["single-image-input"], + image_mode="RGBA") with gr.Row(equal_height=False): with gr.Group(): diff --git a/ui/music/utils.py b/ui/music/utils.py index 4ff294c..7caebca 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -1,29 +1,41 @@ +""" +This file contains the functions and utilities used to generate the music video and cover image. +""" import os import subprocess import re import time import cv2 +from typing import List, Dict, Optional from moviepy.editor import AudioFileClip -import utils.font_manager as font_manager +from utils import font_manager import utils.image as image_utils import numpy as np import tempfile -import api.chatgpt as chatgpt_api -import processing.image as image_processing +from api import chatgpt as chatgpt_api +from processing import image as image_processing import librosa from utils import progress, visualizer import cProfile -def analyze_audio(audio, target_fps): - y, sr = librosa.load(audio, sr=None) - D = librosa.stft(y) - D_db = librosa.amplitude_to_db(np.abs(D), ref=np.max) +def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): + """ + Analyzes the audio file at the given path and returns the frequency loudness and times relating to the frequency + loudness. + :param audio_path: The path to the audio file to analyze. + :param target_fps: The target frames per second for the audio visualizer. This is used to downsample the audio so + that it aligns with the video. + :return: A tuple containing the frequency loudness and times relating to the frequency loudness. 
+ """ + y, sr = librosa.load(audio_path, sr=None) + d = librosa.stft(y) + d_db = librosa.amplitude_to_db(np.abs(d), ref=np.max) frequencies = librosa.fft_frequencies(sr=sr) - times = librosa.frames_to_time(np.arange(D_db.shape[1]), sr=sr) + times = librosa.frames_to_time(np.arange(d_db.shape[1]), sr=sr) - audio_clip = AudioFileClip(audio) + audio_clip = AudioFileClip(audio_path) audio_frames_per_video_frame = len(times) / (target_fps * audio_clip.duration) sample_indices = np.arange(0, len(times), audio_frames_per_video_frame) @@ -31,36 +43,82 @@ def analyze_audio(audio, target_fps): sample_indices = sample_indices[sample_indices < len(times)] downsampled_times = times[sample_indices] - downsampled_frequency_loudness = [dict(zip(frequencies, D_db[:, i])) for i in sample_indices] + downsampled_frequency_loudness = [dict(zip(frequencies, d_db[:, i])) for i in sample_indices] return downsampled_frequency_loudness, downsampled_times def create_music_video( - image, audio, fps, - artist, artist_font_type, artist_font_style, artist_font_size, artist_font_color, artist_font_opacity, - artist_shadow_enabled, artist_shadow_color, artist_shadow_opacity, artist_shadow_radius, - artist_background_enabled, artist_background_color, artist_background_opacity, - song, song_font_type, song_font_style, song_font_size, song_font_color, song_font_opacity, song_shadow_enabled, - song_shadow_color, song_shadow_opacity, song_shadow_radius, song_background_enabled, song_background_color, - song_background_opacity, - background_color=(0, 0, 0), background_opacity=66, generate_audio_visualizer=False, - audio_visualizer_color=(255, 255, 255), audio_visualizer_opacity=100, visualizer_drawing=None, - audio_visualizer_num_rows=90, audio_visualizer_num_columns=65, audio_visualizer_min_size=1, - audio_visualizer_max_size=7): - if image is None: + image_path: str, audio_path: str, fps: int, + artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int, + artist_font_color: tuple[int, int, int], artist_font_opacity: int, artist_shadow_enabled: bool, + artist_shadow_color: tuple[int, int, int], artist_shadow_opacity: int, artist_shadow_radius: int, + artist_background_enabled: bool, artist_background_color: tuple[int, int, int], artist_background_opacity: int, + song: str, song_font_type: str, song_font_style: str, song_font_size: int, + song_font_color: tuple[int, int, int], song_font_opacity: int, song_shadow_enabled: bool, + song_shadow_color: tuple[int, int, int], song_shadow_opacity: int, song_shadow_radius: int, + song_background_enabled: bool, song_background_color: tuple[int, int, int], song_background_opacity: int, + background_color: tuple[int, int, int] = (0, 0, 0), background_opacity: int = 66, + generate_audio_visualizer: bool = False, audio_visualizer_color: tuple[int, int, int] =(255, 255, 255), + audio_visualizer_opacity: int = 100, visualizer_drawing: Optional[str] = None, + audio_visualizer_num_rows: int = 90, audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1, + audio_visualizer_max_size: int = 7) -> Optional[str]: + """ + Creates a music video using the given parameters. + :param image_path: The path to the image to use as the cover + background for the video. + :param audio_path: The path to the audio file to use for the video. + :param fps: The frames per second to use for the video. + :param artist: The artist name to add to the video. + :param artist_font_type: The font family to use for the artist name. 
+ :param artist_font_style: The font style to use for the artist name. + :param artist_font_size: The font size to use for the artist name. + :param artist_font_color: The font color to use for the artist name. + :param artist_font_opacity: The font opacity to use for the artist name. + :param artist_shadow_enabled: Whether to show a shadow for the artist name. + :param artist_shadow_color: The shadow color to use for the artist name. + :param artist_shadow_opacity: The shadow opacity to use for the artist name. + :param artist_shadow_radius: The shadow radius to use for the artist name. + :param artist_background_enabled: Whether to show a background for the artist name. + :param artist_background_color: The background color to use for the artist name. + :param artist_background_opacity: The background opacity to use for the artist name. + :param song: The song name to add to the video. + :param song_font_type: The font family to use for the song name. + :param song_font_style: The font style to use for the song name. + :param song_font_size: The font size to use for the song name. + :param song_font_color: The font color to use for the song name. + :param song_font_opacity: The font opacity to use for the song name. + :param song_shadow_enabled: Whether to show a shadow for the song name. + :param song_shadow_color: The shadow color to use for the song name. + :param song_shadow_opacity: The shadow opacity to use for the song name. + :param song_shadow_radius: The shadow radius to use for the song name. + :param song_background_enabled: Whether to show a background for the song name. + :param song_background_color: The background color to use for the song name. + :param song_background_opacity: The background opacity to use for the song name. + :param background_color: The background color to use for the video. + :param background_opacity: The background opacity to use for the video. + :param generate_audio_visualizer: Whether to generate an audio visualizer for the video. + :param audio_visualizer_color: The color to use for the audio visualizer. + :param audio_visualizer_opacity: The opacity to use for the audio visualizer. + :param visualizer_drawing: The path to the image to use for the audio visualizer. If None, uses a circle. + :param audio_visualizer_num_rows: The number of rows to use for the audio visualizer's drawings. + :param audio_visualizer_num_columns: The number of columns to use for the audio visualizer's drawings. + :param audio_visualizer_min_size: The minimum size to use for the audio visualizer's drawings (silence). + :param audio_visualizer_max_size: The maximum size to use for the audio visualizer's drawings (peak loudness). + :return: The path to the generated video, or None if there was an error. 
+ """ + if image_path is None: print("No cover image for the video.") - return - if audio is None: + return None + if audio_path is None: print("No audio to add to the video.") - return + return None # Could probably expand to 4k, but unnecessary for this type of music video # Maybe in a future iteration it could be worth it width, height = 1920, 1080 # Set up cover - cover = cv2.imread(image, cv2.IMREAD_UNCHANGED) + cover = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) if cover.shape[2] == 3: cover = cv2.cvtColor(cover, cv2.COLOR_BGR2RGBA) else: @@ -84,10 +142,10 @@ def create_music_video( canvas[cover_pos[1]:cover_pos[1] + new_height, cover_pos[0]:cover_pos[0] + new_width] = cover # Load song / audio - audio_clip = AudioFileClip(audio) + audio_clip = AudioFileClip(audio_path) # Add video background - background = cv2.imread(image) + background = cv2.imread(image_path) background = cv2.resize(background, (width, height)) background = cv2.GaussianBlur(background, (49, 49), 0) if background.shape[2] == 3: @@ -117,7 +175,7 @@ def create_music_video( if generate_audio_visualizer: print("Generating audio visualizer...") - frequency_loudness, times = analyze_audio(audio, fps) + frequency_loudness, times = analyze_audio(audio_path, fps) frame_cache = np.zeros((height, width, 4), dtype=np.uint8) total_iterations = len(times) @@ -161,16 +219,16 @@ def create_music_video( song_background_opacity)) artist_pos = (song_pos[0], song_pos[1] - song_height - 5) text_canvas, (_, _) = image_processing.add_text(text_canvas, artist, artist_pos, - font_families[artist_font_type][artist_font_style], - font_size=artist_font_size, - font_color=image_utils.get_rgba(artist_font_color, - artist_font_opacity), - show_shadow=artist_shadow_enabled, - shadow_radius=artist_shadow_radius, - shadow_color=image_utils.get_rgba(artist_shadow_color, - artist_shadow_opacity), - show_background=artist_background_enabled, - background_color=image_utils.get_rgba( + font_families[artist_font_type][artist_font_style], + font_size=artist_font_size, + font_color=image_utils.get_rgba(artist_font_color, + artist_font_opacity), + show_shadow=artist_shadow_enabled, + shadow_radius=artist_shadow_radius, + shadow_color=image_utils.get_rgba(artist_shadow_color, + artist_shadow_opacity), + show_background=artist_background_enabled, + background_color=image_utils.get_rgba( artist_background_color, artist_background_opacity)) text_np = np.array(text_canvas) @@ -216,7 +274,7 @@ def create_music_video( ffmpeg_commands.extend([ "-framerate", str(fps), "-i", temp_canvas_image_path, - "-i", audio, + "-i", audio_path, "-filter_complex", filter_complex, "-map", audio_input_map, "-c:v", "libx264", @@ -268,7 +326,14 @@ def create_music_video( return temp_final_video_path -def generate_cover_image(api_key, api_model, prompt): +def generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[str]: + """ + Generates a cover image using the OpenAI API based on a given prompt and specified parameters. + :param api_key: The API key to use for the OpenAI API. + :param api_model: The model to use for image generation (e.g., 'dall-e-3'). + :param prompt: The text prompt based on which the image is generated. + :return: The URL of the generated image, or None if no image was generated or if there was an error. 
+ """ client = chatgpt_api.get_openai_client(api_key) image_url = chatgpt_api.get_image_response(client, api_model, prompt, portrait=False) if image_url is None or image_url == "": @@ -277,12 +342,46 @@ def generate_cover_image(api_key, api_model, prompt): return chatgpt_api.url_to_gradio_image_name(image_url) -def process(image_path, artist, song, - af_family, af_style, afs, afc, afo, ase, asc, aso, asr, abe, abc, abo, - sf_family, sf_style, sfs, sfc, sfo, sse, ssc, sso, ssr, sbe, sbc, sbo): +def process(image_path: str, artist: str, song: str, + af_family: str, af_style: str, afs: int, afc: tuple[int, int, int], afo: int, ase: bool, + asc: tuple[int, int, int], aso: int, asr: Optional[int], abe: bool, abc: tuple[int, int, int], abo: int, + sf_family: str, sf_style: str, sfs: int, sfc: tuple[int, int, int], sfo: int, sse: bool, + ssc: tuple[int, int, int], sso: int, ssr: Optional[int], sbe: bool, sbc: tuple[int, int, int], sbo: int) \ + -> Optional[np.ndarray]: + """ + Processes the image at the given path (by adding the requested text) and returns the processed image. + :param image_path: The path to the image to process. + :param artist: The artist name to add to the image. + :param song: The song name to add to the image. + :param af_family: The font family to use for the artist name. + :param af_style: The font style to use for the artist name. + :param afs: The font size to use for the artist name. + :param afc: The font color to use for the artist name. + :param afo: The font opacity to use for the artist name. + :param ase: Whether to show a shadow for the artist name. + :param asc: The shadow color to use for the artist name. + :param aso: The shadow opacity to use for the artist name. + :param asr: The shadow radius to use for the artist name. + :param abe: Whether to show a background for the artist name. + :param abc: The background color to use for the artist name. + :param abo: The background opacity to use for the artist name. + :param sf_family: The font family to use for the song name. + :param sf_style: The font style to use for the song name. + :param sfs: The font size to use for the song name. + :param sfc: The font color to use for the song name. + :param sfo: The font opacity to use for the song name. + :param sse: Whether to show a shadow for the song name. + :param ssc: The shadow color to use for the song name. + :param sso: The shadow opacity to use for the song name. + :param ssr: The shadow radius to use for the song name. + :param sbe: Whether to show a background for the song name. + :param sbc: The background color to use for the song name. + :param sbo: The background opacity to use for the song name. + :return: The processed image as a numpy array. If there was no image to process, returns None. + """ if image_path is None: print("No image to modify.") - return + return None font_families = font_manager.get_fonts() aff = font_families[af_family][af_style] diff --git a/utils/image.py b/utils/image.py index 1bbe984..0204e57 100644 --- a/utils/image.py +++ b/utils/image.py @@ -1,9 +1,26 @@ -def get_alpha_from_opacity(opacity): +""" +This file contains functions for image processing. +""" +from typing import Tuple, Union + + +def get_alpha_from_opacity(opacity: int) -> int: + """ + Converts an opacity value from 0-100 to 0-255. + :param opacity: The opacity value from 0-100. + :return: The opacity value from 0-255. 
+ """ # Opacity should be 0 -> 0, 100 -> 255 return int(opacity * 255 / 100) -def get_rgba(color, opacity): +def get_rgba(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: + """ + Gets the RGBA value for a given color and opacity. + :param color: The color to use. Either a hex string or a tuple of RGB values. + :param opacity: The opacity to use, from 0 to 100. + :return: The RGBA value. + """ # if color is hex, convert to rgb if not isinstance(color, tuple) and color.startswith("#"): color = color.lstrip("#") @@ -12,7 +29,13 @@ def get_rgba(color, opacity): return color[0], color[1], color[2], get_alpha_from_opacity(opacity) -def get_bgra(color, opacity): +def get_bgra(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: + """ + Gets the BGRA value for a given color and opacity. + :param color: The color to use. Either a hex string or a tuple of BGR values. + :param opacity: The opacity to use, from 0 to 100. + :return: The BGRA value. + """ # if color is hex, convert to rgb if not isinstance(color, tuple) and color.startswith("#"): color = color.lstrip("#") From 17640764821d5f272511af531884a84eb3438bbe Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Sun, 14 Jan 2024 22:26:12 -0500 Subject: [PATCH 09/13] More linting - 95% done --- ui/listicles/interface.py | 22 ++++++++++++++--- ui/listicles/utils.py | 52 +++++++++++++++++++++++++++++++++------ ui/music/utils.py | 9 +++---- utils/progress.py | 9 ++++--- utils/visualizer.py | 24 ++++++++++++++++-- 5 files changed, 93 insertions(+), 23 deletions(-) diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index b27d214..664cd72 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -1,12 +1,18 @@ +""" +The interface for the Listicles section of the web app. +""" +import json import gradio as gr import processing.image as image_processing -import json import ui.listicles.utils as listicle_utils import ui.components.openai as openai_components import utils.gradio as gru -def render_listicles_section(): +def render_listicles_section() -> None: + """ + Renders the Listicles section of the web app. + """ gru.render_tool_description("Create images in the style of those 'Your birth month is your ___' TikToks.") with gr.Tab("Generate Artifacts"): send_artifacts_to_batch_button, listicle_image_output, listicle_json_output = render_generate_section() @@ -20,7 +26,11 @@ def render_listicles_section(): ) -def render_batch_section(): +def render_batch_section() -> (gr.File, gr.Code): + """ + Renders the Batch Image Processing section of the web app. + :return: The input images and input json components. + """ with gr.Column(): gr.Markdown("# Input") with gr.Row(equal_height=False): @@ -106,7 +116,11 @@ def set_json(json_file): return input_batch_images, input_batch_json -def render_generate_section(): +def render_generate_section() -> (gr.Button, gr.Gallery, gr.Code): + """ + Renders the Generate Artifacts section of the web app. + :return: The send artifacts to batch button, the listicle image output gallery, and the listicle json output. + """ api_key, api_text_model, api_image_model = openai_components.render_openai_setup() with gr.Row(equal_height=False): with gr.Group(): diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index d43eeb5..b4c75b3 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -1,7 +1,11 @@ -import gradio as gr +""" +This file contains the functions that are used by the Gradio UI to generate listicles. 
+""" +import os import json +from typing import Optional +import gradio as gr import processing.image as image_processing -import os from utils import font_manager, image as image_utils import api.chatgpt as chatgpt_api @@ -93,7 +97,12 @@ def process(image_files, json_data, return images -def validate_json(json_file): +def validate_json(json_file: str) -> None: + """ + Validates the JSON file to make sure it has the required fields. + :param json_file: The JSON file to validate. + :return: None + """ if not json_file or len(json_file) == 0: gr.Warning("No JSON in the code block.") return @@ -117,7 +126,13 @@ def validate_json(json_file): gr.Info("JSON is valid!") -def send_artifacts_to_batch(listicle_images, json_data): +def send_artifacts_to_batch(listicle_images: gr.data_classes.RootModel, json_data: str) -> (list, str): + """ + Sends the artifacts to the batch processing section. + :param listicle_images: The list of images to send. This is a Gradio Gallery. + :param json_data: The JSON data to send. + :return: The list of images and the JSON data sent. + """ if not listicle_images or len(listicle_images.root) == 0: gr.Warning("No images to send.") return @@ -130,10 +145,17 @@ def send_artifacts_to_batch(listicle_images, json_data): return listicle_images, json_data -def save_artifacts(listicle_images, image_type, json_data): +def save_artifacts(listicle_images: gr.data_classes.RootModel, image_type: gr.Dropdown, json_data: str) -> None: + """ + Saves the artifacts to disk. + :param listicle_images: The list of images to save. This is a Gradio Gallery. + :param image_type: The type of image to save. + :param json_data: The JSON data to save. + :return: None + """ if not json_data or len(json_data) == 0: gr.Warning("No JSON data to save.") - return + return None # Save the images save_dir = image_processing.save_images_to_disk(listicle_images, image_type) @@ -148,8 +170,22 @@ def save_artifacts(listicle_images, image_type, json_data): gr.Info(f"Saved generated artifacts to {save_dir}.") -def generate_listicle(api_key, api_text_model, api_image_model, number_of_items, topic, association, - rating_type, details="", generate_artifacts=False): +def generate_listicle(api_key: str, api_text_model: str, api_image_model: str, number_of_items: int, topic: str, + association: str, rating_type: str, details: str = "", generate_artifacts: bool = False) \ + -> (Optional[str], Optional[str], Optional[list[str]]): + """ + Generates a listicle using the OpenAI API. + :param api_key: The OpenAI API key to use. + :param api_text_model: The OpenAI API text model to use (e.g. 'gpt-4'). + :param api_image_model: The OpenAI API image model to use (e.g. 'dall-e-3'). + :param number_of_items: The number of items to generate. + :param topic: The topic of the listicle. + :param association: What each item is associated with. + :param rating_type: What the rating represents. + :param details: Additional details about the listicle you want to generate. + :param generate_artifacts: Whether to generate artifacts (images and JSON) for the listicle. + :return: The listicle content, the listicle JSON, and the listicle images. + """ openai = chatgpt_api.get_openai_client(api_key) if openai is None: gr.Warning("No OpenAI client. 
Cannot generate listicle.") diff --git a/ui/music/utils.py b/ui/music/utils.py index 7caebca..eb6dd98 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -5,18 +5,17 @@ import subprocess import re import time -import cv2 +import tempfile from typing import List, Dict, Optional +import cv2 from moviepy.editor import AudioFileClip +import numpy as np +import librosa from utils import font_manager import utils.image as image_utils -import numpy as np -import tempfile from api import chatgpt as chatgpt_api from processing import image as image_processing -import librosa from utils import progress, visualizer -import cProfile def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): diff --git a/utils/progress.py b/utils/progress.py index c5a14b5..52569ba 100644 --- a/utils/progress.py +++ b/utils/progress.py @@ -6,9 +6,11 @@ def print_progress_bar(current_iteration: int, total_iterations: int, bar_length start_time: Optional[float] = None, end: str = ''): progress_percentage = (current_iteration / total_iterations) * 100 completed_length = int(bar_length * current_iteration // total_iterations) - bar = 'â–ˆ' * completed_length + 'â–‘' * (bar_length - completed_length) + progress_bar = 'â–ˆ' * completed_length + 'â–‘' * (bar_length - completed_length) elapsed_time = None + estimated_remaining_time = None + iterations_per_sec = None if start_time is not None: elapsed_time = time.time() - start_time if current_iteration > 0: @@ -19,7 +21,6 @@ def print_progress_bar(current_iteration: int, total_iterations: int, bar_length estimated_remaining_time = None time_string = '' - if estimated_remaining_time is not None: + if estimated_remaining_time is not None and iterations_per_sec is not None: time_string = f'[{elapsed_time:.2f}s/{estimated_remaining_time:.2f}s, {iterations_per_sec:.2f}it/s]' - print(f'\r{progress_percentage:3.0f}%|{bar}| {current_iteration}/{total_iterations} {time_string}', end=end, flush=True) - + print(f'\r{progress_percentage:3.0f}%|{progress_bar}| {current_iteration}/{total_iterations} {time_string}', end=end, flush=True) diff --git a/utils/visualizer.py b/utils/visualizer.py index 8739a70..e32e7bf 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -1,8 +1,16 @@ +""" +This module defines the Visualizer class, which is used to draw the visualizer on the canvas. +""" +from typing import Dict, Optional import numpy as np import cv2 class Visualizer: + """ + This class is used to draw the visualizer on the canvas. + Will be replaced with a more general solution in the future to allow for more customization. + """ def __init__(self, base_size, max_size, color, dot_count, width, height): self.base_size = base_size self.max_size = max_size @@ -13,7 +21,11 @@ def __init__(self, base_size, max_size, color, dot_count, width, height): self.cached_dot_positions = None self.cached_resized_drawing = {} - def initialize_static_values(self): + def initialize_static_values(self: "Visualizer") -> None: + """ + Initializes static values for the visualizer. + :return: None. 
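
# Editor's note: a compact sketch of the precomputation documented above and
# implemented just below -- dot centers on an evenly spaced grid, cached once
# instead of recomputed per frame (9x5 dots on 1920x1080 as placeholder numbers):
import numpy as np

width, height, nx, ny = 1920, 1080, 9, 5
xs = (width / nx) * np.arange(nx) + width / nx / 2    # column centers
ys = (height / ny) * np.arange(ny) + height / ny / 2  # row centers
grid_x, grid_y = np.meshgrid(xs, ys)
dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(nx) for y in range(ny)]
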
+ """ # Calculate and store dot positions x_positions = (self.width / self.dot_count[0]) * np.arange(self.dot_count[0]) + ( self.width / self.dot_count[0] / 2) @@ -23,7 +35,15 @@ def initialize_static_values(self): self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count[0]) for y in range(self.dot_count[1])] - def draw_visualizer(self, canvas, frequency_data, custom_drawing=None): + def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], + custom_drawing: Optional[np.ndarray] = None) -> None: + """ + Draws the visualizer on the canvas (a single frame). + :param canvas: The canvas to draw on. + :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency. + :param custom_drawing: A custom drawing to use instead of the default circle. + :return: None. + """ # Calculate and store dot positions dot_count_x = self.dot_count[0] dot_count_y = self.dot_count[1] From 2a3fa120d33a30cdbe516afb422b32f873f1c72a Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Mon, 15 Jan 2024 00:11:01 -0500 Subject: [PATCH 10/13] More linting - 98% done --- .pylintrc | 4 ++ processing/image.py | 23 +++++----- ui/listicles/interface.py | 2 +- ui/listicles/utils.py | 92 ++++++++++++++++++++++++++++++++------- ui/music/utils.py | 28 ++++++------ utils/dataclasses.py | 28 ++++++++++++ utils/font_manager.py | 8 ++-- utils/image.py | 7 +-- utils/path_handler.py | 2 +- utils/progress.py | 18 +++++++- utils/visualizer.py | 2 +- 11 files changed, 161 insertions(+), 53 deletions(-) create mode 100644 utils/dataclasses.py diff --git a/.pylintrc b/.pylintrc index 59c5a5c..2083c87 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,5 +1,9 @@ [MAIN] max-line-length=120 +max-attributes=10 +# Currently, this is added because gradio Inputs don't support passing tuples/dataclasses/etc. as arguments, meaning I +# can't shorten some methods that take a lot of arguments. 
+disable=too-many-arguments [TYPECHECK] generated-members=gradio.components.dropdown.*,gradio.components.button.*,cv2.* \ No newline at end of file diff --git a/processing/image.py b/processing/image.py index fd2cec9..9fe7628 100644 --- a/processing/image.py +++ b/processing/image.py @@ -11,10 +11,10 @@ import numpy as np import gradio as gr import cv2 -from utils import path_handler -import utils.gradio as gru +from utils import gradio as gru, path_handler, dataclasses IMAGE_FOLDER = "images" + default_path = os.path.join(path_handler.get_default_path(), IMAGE_FOLDER) @@ -82,17 +82,19 @@ def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], bbox = draw.textbbox((0, 0), text, font=font) text_width = bbox[2] - bbox[0] text_height = bbox[3] - bbox[1] - left = position[0] - padding[0] - top = position[1] - padding[1] - right = left + text_width + 2 * padding[0] - bottom = top + text_height + 2 * padding[1] + rect_pos = dataclasses.FourEdges(left=position[0] - padding[0], + top=position[1] - padding[1], + right=(position[0] - padding[0]) + text_width + 2 * padding[0], + bottom=(position[1] - padding[1]) + text_height + 2 * padding[1]) rect_img = Image.new('RGBA', image_pil.size, (0, 0, 0, 0)) rect_draw = ImageDraw.Draw(rect_img) - rect_draw.rounded_rectangle([left, top, right, bottom], fill=fill_color, radius=border_radius) + rect_draw.rounded_rectangle([rect_pos.left, rect_pos.top, rect_pos.right, rect_pos.bottom], fill=fill_color, + radius=border_radius) image_pil.paste(rect_img, (0, 0), rect_img) - return (left + padding[0], top + padding[1]), (right - left, bottom - top) + return ((rect_pos.left + padding[0], rect_pos.top + padding[1]), + (rect_pos.right - rect_pos.left, rect_pos.bottom - rect_pos.top)) def add_blurred_shadow(image_pil: Image, text: str, position: tuple[float, float], font: ImageFont, @@ -263,7 +265,6 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i lines = wrapped_text.split('\n') - x_pos, y_pos = position y_offset = 0 max_line_width = 0 # Keep track of the widest line total_height = 0 # Accumulate total height of text block @@ -274,10 +275,10 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i max_line_width = max(max_line_width, line_width) total_height += line_height - text_x = x_pos # Adjusted to use numpy width + text_x = position[0] if x_center: text_x = (img_width - line_width) / 2 - line_y = y_pos + y_offset + line_y = position[1] + y_offset y_offset += (line_height + 6) if show_background: diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index 664cd72..5dd1ab6 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -48,7 +48,7 @@ def set_json(json_file): if not json_file: gr.Warning("No JSON file uploaded. 
Reverting to default.")
             return input_batch_json.value
-        with open(json_file.name, "r") as file:
+        with open(json_file.name, "r", encoding="utf-8") as file:
             json_data = json.load(file)
             json_data = json.dumps(json_data, indent=4)

diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py
index b4c75b3..13a6e2b 100644
--- a/ui/listicles/utils.py
+++ b/ui/listicles/utils.py
@@ -3,24 +3,84 @@
 """
 import os
 import json
-from typing import Optional
+from typing import Optional, Any, List
 import gradio as gr
+import numpy as np
 import processing.image as image_processing
-from utils import font_manager, image as image_utils
+from utils import font_manager, image as image_utils, dataclasses
 import api.chatgpt as chatgpt_api


-def process(image_files, json_data,
-            nf_family, nf_style, nfs, nfc, nfo, nse, nsc, nso, nsr, nbe, nbc, nbo,
-            df_family, df_style, dfs, dfc, dfo, dse, dsc, dso, dsr, dbe, dbc, dbo,
-            mf_family, mf_style, mfs, mfc, mfo, mse, msc, mso, msr, mbe, mbc, mbo,
-            rf_family, rf_style, rfs, rfc, rfo, rse, rsc, rso, rsr, rbe, rbc, rbo):
+def process(image_files: list[Any], json_data: str,
+            nf_family: str, nf_style: str, nfs: int, nfc: dataclasses.RGBColor, nfo: int, nse: bool,
+            nsc: dataclasses.RGBColor, nso: int, nsr, nbe: bool, nbc: dataclasses.RGBColor, nbo: int,
+            df_family: str, df_style: str, dfs: int, dfc: dataclasses.RGBColor, dfo: int, dse: bool,
+            dsc: dataclasses.RGBColor, dso: int, dsr, dbe: bool, dbc: dataclasses.RGBColor, dbo: int,
+            mf_family: str, mf_style: str, mfs: int, mfc: dataclasses.RGBColor, mfo: int, mse: bool,
+            msc: dataclasses.RGBColor, mso: int, msr, mbe: bool, mbc: dataclasses.RGBColor, mbo: int,
+            rf_family: str, rf_style: str, rfs: int, rfc: dataclasses.RGBColor, rfo: int, rse: bool,
+            rsc: dataclasses.RGBColor, rso: int, rsr, rbe: bool, rbc: dataclasses.RGBColor, rbo: int) \
+        -> Optional[List[np.ndarray]]:
+    """
+    Processes the images and JSON data to generate the listicle images.
+    :param image_files: The list of images to process. This is a gradio File.
+    :param json_data: The JSON data to process.
+    :param nf_family: The font family for the name.
+    :param nf_style: The font style for the name.
+    :param nfs: The font size for the name.
+    :param nfc: The font color for the name.
+    :param nfo: The font opacity for the name.
+    :param nse: Whether to show the shadow for the name.
+    :param nsc: The shadow color for the name.
+    :param nso: The shadow opacity for the name.
+    :param nsr: The shadow radius for the name.
+    :param nbe: Whether to show the background for the name.
+    :param nbc: The background color for the name.
+    :param nbo: The background opacity for the name.
+    :param df_family: The font family for the description.
+    :param df_style: The font style for the description.
+    :param dfs: The font size for the description.
+    :param dfc: The font color for the description.
+    :param dfo: The font opacity for the description.
+    :param dse: Whether to show the shadow for the description.
+    :param dsc: The shadow color for the description.
+    :param dso: The shadow opacity for the description.
+    :param dsr: The shadow radius for the description.
+    :param dbe: Whether to show the background for the description.
+    :param dbc: The background color for the description.
+    :param dbo: The background opacity for the description.
+    :param mf_family: The font family for the association.
+    :param mf_style: The font style for the association.
+    :param mfs: The font size for the association.
+    :param mfc: The font color for the association.
+ :param mfo: The font opacity for the association. + :param mse: Whether to show the shadow for the association. + :param msc: The shadow color for the association. + :param mso: The shadow opacity for the association. + :param msr: The shadow radius for the association. + :param mbe: Whether to show the background for the association. + :param mbc: The background color for the association. + :param mbo: The background opacity for the association. + :param rf_family: The font family for the rating. + :param rf_style: The font style for the rating. + :param rfs: The font size for the rating. + :param rfc: The font color for the rating. + :param rfo: The font opacity for the rating. + :param rse: Whether to show the shadow for the rating. + :param rsc: The shadow color for the rating. + :param rso: The shadow opacity for the rating. + :param rsr: The shadow radius for the rating. + :param rbe: Whether to show the background for the rating. + :param rbc: The background color for the rating. + :param rbo: The background opacity for the rating. + :return: The list of processed images as numpy arrays. If there was an error, returns None. + """ if not json_data: print("No JSON file uploaded.") - return + return None if not image_files: print("No images uploaded.") - return + return None font_families = font_manager.get_fonts() nff = font_families[nf_family][nf_style] @@ -126,7 +186,8 @@ def validate_json(json_file: str) -> None: gr.Info("JSON is valid!") -def send_artifacts_to_batch(listicle_images: gr.data_classes.RootModel, json_data: str) -> (list, str): +def send_artifacts_to_batch(listicle_images: gr.data_classes.RootModel, json_data: str) \ + -> (Optional[list], Optional[str]): """ Sends the artifacts to the batch processing section. :param listicle_images: The list of images to send. This is a Gradio Gallery. @@ -135,10 +196,10 @@ def send_artifacts_to_batch(listicle_images: gr.data_classes.RootModel, json_dat """ if not listicle_images or len(listicle_images.root) == 0: gr.Warning("No images to send.") - return + return None, None if not json_data or len(json_data) == 0: gr.Warning("No JSON data to send.") - return + return None, None # Parse the listicle_images GalleryData to get file paths listicle_images = listicle_images.root listicle_images = [image.image.path for image in listicle_images] @@ -163,12 +224,14 @@ def save_artifacts(listicle_images: gr.data_classes.RootModel, image_type: gr.Dr # Save the JSON data if save_dir is not None and save_dir != "": json_filepath = os.path.join(save_dir, "data.json") - with open(json_filepath, "w") as file: + with open(json_filepath, "w", encoding="utf-8") as file: json_data = json.loads(json_data) json.dump(json_data, file, indent=4) gr.Info(f"Saved generated artifacts to {save_dir}.") + return None + def generate_listicle(api_key: str, api_text_model: str, api_image_model: str, number_of_items: int, topic: str, association: str, rating_type: str, details: str = "", generate_artifacts: bool = False) \ @@ -226,8 +289,7 @@ def generate_listicle(api_key: str, api_text_model: str, api_image_model: str, n message = (f"Format the listicle into JSON. For the items, store as a list named 'items' with the content " f"format: {json_format}.") if rating_type is not None and rating_type != "": - message += (f"Include a top-level field `rating_type: ` with what the rating " - f"represents.") + message += "Include a top-level field `rating_type: ` with what the rating represents." 
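
# Editor's note: a minimal sketch of the context-chaining pattern used by the
# call just below -- the first completion is fed back as assistant history so
# the model reformats its own listicle instead of inventing a new one. The exact
# shape of listicle_json_context is assumed from the OpenAI chat message format:
assumed_context = [
    {"role": "user", "content": "Write a listicle about ..."},  # original prompt
    {"role": "assistant", "content": "1. ..."},                 # first response
]
# chatgpt_api.get_chat_response(openai, json_model, role, prompt=message,
#                               context=assumed_context, as_json=True)
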
listicle_json = chatgpt_api.get_chat_response(openai, json_model, role, prompt=message, context=listicle_json_context, as_json=True) diff --git a/ui/music/utils.py b/ui/music/utils.py index eb6dd98..7bd5105 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -11,11 +11,9 @@ from moviepy.editor import AudioFileClip import numpy as np import librosa -from utils import font_manager -import utils.image as image_utils from api import chatgpt as chatgpt_api from processing import image as image_processing -from utils import progress, visualizer +from utils import progress, visualizer, font_manager, image as image_utils, dataclasses def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): @@ -50,15 +48,15 @@ def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]] def create_music_video( image_path: str, audio_path: str, fps: int, artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int, - artist_font_color: tuple[int, int, int], artist_font_opacity: int, artist_shadow_enabled: bool, - artist_shadow_color: tuple[int, int, int], artist_shadow_opacity: int, artist_shadow_radius: int, - artist_background_enabled: bool, artist_background_color: tuple[int, int, int], artist_background_opacity: int, + artist_font_color: dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool, + artist_shadow_color: dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int, + artist_background_enabled: bool, artist_background_color: dataclasses.RGBColor, artist_background_opacity: int, song: str, song_font_type: str, song_font_style: str, song_font_size: int, - song_font_color: tuple[int, int, int], song_font_opacity: int, song_shadow_enabled: bool, - song_shadow_color: tuple[int, int, int], song_shadow_opacity: int, song_shadow_radius: int, - song_background_enabled: bool, song_background_color: tuple[int, int, int], song_background_opacity: int, - background_color: tuple[int, int, int] = (0, 0, 0), background_opacity: int = 66, - generate_audio_visualizer: bool = False, audio_visualizer_color: tuple[int, int, int] =(255, 255, 255), + song_font_color: dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool, + song_shadow_color: dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int, + song_background_enabled: bool, song_background_color: dataclasses.RGBColor, song_background_opacity: int, + background_color: dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66, + generate_audio_visualizer: bool = False, audio_visualizer_color: dataclasses.RGBColor = (255, 255, 255), audio_visualizer_opacity: int = 100, visualizer_drawing: Optional[str] = None, audio_visualizer_num_rows: int = 90, audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1, audio_visualizer_max_size: int = 7) -> Optional[str]: @@ -342,10 +340,10 @@ def generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[ def process(image_path: str, artist: str, song: str, - af_family: str, af_style: str, afs: int, afc: tuple[int, int, int], afo: int, ase: bool, - asc: tuple[int, int, int], aso: int, asr: Optional[int], abe: bool, abc: tuple[int, int, int], abo: int, - sf_family: str, sf_style: str, sfs: int, sfc: tuple[int, int, int], sfo: int, sse: bool, - ssc: tuple[int, int, int], sso: int, ssr: Optional[int], sbe: bool, sbc: tuple[int, int, int], sbo: int) \ + af_family: str, af_style: str, afs: int, afc: dataclasses.RGBColor, afo: int, ase: bool, + asc: 
dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: dataclasses.RGBColor, abo: int, + sf_family: str, sf_style: str, sfs: int, sfc: dataclasses.RGBColor, sfo: int, sse: bool, + ssc: dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: dataclasses.RGBColor, sbo: int) \ -> Optional[np.ndarray]: """ Processes the image at the given path (by adding the requested text) and returns the processed image. diff --git a/utils/dataclasses.py b/utils/dataclasses.py new file mode 100644 index 0000000..655e7dd --- /dev/null +++ b/utils/dataclasses.py @@ -0,0 +1,28 @@ +""" +This module contains dataclasses and type aliases used in the project. +""" +from typing import Union +from dataclasses import dataclass + + +@dataclass +class FourEdges: + """ + A dataclass representing the four edges of a rectangle. + """ + top: int + bottom: int + left: int + right: int + + +@dataclass +class Position: + """ + A dataclass representing a position on a 2d plane. + """ + x: int + y: int + + +RGBColor = Union[str, tuple[int, int, int]] diff --git a/utils/font_manager.py b/utils/font_manager.py index 4074ec1..f6dcbce 100644 --- a/utils/font_manager.py +++ b/utils/font_manager.py @@ -27,7 +27,7 @@ def initialize_inflect() -> inflect.engine: Initializes the inflect engine. :return: The inflect engine. """ - global P + global P # pylint: disable=global-statement if P is None: P = inflect.engine() @@ -39,7 +39,7 @@ def initialize_fonts() -> NestedDict: Initializes the font families from the global FONTS_DIRS. :return: The font families and their paths. They are called by map[font_family][font_style]. """ - global FONT_FAMILIES + global FONT_FAMILIES # pylint: disable=global-statement font_files = [] # Add TrendGenie fonts @@ -76,7 +76,7 @@ def get_fonts() -> NestedDict: Gets the font families. If they are not initialized, it initializes them. :return: The font families and their paths. They are called by map[font_family][font_style]. """ - global FONT_FAMILIES + global FONT_FAMILIES # pylint: disable=global-statement if FONT_FAMILIES is None: FONT_FAMILIES = initialize_fonts() @@ -88,7 +88,7 @@ def get_inflect() -> inflect.engine: Gets the inflect engine. If it is not initialized, it initializes it. :return: The inflect engine. """ - global P + global P # pylint: disable=global-statement if P is None: P = initialize_inflect() diff --git a/utils/image.py b/utils/image.py index 0204e57..b070a04 100644 --- a/utils/image.py +++ b/utils/image.py @@ -1,7 +1,8 @@ """ This file contains functions for image processing. """ -from typing import Tuple, Union +from typing import Tuple +from utils import dataclasses def get_alpha_from_opacity(opacity: int) -> int: @@ -14,7 +15,7 @@ def get_alpha_from_opacity(opacity: int) -> int: return int(opacity * 255 / 100) -def get_rgba(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: +def get_rgba(color: dataclasses.RGBColor, opacity: int) -> Tuple[int, int, int, int]: """ Gets the RGBA value for a given color and opacity. :param color: The color to use. Either a hex string or a tuple of RGB values. @@ -29,7 +30,7 @@ def get_rgba(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int return color[0], color[1], color[2], get_alpha_from_opacity(opacity) -def get_bgra(color: Union[str, Tuple[int, int, int]], opacity: int) -> Tuple[int, int, int, int]: +def get_bgra(color: dataclasses.RGBColor, opacity: int) -> Tuple[int, int, int, int]: """ Gets the BGRA value for a given color and opacity. :param color: The color to use. 
Either a hex string or a tuple of BGR values. diff --git a/utils/path_handler.py b/utils/path_handler.py index 58251cb..7bf6228 100644 --- a/utils/path_handler.py +++ b/utils/path_handler.py @@ -12,7 +12,7 @@ def get_default_path() -> str: Gets the default path for saving files, which is the user's home directory under a folder called "trendgenie". :return: The default path. """ - global DEFAULT_PATH + global DEFAULT_PATH # pylint: disable=global-statement if DEFAULT_PATH is None: homepath = Path.home() DEFAULT_PATH = os.path.join(homepath, "trendgenie") diff --git a/utils/progress.py b/utils/progress.py index 52569ba..5cb5cd8 100644 --- a/utils/progress.py +++ b/utils/progress.py @@ -1,9 +1,22 @@ +""" +This module contains a function to print a progress bar to the console. +""" + import time from typing import Optional def print_progress_bar(current_iteration: int, total_iterations: int, bar_length: int = 50, - start_time: Optional[float] = None, end: str = ''): + start_time: Optional[float] = None, end: str = '') -> None: + """ + Prints a progress bar to the console. + :param current_iteration: The current iteration. + :param total_iterations: The total number of iterations. Used to calculate the percentage. + :param bar_length: The length of the progress bar. + :param start_time: The time the process started. Used to calculate the elapsed time. + :param end: The end character to use. Defaults to '' which prints all in the same line. + :return: None + """ progress_percentage = (current_iteration / total_iterations) * 100 completed_length = int(bar_length * current_iteration // total_iterations) progress_bar = 'â–ˆ' * completed_length + 'â–‘' * (bar_length - completed_length) @@ -23,4 +36,5 @@ def print_progress_bar(current_iteration: int, total_iterations: int, bar_length time_string = '' if estimated_remaining_time is not None and iterations_per_sec is not None: time_string = f'[{elapsed_time:.2f}s/{estimated_remaining_time:.2f}s, {iterations_per_sec:.2f}it/s]' - print(f'\r{progress_percentage:3.0f}%|{progress_bar}| {current_iteration}/{total_iterations} {time_string}', end=end, flush=True) + print(f'\r{progress_percentage:3.0f}%|{progress_bar}| {current_iteration}/{total_iterations} {time_string}', + end=end, flush=True) diff --git a/utils/visualizer.py b/utils/visualizer.py index e32e7bf..88619ba 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -67,7 +67,7 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict upper_bound = log_freqs[x + 1] if x < dot_count_x - 1 else end_freq + 1 band_freqs = [freq for freq in freq_keys if lower_bound <= freq < upper_bound] if not band_freqs: - closest_freq = min(freq_keys, key=lambda f: abs(f - lower_bound)) + closest_freq = min(freq_keys, key=lambda f, lb=lower_bound: abs(f - lb)) band_freqs = [closest_freq] band_loudness = [frequency_data[freq] for freq in band_freqs] From 6593f0ee6813c89f7debee1380525de3d974aac6 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Mon, 15 Jan 2024 00:20:51 -0500 Subject: [PATCH 11/13] Add requirements installation when linting --- .github/workflows/pylint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 2d4b561..aa949d3 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -15,6 +15,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + pip install -r requirements.txt pip install pylint - name: Analysing the code with pylint run: | From 
72d3eb88658343c38f4e552355b1ec72eba48c05 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Mon, 15 Jan 2024 11:39:07 -0500 Subject: [PATCH 12/13] Flip row/col in visualizer --- processing/image.py | 61 ++++++++++---------- ui/components/openai.py | 7 ++- ui/listicles/interface.py | 48 ++++++++++------ ui/listicles/utils.py | 38 +++++------- ui/music/interface.py | 49 ++++++++++------ ui/music/utils.py | 118 +++++++++++++++++++++----------------- utils/dataclasses.py | 82 +++++++++++++++++++++++++- utils/gradio.py | 7 +-- utils/visualizer.py | 37 ++++++------ 9 files changed, 273 insertions(+), 174 deletions(-) diff --git a/processing/image.py b/processing/image.py index 9fe7628..eb97bc1 100644 --- a/processing/image.py +++ b/processing/image.py @@ -35,32 +35,32 @@ def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name: str) -> ((gr.Dropdown, gr.Dropdown, gr.ColorPicker, gr.Slider, gr.Number), - (gr.Checkbox, gr.ColorPicker, gr.Slider, gr.Number), - (gr.Checkbox, gr.ColorPicker, gr.Slider)): +def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponents, + dataclasses.FontDropShadowGradioComponents, + dataclasses.FontBackgroundGradioComponents): """ Renders the text editor parameters. :param name: The name of the text editor parameters. This is used as the label for the accordion. - :return: A tuple containing the font, drop shadow, and background components. + :return: Classes containing the font, drop shadow, and background components. """ with gr.Accordion(label=name): with gr.Column(): - font_family, font_style, font_color, font_opacity, font_size = gru.render_font_picker() + font_data = gru.render_font_picker() with gr.Group(): - drop_shadow_checkbox = gr.Checkbox(False, label="Enable Drop Shadow", interactive=True) - with gr.Group(visible=drop_shadow_checkbox.value) as additional_options: + drop_shadow_enabled = gr.Checkbox(False, label="Enable Drop Shadow", interactive=True) + with gr.Group(visible=drop_shadow_enabled.value) as additional_options: drop_shadow_color, drop_shadow_opacity = gru.render_color_opacity_picker() drop_shadow_radius = gr.Number(0, label="Shadow Radius") - gru.bind_checkbox_to_visibility(drop_shadow_checkbox, additional_options) + gru.bind_checkbox_to_visibility(drop_shadow_enabled, additional_options) with gr.Group(): - background_checkbox = gr.Checkbox(False, label="Enable Background", interactive=True) - with gr.Group(visible=background_checkbox.value) as additional_options: + background_enabled = gr.Checkbox(False, label="Enable Background", interactive=True) + with gr.Group(visible=background_enabled.value) as additional_options: background_color, background_opacity = gru.render_color_opacity_picker() - gru.bind_checkbox_to_visibility(background_checkbox, additional_options) + gru.bind_checkbox_to_visibility(background_enabled, additional_options) - return ((font_family, font_style, font_size, font_color, font_opacity), - (drop_shadow_checkbox, drop_shadow_color, drop_shadow_opacity, drop_shadow_radius), - (background_checkbox, background_color, background_opacity)) + return (font_data, dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color, + drop_shadow_opacity, drop_shadow_radius), + dataclasses.FontBackgroundGradioComponents(background_enabled, background_color, background_opacity)) def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], text: str, font: 
ImageFont, @@ -256,8 +256,6 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - img_width, _ = image_pil.size - if max_width: # Prepare for text wrapping if max_width is provided wrapped_text = textwrap.fill(text, width=max_width) else: @@ -266,31 +264,32 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i lines = wrapped_text.split('\n') y_offset = 0 - max_line_width = 0 # Keep track of the widest line - total_height = 0 # Accumulate total height of text block + # max_line_width = 0 # Keep track of the widest line + # total_height = 0 # Accumulate total height of text block + text_container = dataclasses.Size(width=0, height=0) for line in lines: bbox = draw.textbbox((0, 0), line, font=font) - line_width = bbox[2] - bbox[0] - line_height = bbox[3] - bbox[1] - max_line_width = max(max_line_width, line_width) - total_height += line_height + line_size = dataclasses.Size(width=bbox[2] - bbox[0], height=bbox[3] - bbox[1]) + text_container.width = max(text_container.width, line_size.width) + text_container.height += line_size.height - text_x = position[0] + pos = dataclasses.Position + pos.x = position[0] if x_center: - text_x = (img_width - line_width) / 2 - line_y = position[1] + y_offset - y_offset += (line_height + 6) + pos.x = (image_pil.width - line_size.width) / 2 + pos.y = position[1] + y_offset + y_offset += (line_size.height + 6) if show_background: - (text_x, line_y), _ = add_background(image_pil, draw, (text_x, line_y), line, font, - fill_color=background_color, border_radius=10) + (pos.x, pos.y), _ = add_background(image_pil, draw, (pos.x, pos.y), line, font, + fill_color=background_color, border_radius=10) if show_shadow: - shadow_position = (text_x, line_y) + shadow_position = (pos.x, pos.y) add_blurred_shadow(image_pil, line, shadow_position, font, shadow_color=shadow_color, blur_radius=shadow_radius) - draw.text((text_x, line_y), line, font=font, fill=font_color) + draw.text((pos.x, pos.y), line, font=font, fill=font_color) image_pil = Image.alpha_composite(image_pil, txt_layer) - return np.array(image_pil), (max_line_width, total_height) + return np.array(image_pil), (text_container.width, text_container.height) diff --git a/ui/components/openai.py b/ui/components/openai.py index 4fec6bf..8470d54 100644 --- a/ui/components/openai.py +++ b/ui/components/openai.py @@ -2,15 +2,16 @@ This module contains ui components for the OpenAI API. """ import gradio as gr +from utils import dataclasses def render_openai_setup(show_text_model: bool = True, show_image_model: bool = True) \ - -> (gr.Textbox, gr.Dropdown, gr.Dropdown): + -> dataclasses.OpenAIGradioComponents: """ Renders the OpenAI API setup components. :param show_text_model: Whether to show the text model dropdown. :param show_image_model: Whether to show the image model dropdown. - :return: A tuple containing the API key, text model, and image model components. + :return: A class containing the API key, text model, and image model components. 
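    Example (a rough usage sketch, not from this patch; assumes a surrounding gr.Blocks context):

        import gradio as gr
        from ui.components import openai as openai_components

        with gr.Blocks():
            oai = openai_components.render_openai_setup(show_image_model=False)
            # Components are now addressed by field name rather than tuple position:
            # oai.api_key, oai.api_text_model, oai.api_image_model (None when hidden)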
""" api_text_model = None api_image_model = None @@ -25,4 +26,4 @@ def render_openai_setup(show_text_model: bool = True, show_image_model: bool = T api_image_model = gr.Dropdown(["dall-e-2", "dall-e-3"], label="API Image Model", value="dall-e-2", interactive=True) - return api_key, api_text_model, api_image_model + return dataclasses.OpenAIGradioComponents(api_key, api_text_model, api_image_model) diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index 5dd1ab6..bb15613 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -81,15 +81,11 @@ def set_json(json_file): with gr.Column(scale=3): gr.Markdown("# Parameters") with gr.Row(equal_height=False): - (nf_family, nf_style, nfs, nfc, nfo), (nse, nsc, nso, nsr), ( - nbe, nbc, nbo) = image_processing.render_text_editor_parameters("Name") - (df_family, df_style, dfs, dfc, dfo), (dse, dsc, dso, dsr), ( - dbe, dbc, dbo) = image_processing.render_text_editor_parameters("Description") + name_font, name_shadow, name_background = image_processing.render_text_editor_parameters("Name") + desc_font, desc_shadow, desc_background = image_processing.render_text_editor_parameters("Description") with gr.Row(equal_height=False): - (mf_family, mf_style, mfs, mfc, mfo), (mse, msc, mso, msr), ( - mbe, mbc, mbo) = image_processing.render_text_editor_parameters("Association") - (rf_family, rf_style, rfs, rfc, rfo), (rse, rsc, rso, rsr), ( - rbe, rbc, rbo) = image_processing.render_text_editor_parameters("Rating") + asc_font, asc_shadow, asc_background = image_processing.render_text_editor_parameters("Association") + rate_font, rate_shadow, rate_background = image_processing.render_text_editor_parameters("Rating") with gr.Column(scale=1): gr.Markdown("# Output") @@ -103,14 +99,26 @@ def set_json(json_file): save_button.click(image_processing.save_images_to_disk, inputs=[output_preview, image_type], outputs=[]) process_button.click(listicle_utils.process, inputs=[input_batch_images, input_batch_json, - nf_family, nf_style, nfs, nfc, nfo, nse, nsc, nso, nsr, nbe, - nbc, nbo, - df_family, df_style, dfs, dfc, dfo, dse, dsc, dso, dsr, dbe, - dbc, dbo, - mf_family, mf_style, mfs, mfc, mfo, mse, msc, mso, msr, mbe, - mbc, mbo, - rf_family, rf_style, rfs, rfc, rfo, rse, rsc, rso, rsr, rbe, - rbc, rbo + name_font.family, name_font.style, name_font.size, + name_font.color, name_font.opacity, name_shadow.enabled, + name_shadow.color, name_shadow.opacity, name_shadow.radius, + name_background.enabled, name_background.color, + name_background.opacity, + desc_font.family, desc_font.style, desc_font.size, + desc_font.color, desc_font.opacity, desc_shadow.enabled, + desc_shadow.color, desc_shadow.opacity, desc_shadow.radius, + desc_background.enabled, desc_background.color, + desc_background.opacity, + asc_font.family, asc_font.style, asc_font.size, + asc_font.color, asc_font.opacity, asc_shadow.enabled, + asc_shadow.color, asc_shadow.opacity, asc_shadow.radius, + asc_background.enabled, asc_background.color, + asc_background.opacity, + rate_font.family, rate_font.style, rate_font.size, + rate_font.color, rate_font.opacity, rate_shadow.enabled, + rate_shadow.color, rate_shadow.opacity, rate_shadow.radius, + rate_background.enabled, rate_background.color, + rate_background.opacity, ], outputs=[output_preview]) return input_batch_images, input_batch_json @@ -121,7 +129,8 @@ def render_generate_section() -> (gr.Button, gr.Gallery, gr.Code): Renders the Generate Artifacts section of the web app. 
:return: The send artifacts to batch button, the listicle image output gallery, and the listicle json output. """ - api_key, api_text_model, api_image_model = openai_components.render_openai_setup() + # api_key, api_text_model, api_image_model = openai_components.render_openai_setup() + open_ai_components = openai_components.render_openai_setup() with gr.Row(equal_height=False): with gr.Group(): with gr.Group(): @@ -159,8 +168,9 @@ def render_generate_section() -> (gr.Button, gr.Gallery, gr.Code): send_artifacts_to_batch_button = gr.Button("Send Artifacts to 'Batch Processing'", variant="secondary") generate_listicle_button.click(listicle_utils.generate_listicle, - inputs=[api_key, api_text_model, api_image_model, num_items, topic, - association, rating_type, details, generate_artifacts], + inputs=[open_ai_components.api_key, open_ai_components.api_text_model, + open_ai_components.api_image_model, num_items, topic, association, + rating_type, details, generate_artifacts], outputs=[listicle_output, listicle_json_output, listicle_image_output]) download_artifacts_button.click( listicle_utils.save_artifacts, diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index 13a6e2b..747c6c4 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -83,24 +83,16 @@ def process(image_files: list[Any], json_data: str, return None font_families = font_manager.get_fonts() - nff = font_families[nf_family][nf_style] - dff = font_families[df_family][df_style] - mff = font_families[mf_family][mf_style] - rff = font_families[rf_family][rf_style] images = [] - rating_offset = 34 - text_offset = 49 json_data = json.loads(json_data) - if len(image_files) != len(json_data["items"]): gr.Warning( f"Number of images ({len(image_files)}) does not match the number of items in the JSON ({len(json_data)}).") # We skip any entries that don't have an image field. 
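A tiny illustration of the lookup that the replacement line below builds (sample data assumed):

    items = [{"image": "a.png", "name": "A"}, {"name": "B"}]  # second item has no image and is skipped
    json_dict = {item["image"]: item for item in items if "image" in item}
    assert list(json_dict) == ["a.png"]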
- json_data_items = json_data["items"] - json_dict = {item["image"]: item for item in json_data_items if "image" in item} + json_dict = {item["image"]: item for item in json_data["items"] if "image" in item} for image_file in image_files: img_name = os.path.basename(image_file.name) @@ -112,31 +104,31 @@ def process(image_files: list[Any], json_data: str, img = image_processing.read_image_from_disk(image_file.name, size=(1080, 1920)) item = json_dict[img_name] - # Calculate positions for the text + # Calculate y-positions for the text top_center = (0, int(img.shape[0] * 0.13)) bottom_center = (0, int(img.shape[0] * 0.70)) # Add association and rating at the top center, one above the other - img, (_, association_height) = image_processing.add_text(img, item["association"], top_center, mff, - font_size=mfs, + img, (_, association_height) = image_processing.add_text(img, item["association"], top_center, + font_families[mf_family][mf_style], font_size=mfs, font_color=image_utils.get_rgba(mfc, mfo), - show_shadow=mse, - shadow_radius=msr, + show_shadow=mse, shadow_radius=msr, shadow_color=image_utils.get_rgba(msc, mso), show_background=mbe, background_color=image_utils.get_rgba(mbc, mbo), x_center=True) img, (_, _) = image_processing.add_text(img, f'{json_data["rating_type"]}: {item["rating"]}%', - (0, top_center[1] + association_height + rating_offset), - rff, font_size=rfs, font_color=image_utils.get_rgba(rfc, rfo), - show_shadow=rse, shadow_radius=rsr, - shadow_color=image_utils.get_rgba(rsc, rso), + (0, top_center[1] + association_height + 34), + font_families[rf_family][rf_style], font_size=rfs, + font_color=image_utils.get_rgba(rfc, rfo), show_shadow=rse, + shadow_radius=rsr, shadow_color=image_utils.get_rgba(rsc, rso), show_background=rbe, background_color=image_utils.get_rgba(rbc, rbo), x_center=True) # Add name and description at the bottom center, one above the other - img, (_, name_height) = image_processing.add_text(img, item["name"], bottom_center, nff, font_size=nfs, + img, (_, name_height) = image_processing.add_text(img, item["name"], bottom_center, + font_families[nf_family][nf_style], font_size=nfs, font_color=image_utils.get_rgba(nfc, nfo), max_width=15, show_shadow=nse, shadow_radius=nsr, @@ -145,10 +137,10 @@ def process(image_files: list[Any], json_data: str, background_color=image_utils.get_rgba(nbc, nbo), x_center=True) img, (_, _) = image_processing.add_text(img, f'"{item["description"]}"', - (0, bottom_center[1] + name_height + text_offset), dff, - font_size=dfs, font_color=image_utils.get_rgba(dfc, dfo), - show_shadow=dse, shadow_radius=dsr, - shadow_color=image_utils.get_rgba(dsc, dso), + (0, bottom_center[1] + name_height + 49), + font_families[df_family][df_style], font_size=dfs, + font_color=image_utils.get_rgba(dfc, dfo), show_shadow=dse, + shadow_radius=dsr, shadow_color=image_utils.get_rgba(dsc, dso), show_background=dbe, background_color=image_utils.get_rgba(dbc, dbo), max_width=43, x_center=True) diff --git a/ui/music/interface.py b/ui/music/interface.py index 3361495..8808022 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -41,7 +41,7 @@ def render_generate_cover() -> (gr.Button, gr.Button, gr.Image): sending the generated cover image to the "Add Text to Image" section, and an image display component for displaying the generated cover image. 
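    For scale: the listicle layout above anchors text at fixed fractions of the frame height, so with the
    1080x1920 canvases used here the two anchors work out to:

        height = 1920
        top_center = (0, int(height * 0.13))     # y = 249
        bottom_center = (0, int(height * 0.70))  # y = 1344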
""" - api_key, _, api_image_model = openai_components.render_openai_setup(show_text_model=False) + open_ai_components = openai_components.render_openai_setup(show_text_model=False) with gr.Row(equal_height=False): with gr.Group(): image_prompt = gr.Textbox(label="Image Prompt", lines=6, max_lines=10) @@ -54,7 +54,8 @@ def render_generate_cover() -> (gr.Button, gr.Button, gr.Image): send_to_process_button = gr.Button("Send Image to 'Add Text to Image'", variant="secondary") send_to_create_video_button = gr.Button("Send Image to 'Create Music Video'", variant="secondary") - generate_image_button.click(generate_cover_image, inputs=[api_key, api_image_model, image_prompt], + generate_image_button.click(generate_cover_image, inputs=[open_ai_components.api_key, + open_ai_components.api_image_model, image_prompt], outputs=[image_output]) save_image_button.click(image_processing.save_image_to_disk, inputs=[image_output, image_name, image_suffix], outputs=[]) @@ -70,7 +71,7 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): display component for displaying the cover image before processing, and an image display component for displaying the cover image after processing. """ - with gr.Column(): + with (gr.Column()): gr.Markdown("## Input") with gr.Group(): input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", @@ -80,13 +81,13 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): with gr.Row(equal_height=False): with gr.Group(): artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - (af_family, af_style, afs, afc, afo), (ase, asc, aso, asr), ( - abe, abc, abo) = image_processing.render_text_editor_parameters("Artist Text Parameters") + artist_font, artist_shadow, artist_background = image_processing.render_text_editor_parameters( + "Artist Text Parameters") with gr.Group(): song_name = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - (sf_family, sf_style, sfs, sfc, sfo), (sse, ssc, sso, ssr), ( - sbe, sbc, sbo) = image_processing.render_text_editor_parameters("Song Text Parameters") + song_font, song_shadow, song_background = \ + image_processing.render_text_editor_parameters("Song Text Parameters") process_button = gr.Button("Process", variant="primary") @@ -97,8 +98,14 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): send_to_create_video_button = gr.Button("Send Image to 'Create Music Video'", variant="secondary") process_button.click(process, inputs=[input_image, artist_name, song_name, - af_family, af_style, afs, afc, afo, ase, asc, aso, asr, abe, abc, abo, - sf_family, sf_style, sfs, sfc, sfo, sse, ssc, sso, ssr, sbe, sbc, sbo], + artist_font.family, artist_font.style, artist_font.size, artist_font.color, + artist_font.opacity, artist_shadow.enabled, artist_shadow.color, + artist_shadow.opacity, artist_shadow.radius, artist_background.enabled, + artist_background.color, artist_background.opacity, song_font.family, + song_font.style, song_font.size, song_font.color, song_font.opacity, + song_shadow.enabled, song_shadow.color, song_shadow.opacity, + song_shadow.radius, song_background.enabled, song_background.color, + song_background.opacity], outputs=[image_output]) save_image_button.click(image_processing.save_image_to_disk, inputs=[image_output, image_name, image_suffix], outputs=[]) @@ -120,12 +127,11 @@ def render_music_video_creation() -> gr.Image: background_color, background_opacity = gru.render_color_opacity_picker(default_name_label="Background") with gr.Group(): artist_name 
= gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - (artist_ffamily, artist_fstyle, artist_fsize, artist_fcolor, artist_fopacity), (ase, asc, aso, asr), ( - abe, abc, abo) = image_processing.render_text_editor_parameters("Text Parameters") + artist_font, artist_shadow, artist_background = \ + image_processing.render_text_editor_parameters("Text Parameters") with gr.Group(): song_title = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - (song_ffamily, song_fstyle, song_fsize, song_fcolor, song_fopacity), (sse, ssc, sso, ssr), ( - sbe, sbc, sbo) = image_processing.render_text_editor_parameters("Text Parameters") + song_font, song_shadow, song_background = image_processing.render_text_editor_parameters("Text Parameters") with gr.Column(): # Defaulting to 1. It's a still image, but may expand by adding some effects (grain, and not sure what else) fps = gr.Number(value=1, label="FPS", minimum=1, maximum=144) @@ -159,12 +165,17 @@ def render_music_video_creation() -> gr.Image: with gr.Group(): video_output, video_name, video_suffix, save_video_button = video_processing.render_video_output() - create_video_button.click(create_music_video, inputs=[cover_image, audio_filepath, fps, - artist_name, artist_ffamily, artist_fstyle, artist_fsize, - artist_fcolor, artist_fopacity, ase, asc, aso, asr, abe, abc, - abo, song_title, song_ffamily, song_fstyle, song_fsize, - song_fcolor, song_fopacity, sse, ssc, sso, ssr, sbe, sbc, sbo, - background_color, background_opacity, + create_video_button.click(create_music_video, inputs=[cover_image, audio_filepath, fps, artist_name, + artist_font.family, artist_font.style, artist_font.size, + artist_font.color, artist_font.opacity, artist_shadow.enabled, + artist_shadow.color, artist_shadow.opacity, + artist_shadow.radius, artist_background.enabled, + artist_background.color, artist_background.opacity, + song_title, song_font.family, song_font.style, song_font.size, + song_font.color, song_font.opacity, song_shadow.enabled, + song_shadow.color, song_shadow.opacity, song_shadow.radius, + song_background.enabled, song_background.color, + song_background.opacity, background_color, background_opacity, generate_audio_visualizer_button, audio_visualizer_color, audio_visualizer_opacity, audio_visualizer_drawing, audio_visualizer_num_rows, audio_visualizer_num_columns, diff --git a/ui/music/utils.py b/ui/music/utils.py index 7bd5105..10726b9 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -45,6 +45,48 @@ def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]] return downsampled_frequency_loudness, downsampled_times +def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, audio_length: int, fps: int, + audio_visualizer: dataclasses.RGBOpacity, dot_size: dataclasses.MinMax, + dot_count: dataclasses.RowCol, visualizer_drawing: Optional[str] = None) -> str: + print("Generating audio visualizer...") + + audio_visualizer_color_and_opacity = image_utils.get_rgba(audio_visualizer.color, audio_visualizer.opacity) + + custom_drawing = None + if visualizer_drawing is not None and visualizer_drawing != "": + custom_drawing = cv2.imread(visualizer_drawing, cv2.IMREAD_UNCHANGED) + if custom_drawing.shape[2] == 3: + custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGR2RGBA) + else: + custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGRA2RGBA) + + frequency_loudness, times = analyze_audio(audio_path, fps) + frame_cache = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) + + 
total_iterations = len(times) + start_time = time.time() + vis = visualizer.Visualizer(size=dataclasses.Size(frame_size.width, frame_size.height), + dot_size=dot_size, color=audio_visualizer_color_and_opacity, + dot_count=dataclasses.RowCol(dot_count.row, dot_count.col)) + vis.initialize_static_values() + temp_visualizer_images_dir = tempfile.mkdtemp() + os.makedirs(temp_visualizer_images_dir, exist_ok=True) + for i, time_point in enumerate(times): + if time_point > audio_length: + break + frame = frame_cache.copy() + vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing) + frame_np = np.array(frame) + frame_np = cv2.cvtColor(frame_np, cv2.COLOR_RGBA2BGRA) + frame_filename = f'{temp_visualizer_images_dir}/frame_{i:05d}.png' + cv2.imwrite(frame_filename, frame_np) + + progress.print_progress_bar(i, total_iterations, start_time=start_time) + progress.print_progress_bar(total_iterations, total_iterations, end='\n', start_time=start_time) + + return temp_visualizer_images_dir + + def create_music_video( image_path: str, audio_path: str, fps: int, artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int, @@ -112,7 +154,7 @@ def create_music_video( # Could probably expand to 4k, but unnecessary for this type of music video # Maybe in a future iteration it could be worth it - width, height = 1920, 1080 + frame_size = dataclasses.Size(1920, 1080) # Set up cover cover = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) @@ -122,33 +164,32 @@ def create_music_video( cover = cv2.cvtColor(cover, cv2.COLOR_BGRA2RGBA) # Create canvas with 4 channels (RGBA) - canvas = np.zeros((height, width, 4), dtype=np.uint8) + canvas = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) # Calculate dimensions for resizing the cover to fit within the canvas while maintaining its aspect ratio - cover_width, cover_height = cover.shape[1], cover.shape[0] - canvas_width, canvas_height = width, height - resize_factor = min(canvas_width / cover_width, canvas_height / cover_height) + cover_size = dataclasses.Size(cover.shape[1], cover.shape[0]) + resize_factor = min(frame_size.width / cover_size.width, frame_size.height / cover_size.height) resize_factor *= (7 / 10) - new_width = int(cover_width * resize_factor) - new_height = int(cover_height * resize_factor) + cover_size.width = int(cover_size.width * resize_factor) + cover_size.height = int(cover_size.height * resize_factor) # Calculate cover position to center it on the canvas - cover_pos = ((canvas_width - new_width) // 2, (canvas_height - new_height) // 2) - cover = cv2.resize(cover, (new_width, new_height)) + cover_pos = ((frame_size.width - cover_size.width) // 2, (frame_size.height - cover_size.height) // 2) + cover = cv2.resize(cover, (cover_size.width, cover_size.height)) - canvas[cover_pos[1]:cover_pos[1] + new_height, cover_pos[0]:cover_pos[0] + new_width] = cover + canvas[cover_pos[1]:cover_pos[1] + cover_size.height, cover_pos[0]:cover_pos[0] + cover_size.width] = cover # Load song / audio audio_clip = AudioFileClip(audio_path) # Add video background background = cv2.imread(image_path) - background = cv2.resize(background, (width, height)) + background = cv2.resize(background, (frame_size.width, frame_size.height)) background = cv2.GaussianBlur(background, (49, 49), 0) if background.shape[2] == 3: background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA) background_color_overlay = image_utils.get_bgra(background_color, background_opacity) - overlay = np.full((height, width, 4), 
background_color_overlay, dtype=np.uint8) + overlay = np.full((frame_size.height, frame_size.width, 4), background_color_overlay, dtype=np.uint8) alpha_overlay = overlay[:, :, 3] / 255.0 alpha_background = background[:, :, 3] / 255.0 for c in range(0, 3): @@ -159,48 +200,21 @@ def create_music_video( tmp_background_image_path = tempfile.mktemp(suffix=".png") cv2.imwrite(tmp_background_image_path, background_bgr) - audio_visualizer_color_and_opacity = image_utils.get_rgba(audio_visualizer_color, audio_visualizer_opacity) - - # Add audio visualizer - custom_drawing = None - if visualizer_drawing is not None and visualizer_drawing != "": - custom_drawing = cv2.imread(visualizer_drawing, cv2.IMREAD_UNCHANGED) - if custom_drawing.shape[2] == 3: - custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGR2RGBA) - else: - custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGRA2RGBA) - if generate_audio_visualizer: - print("Generating audio visualizer...") - frequency_loudness, times = analyze_audio(audio_path, fps) - frame_cache = np.zeros((height, width, 4), dtype=np.uint8) - - total_iterations = len(times) - start_time = time.time() - vis = visualizer.Visualizer(width=width, height=height, base_size=audio_visualizer_min_size, - max_size=audio_visualizer_max_size, color=audio_visualizer_color_and_opacity, - dot_count=(audio_visualizer_num_rows, audio_visualizer_num_columns)) - vis.initialize_static_values() - temp_visualizer_images_dir = tempfile.mkdtemp() - os.makedirs(temp_visualizer_images_dir, exist_ok=True) - for i, time_point in enumerate(times): - if time_point > audio_clip.duration: - break - frame = frame_cache.copy() - vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing) - frame_np = np.array(frame) - frame_np = cv2.cvtColor(frame_np, cv2.COLOR_RGBA2BGRA) - frame_filename = f'{temp_visualizer_images_dir}/frame_{i:05d}.png' - cv2.imwrite(frame_filename, frame_np) - - progress.print_progress_bar(i, total_iterations, start_time=start_time) - progress.print_progress_bar(total_iterations, total_iterations, end='\n', start_time=start_time) + temp_visualizer_images_dir = _audio_visualizer_generator(frame_size, audio_path, audio_clip.duration, fps, + dataclasses.RGBOpacity(audio_visualizer_color, + audio_visualizer_opacity), + dataclasses.MinMax(audio_visualizer_min_size, + audio_visualizer_max_size), + dataclasses.RowCol(audio_visualizer_num_rows, + audio_visualizer_num_columns), + visualizer_drawing=visualizer_drawing) # Add text font_families = font_manager.get_fonts() - text_canvas = np.zeros((height, width, 4), dtype=np.uint8) + text_canvas = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) - song_pos = (20, int(height * 0.925)) + song_pos = (20, int(frame_size.height * 0.925)) text_canvas, (_, song_height) = image_processing.add_text(text_canvas, song, song_pos, font_families[song_font_type][song_font_style], font_size=song_font_size, @@ -381,8 +395,6 @@ def process(image_path: str, artist: str, song: str, return None font_families = font_manager.get_fonts() - aff = font_families[af_family][af_style] - sff = font_families[sf_family][sf_style] img = image_processing.read_image_from_disk(image_path) @@ -390,7 +402,7 @@ def process(image_path: str, artist: str, song: str, top_center = (0, int(img.shape[0] * 0.13)) bottom_center = (0, int(img.shape[0] * 0.87)) - img, (_, _) = image_processing.add_text(img, artist, top_center, aff, + img, (_, _) = image_processing.add_text(img, artist, top_center, font_families[af_family][af_style], 
font_size=afs, font_color=image_utils.get_rgba(afc, afo), show_shadow=ase, @@ -400,7 +412,7 @@ def process(image_path: str, artist: str, song: str, background_color=image_utils.get_rgba(abc, abo), x_center=True) - img, (_, _) = image_processing.add_text(img, song, bottom_center, sff, font_size=sfs, + img, (_, _) = image_processing.add_text(img, song, bottom_center, font_families[sf_family][sf_style], font_size=sfs, font_color=image_utils.get_rgba(sfc, sfo), max_width=15, show_shadow=sse, shadow_radius=ssr, diff --git a/utils/dataclasses.py b/utils/dataclasses.py index 655e7dd..77c3531 100644 --- a/utils/dataclasses.py +++ b/utils/dataclasses.py @@ -1,8 +1,9 @@ """ This module contains dataclasses and type aliases used in the project. """ -from typing import Union +from typing import Union, Optional from dataclasses import dataclass +import gradio as gr @dataclass @@ -25,4 +26,83 @@ class Position: y: int +@dataclass +class Size: + """ + A dataclass representing a size on a 2d plane. + """ + width: int + height: int + + +@dataclass +class OpenAIGradioComponents: + """ + A dataclass representing the components of the OpenAI API. + """ + api_key: gr.Textbox + api_text_model: Optional[gr.Dropdown] + api_image_model: Optional[gr.Dropdown] + + +@dataclass +class RGBOpacity: + """ + A dataclass representing an RGB color with an opacity value. + """ + color: tuple[int, int, int] + opacity: int + + +@dataclass +class MinMax: + """ + A dataclass representing a minimum and maximum value. + """ + min: int + max: int + + +@dataclass +class RowCol: + """ + A dataclass representing a row and column. + """ + row: int + col: int + + +@dataclass +class FontGradioComponents: + """ + A dataclass representing the components of the font editor. + """ + family: gr.Dropdown + style: gr.Dropdown + color: gr.ColorPicker + opacity: gr.Slider + size: gr.Number + + +@dataclass +class FontDropShadowGradioComponents: + """ + A dataclass representing the components of the drop shadow editor. + """ + enabled: gr.Checkbox + color: gr.ColorPicker + opacity: gr.Slider + radius: gr.Number + + +@dataclass +class FontBackgroundGradioComponents: + """ + A dataclass representing the components of the background editor. + """ + enabled: gr.Checkbox + color: gr.ColorPicker + opacity: gr.Slider + + RGBColor = Union[str, tuple[int, int, int]] diff --git a/utils/gradio.py b/utils/gradio.py index f086830..54353ff 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -2,7 +2,7 @@ This module contains utility functions for rendering widely-used Gradio components. """ import gradio as gr -from utils import font_manager +from utils import font_manager, dataclasses def render_color_opacity_picker(default_name_label: str = "Font") -> tuple[gr.ColorPicker, gr.Slider]: @@ -33,8 +33,7 @@ def bind_checkbox_to_visibility(checkbox: gr.Checkbox, group: gr.Group): ) -def render_font_picker(default_font_size: int = 55) \ - -> tuple[gr.Dropdown, gr.Dropdown, gr.ColorPicker, gr.Slider, gr.Number]: +def render_font_picker(default_font_size: int = 55) -> dataclasses.FontGradioComponents: """ Renders a font picker with the appropriate styling. :param default_font_size: The default font size to use. 
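    The containers added in utils/dataclasses.py above group what were previously loose tuples; a quick
    sketch of their use (values arbitrary):

        from utils import dataclasses

        size = dataclasses.Size(width=1920, height=1080)
        dots = dataclasses.RowCol(row=90, col=65)
        dot_size = dataclasses.MinMax(min=1, max=7)
        color: dataclasses.RGBColor = (255, 255, 255)  # a hex string such as "#ffffff" is also valid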
@@ -62,7 +61,7 @@ def update_font_styles(selected_font_family): font_color, font_opacity = render_color_opacity_picker() font_size = gr.Number(default_font_size, label="Font Size", interactive=True) - return font_family, font_style, font_color, font_opacity, font_size + return dataclasses.FontGradioComponents(font_family, font_style, font_color, font_opacity, font_size) def render_tool_description(description: str): diff --git a/utils/visualizer.py b/utils/visualizer.py index 88619ba..8e53285 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -4,6 +4,7 @@ from typing import Dict, Optional import numpy as np import cv2 +from utils import dataclasses class Visualizer: @@ -11,13 +12,11 @@ class Visualizer: This class is used to draw the visualizer on the canvas. Will be replaced with a more general solution in the future to allow for more customization. """ - def __init__(self, base_size, max_size, color, dot_count, width, height): - self.base_size = base_size - self.max_size = max_size + def __init__(self, dot_size: dataclasses.MinMax, color, dot_count: dataclasses.RowCol, size: dataclasses.Size): + self.dot_size = dot_size self.color = color self.dot_count = dot_count - self.width = width - self.height = height + self.size = size self.cached_dot_positions = None self.cached_resized_drawing = {} @@ -27,13 +26,13 @@ def initialize_static_values(self: "Visualizer") -> None: :return: None. """ # Calculate and store dot positions - x_positions = (self.width / self.dot_count[0]) * np.arange(self.dot_count[0]) + ( - self.width / self.dot_count[0] / 2) - y_positions = (self.height / self.dot_count[1]) * np.arange(self.dot_count[1]) + ( - self.height / self.dot_count[1] / 2) + x_positions = (self.size.width / self.dot_count.col) * np.arange(self.dot_count.col) + ( + self.size.width / self.dot_count.col / 2) + y_positions = (self.size.height / self.dot_count.row) * np.arange(self.dot_count.row) + ( + self.size.height / self.dot_count.row / 2) grid_x, grid_y = np.meshgrid(x_positions, y_positions) - self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count[0]) for y in - range(self.dot_count[1])] + self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count.col) for y in + range(self.dot_count.row)] def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], custom_drawing: Optional[np.ndarray] = None) -> None: @@ -44,15 +43,11 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict :param custom_drawing: A custom drawing to use instead of the default circle. :return: None. 
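    For reference, the grid cached by initialize_static_values above places one dot per cell, centred within
    the cell; a reduced numpy sketch with assumed dimensions:

        import numpy as np

        width, height, cols, rows = 8, 6, 4, 3
        x = (width / cols) * np.arange(cols) + width / cols / 2    # [1., 3., 5., 7.]
        y = (height / rows) * np.arange(rows) + height / rows / 2  # [1., 3., 5.]
        grid_x, grid_y = np.meshgrid(x, y)
        positions = [(grid_x[r, c], grid_y[r, c]) for c in range(cols) for r in range(rows)]
        assert len(positions) == rows * cols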
""" - # Calculate and store dot positions - dot_count_x = self.dot_count[0] - dot_count_y = self.dot_count[1] - # Precompute log frequencies freq_keys = np.array(list(frequency_data.keys())) start_freq = freq_keys[freq_keys > 0][0] if freq_keys[freq_keys > 0].size > 0 else 1.0 end_freq = freq_keys[-1] - log_freqs = np.logspace(np.log10(start_freq), np.log10(end_freq), dot_count_x) + log_freqs = np.logspace(np.log10(start_freq), np.log10(end_freq), self.dot_count.col) # Find the maximum and minimum loudness values, ignoring -80 dB freq_bands = np.array([frequency_data[key] for key in freq_keys if key > 0]) # Ignore 0 Hz @@ -62,9 +57,9 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict # Precompute loudness values loudness_values = {} - for x in range(dot_count_x): + for x in range(self.dot_count.col): lower_bound = log_freqs[x] - upper_bound = log_freqs[x + 1] if x < dot_count_x - 1 else end_freq + 1 + upper_bound = log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 band_freqs = [freq for freq in freq_keys if lower_bound <= freq < upper_bound] if not band_freqs: closest_freq = min(freq_keys, key=lambda f, lb=lower_bound: abs(f - lb)) @@ -76,15 +71,15 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict cached_dot_sizes = {} for i, (pos_x, pos_y) in enumerate(self.cached_dot_positions): - column = i // dot_count_y # Ensure the correct column is computed + column = i // self.dot_count.row # Ensure the correct column is computed if column not in cached_dot_sizes: avg_loudness = loudness_values[column] # Scale the loudness to the dot size scaled_loudness = (avg_loudness - min_loudness) / ( max_loudness - min_loudness) if max_loudness != min_loudness else 0 - dot_size = self.base_size + scaled_loudness * (self.max_size - self.base_size) - dot_size = min(max(dot_size, self.base_size), self.max_size) + dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min) + dot_size = min(max(dot_size, self.dot_size.min), self.dot_size.max) cached_dot_sizes[column] = dot_size else: From 04a622b36de81be339d76bd7aa060f2a2649b722 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Tue, 16 Jan 2024 19:42:27 -0500 Subject: [PATCH 13/13] 100% Linter --- .github/workflows/pylint.yml | 4 +- .pylintrc | 1 + processing/image.py | 42 +++-- processing/video.py | 6 +- requirements.txt | 4 +- ui/listicles/interface.py | 68 +++++--- ui/listicles/utils.py | 1 + ui/music/interface.py | 116 ++++++++----- ui/music/utils.py | 317 +++++++++++++++++++---------------- utils/dataclasses.py | 68 +++++++- utils/gradio.py | 11 +- utils/image.py | 44 +++++ utils/visualizer.py | 131 ++++++++++----- 13 files changed, 530 insertions(+), 283 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index aa949d3..e6dde2a 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -1,11 +1,11 @@ name: Python linter on: [push] jobs: - build: + lint: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"] + python-version: ["3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/.pylintrc b/.pylintrc index 2083c87..b390428 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,6 +1,7 @@ [MAIN] max-line-length=120 max-attributes=10 +max-locals=20 # Currently, this is added because gradio Inputs don't support passing tuples/dataclasses/etc. as arguments, meaning I # can't shorten some methods that take a lot of arguments. 
disable=too-many-arguments diff --git a/processing/image.py b/processing/image.py index eb97bc1..2c35dfb 100644 --- a/processing/image.py +++ b/processing/image.py @@ -35,9 +35,7 @@ def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponents, - dataclasses.FontDropShadowGradioComponents, - dataclasses.FontBackgroundGradioComponents): +def render_text_editor_parameters(name: str) -> dataclasses.FontDisplayGradioComponents: """ Renders the text editor parameters. :param name: The name of the text editor parameters. This is used as the label for the accordion. @@ -49,18 +47,21 @@ def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponent with gr.Group(): drop_shadow_enabled = gr.Checkbox(False, label="Enable Drop Shadow", interactive=True) with gr.Group(visible=drop_shadow_enabled.value) as additional_options: - drop_shadow_color, drop_shadow_opacity = gru.render_color_opacity_picker() + drop_shadow_color_opacity = gru.render_color_opacity_picker() drop_shadow_radius = gr.Number(0, label="Shadow Radius") gru.bind_checkbox_to_visibility(drop_shadow_enabled, additional_options) with gr.Group(): background_enabled = gr.Checkbox(False, label="Enable Background", interactive=True) with gr.Group(visible=background_enabled.value) as additional_options: - background_color, background_opacity = gru.render_color_opacity_picker() + background_color_opacity = gru.render_color_opacity_picker() gru.bind_checkbox_to_visibility(background_enabled, additional_options) - return (font_data, dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color, - drop_shadow_opacity, drop_shadow_radius), - dataclasses.FontBackgroundGradioComponents(background_enabled, background_color, background_opacity)) + drop_shadow_data = dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color_opacity.color, + drop_shadow_color_opacity.opacity, drop_shadow_radius) + background_data = dataclasses.FontBackgroundGradioComponents(background_enabled, background_color_opacity.color, + background_color_opacity.opacity) + + return dataclasses.FontDisplayGradioComponents(font_data, drop_shadow_data, background_data) def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], text: str, font: ImageFont, @@ -216,7 +217,23 @@ def save_image_to_disk(image_path: str, name: Optional[str] = None, save_dir: st return save_dir -# Function to add text to an image with custom font, size, and wrapping +def _get_lines(text: str, max_width: Optional[int] = None) -> list[str]: + """ + Gets the lines of text from a string. + :param text: The text to get the lines from. + :param max_width: The maximum width of the text before wrapping. + :return: A list of lines. 
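    Example behaviour of this helper (wrap width assumed; textwrap is from the standard library):

        import textwrap

        lines = textwrap.fill("The quick brown fox jumps over", width=15).split('\n')
        # -> ['The quick brown', 'fox jumps over']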
+ """ + if max_width: # Prepare for text wrapping if max_width is provided + wrapped_text = textwrap.fill(text, width=max_width) + else: + wrapped_text = text + + return wrapped_text.split('\n') + + +# A lot of the reported variables come from the parameters +# pylint: disable=too-many-locals def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[int, int], font_path: str, font_size: int, font_color: Tuple[int, int, int, int] = (255, 255, 255, 255), shadow_color: Tuple[int, int, int, int] = (255, 255, 255, 255), @@ -256,12 +273,7 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - if max_width: # Prepare for text wrapping if max_width is provided - wrapped_text = textwrap.fill(text, width=max_width) - else: - wrapped_text = text - - lines = wrapped_text.split('\n') + lines = _get_lines(text, max_width) y_offset = 0 # max_line_width = 0 # Keep track of the widest line diff --git a/processing/video.py b/processing/video.py index 3d19e59..cbb4036 100644 --- a/processing/video.py +++ b/processing/video.py @@ -8,13 +8,13 @@ from typing import Optional, Literal import gradio as gr from moviepy.editor import VideoFileClip -from utils import path_handler +from utils import path_handler, dataclasses VIDEO_FOLDER = "videos" default_path = os.path.join(path_handler.get_default_path(), VIDEO_FOLDER) -def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): +def render_video_output() -> dataclasses.VideoOutputGradioComponents: """ Creates and returns a set of Gradio interface components for video output. @@ -32,7 +32,7 @@ def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): video_suffix = gr.Dropdown([".mp4", ".mov"], value=".mp4", label="File Type", allow_custom_value=False) save_video_button = gr.Button("Save To Disk", variant="primary") - return video_output, video_name, video_suffix, save_video_button + return dataclasses.VideoOutputGradioComponents(video_output, video_name, video_suffix, save_video_button) def save_video_to_disk(video_path: str, name: Optional[str] = None, video_suffix: Literal[".mp4", ".mov"] = ".mp4", diff --git a/requirements.txt b/requirements.txt index 7922ce1..7614d6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ gradio~=4.12.0 inflect~=7.0.0 openai~=1.6.1 -numpy~=1.23.5 -Pillow~=8.4.0 +numpy~=1.26.3 +Pillow~=10.2.0 opencv-python-headless~=4.8.1.78 fonttools~=4.47.0 moviepy~=1.0.3 diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index bb15613..87d386a 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -81,11 +81,11 @@ def set_json(json_file): with gr.Column(scale=3): gr.Markdown("# Parameters") with gr.Row(equal_height=False): - name_font, name_shadow, name_background = image_processing.render_text_editor_parameters("Name") - desc_font, desc_shadow, desc_background = image_processing.render_text_editor_parameters("Description") + name_font_display = image_processing.render_text_editor_parameters("Name") + desc_font_display = image_processing.render_text_editor_parameters("Description") with gr.Row(equal_height=False): - asc_font, asc_shadow, asc_background = image_processing.render_text_editor_parameters("Association") - rate_font, rate_shadow, rate_background = image_processing.render_text_editor_parameters("Rating") + asc_font_display = image_processing.render_text_editor_parameters("Association") + rate_font_display = 
image_processing.render_text_editor_parameters("Rating") with gr.Column(scale=1): gr.Markdown("# Output") @@ -99,26 +99,46 @@ def set_json(json_file): save_button.click(image_processing.save_images_to_disk, inputs=[output_preview, image_type], outputs=[]) process_button.click(listicle_utils.process, inputs=[input_batch_images, input_batch_json, - name_font.family, name_font.style, name_font.size, - name_font.color, name_font.opacity, name_shadow.enabled, - name_shadow.color, name_shadow.opacity, name_shadow.radius, - name_background.enabled, name_background.color, - name_background.opacity, - desc_font.family, desc_font.style, desc_font.size, - desc_font.color, desc_font.opacity, desc_shadow.enabled, - desc_shadow.color, desc_shadow.opacity, desc_shadow.radius, - desc_background.enabled, desc_background.color, - desc_background.opacity, - asc_font.family, asc_font.style, asc_font.size, - asc_font.color, asc_font.opacity, asc_shadow.enabled, - asc_shadow.color, asc_shadow.opacity, asc_shadow.radius, - asc_background.enabled, asc_background.color, - asc_background.opacity, - rate_font.family, rate_font.style, rate_font.size, - rate_font.color, rate_font.opacity, rate_shadow.enabled, - rate_shadow.color, rate_shadow.opacity, rate_shadow.radius, - rate_background.enabled, rate_background.color, - rate_background.opacity, + name_font_display.font.family, name_font_display.font.style, + name_font_display.font.size, name_font_display.font.color, + name_font_display.font.opacity, + name_font_display.drop_shadow.enabled, + name_font_display.drop_shadow.color, + name_font_display.drop_shadow.opacity, + name_font_display.drop_shadow.radius, + name_font_display.background.enabled, + name_font_display.background.color, + name_font_display.background.opacity, + desc_font_display.font.family, desc_font_display.font.style, + desc_font_display.font.size, desc_font_display.font.color, + desc_font_display.font.opacity, + desc_font_display.drop_shadow.enabled, + desc_font_display.drop_shadow.color, + desc_font_display.drop_shadow.opacity, + desc_font_display.drop_shadow.radius, + desc_font_display.background.enabled, + desc_font_display.background.color, + desc_font_display.background.opacity, + asc_font_display.font.family, asc_font_display.font.style, + asc_font_display.font.size, asc_font_display.font.color, + asc_font_display.font.opacity, + asc_font_display.drop_shadow.enabled, + asc_font_display.drop_shadow.color, + asc_font_display.drop_shadow.opacity, + asc_font_display.drop_shadow.radius, + asc_font_display.background.enabled, + asc_font_display.background.color, + asc_font_display.background.opacity, + rate_font_display.font.family, rate_font_display.font.style, + rate_font_display.font.size, rate_font_display.font.color, + rate_font_display.font.opacity, + rate_font_display.drop_shadow.enabled, + rate_font_display.drop_shadow.color, + rate_font_display.drop_shadow.opacity, + rate_font_display.drop_shadow.radius, + rate_font_display.background.enabled, + rate_font_display.background.color, + rate_font_display.background.opacity, ], outputs=[output_preview]) return input_batch_images, input_batch_json diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index 747c6c4..01a7898 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -11,6 +11,7 @@ import api.chatgpt as chatgpt_api +# pylint: disable=too-many-locals def process(image_files: list[Any], json_data: str, nf_family: str, nf_style: str, nfs: int, nfc: dataclasses.RGBColor, nfo: int, nse: bool, nsc: dataclasses.RGBColor, nso: 
int, nsr, nbe: bool, nbc: dataclasses.RGBColor, nbo: int, diff --git a/ui/music/interface.py b/ui/music/interface.py index 8808022..c6e199b 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -7,6 +7,7 @@ import processing.video as video_processing import processing.image as image_processing import ui.components.openai as openai_components +from utils import dataclasses def render_music_section() -> None: @@ -71,7 +72,7 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): display component for displaying the cover image before processing, and an image display component for displaying the cover image after processing. """ - with (gr.Column()): + with gr.Column(): gr.Markdown("## Input") with gr.Group(): input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", @@ -81,13 +82,11 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): with gr.Row(equal_height=False): with gr.Group(): artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - artist_font, artist_shadow, artist_background = image_processing.render_text_editor_parameters( - "Artist Text Parameters") + artist_font_display = image_processing.render_text_editor_parameters("Artist Text Parameters") with gr.Group(): song_name = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - song_font, song_shadow, song_background = \ - image_processing.render_text_editor_parameters("Song Text Parameters") + song_font_display = image_processing.render_text_editor_parameters("Song Text Parameters") process_button = gr.Button("Process", variant="primary") @@ -98,14 +97,20 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): send_to_create_video_button = gr.Button("Send Image to 'Create Music Video'", variant="secondary") process_button.click(process, inputs=[input_image, artist_name, song_name, - artist_font.family, artist_font.style, artist_font.size, artist_font.color, - artist_font.opacity, artist_shadow.enabled, artist_shadow.color, - artist_shadow.opacity, artist_shadow.radius, artist_background.enabled, - artist_background.color, artist_background.opacity, song_font.family, - song_font.style, song_font.size, song_font.color, song_font.opacity, - song_shadow.enabled, song_shadow.color, song_shadow.opacity, - song_shadow.radius, song_background.enabled, song_background.color, - song_background.opacity], + artist_font_display.font.family, artist_font_display.font.style, + artist_font_display.font.size, artist_font_display.font.color, + artist_font_display.font.opacity, artist_font_display.drop_shadow.enabled, + artist_font_display.drop_shadow.color, + artist_font_display.drop_shadow.opacity, + artist_font_display.drop_shadow.radius, + artist_font_display.background.enabled, + artist_font_display.background.color, artist_font_display.background.opacity, + song_font_display.font.family, song_font_display.font.style, + song_font_display.font.size, song_font_display.font.color, + song_font_display.font.opacity, song_font_display.drop_shadow.enabled, + song_font_display.drop_shadow.color, song_font_display.drop_shadow.opacity, + song_font_display.drop_shadow.radius, song_font_display.background.enabled, + song_font_display.background.color, song_font_display.background.opacity], outputs=[image_output]) save_image_button.click(image_processing.save_image_to_disk, inputs=[image_output, image_name, image_suffix], outputs=[]) @@ -124,14 +129,13 @@ def render_music_video_creation() -> gr.Image: show_share_button=False, show_download_button=False, 
                                scale=2, image_mode="RGBA")
         audio_filepath = gr.File(label="Audio", file_types=["audio"], scale=1, height=100)
     with gr.Column():
-        background_color, background_opacity = gru.render_color_opacity_picker(default_name_label="Background")
+        background_color_opacity = gru.render_color_opacity_picker(default_name_label="Background")
         with gr.Group():
             artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1)
-            artist_font, artist_shadow, artist_background = \
-                image_processing.render_text_editor_parameters("Text Parameters")
+            artist_font_display = image_processing.render_text_editor_parameters("Text Parameters")
         with gr.Group():
             song_title = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2)
-            song_font, song_shadow, song_background = image_processing.render_text_editor_parameters("Text Parameters")
+            song_font_display = image_processing.render_text_editor_parameters("Text Parameters")
     with gr.Column():
         # Defaulting to 1. It's a still image, but may expand by adding some effects (grain, and not sure what else)
         fps = gr.Number(value=1, label="FPS", minimum=1, maximum=144)
@@ -140,48 +144,70 @@ def render_music_video_creation() -> gr.Image:
             generate_audio_visualizer_button = gr.Checkbox(value=False, label="Generate Audio Visualizer",
                                                            interactive=True)
             with gr.Group() as audio_visualizer_group:
-                audio_visualizer_color, audio_visualizer_opacity = \
-                    gru.render_color_opacity_picker("Audio Visualizer")
+                audio_visualizer_color_opacity = gru.render_color_opacity_picker("Audio Visualizer")
                 with gr.Group():
                     with gr.Row():
-                        audio_visualizer_num_rows = gr.Number(value=90, label="Number of Rows",
-                                                              minimum=1, maximum=100)
-                        audio_visualizer_num_columns = gr.Number(value=65, label="Number of Columns",
-                                                                 minimum=1, maximum=100)
+                        audio_visualizer_amount = dataclasses.RowColGradioComponents(
+                            row=gr.Number(value=90, label="Number of Rows", minimum=1,
+                                          maximum=100),
+                            col=gr.Number(value=65, label="Number of Columns", minimum=1,
+                                          maximum=100)
+                        )
                     with gr.Row():
-                        audio_visualizer_min_size = gr.Number(value=1, label="Minimum Size", minimum=1, maximum=100)
-                        audio_visualizer_max_size = gr.Number(value=7, label="Maximum Size", minimum=1, maximum=200)
+                        audio_visualizer_dot_size = dataclasses.MinMaxGradioComponents(
+                            min=gr.Number(value=1, label="Minimum Size", minimum=1, maximum=100),
+                            max=gr.Number(value=7, label="Maximum Size", minimum=1, maximum=200)
+                        )
                 audio_visualizer_drawing = gr.Image(label="Visualizer Drawing (png)", type="filepath",
                                                     sources=["upload"], show_share_button=False,
                                                     show_download_button=False, scale=2, height=150,
                                                     image_mode="RGBA")
+                visualizer_overlay_checkbox = gr.Checkbox(value=False, label="Overlay Visualizer on One-Another",
+                                                          info="If checked, alpha-blending will be applied, which "
+                                                               "is noticeable on larger pngs where each drawing "
+                                                               "overlaps. This is only important for transparent "
+                                                               "images and is very slow. 
If the image is not " + "transparent, leave this unchecked.") gru.bind_checkbox_to_visibility(generate_audio_visualizer_button, audio_visualizer_group) create_video_button = gr.Button("Create Music Video", variant="primary") gr.Markdown("## Output") with gr.Group(): - video_output, video_name, video_suffix, save_video_button = video_processing.render_video_output() + video_data = video_processing.render_video_output() create_video_button.click(create_music_video, inputs=[cover_image, audio_filepath, fps, artist_name, - artist_font.family, artist_font.style, artist_font.size, - artist_font.color, artist_font.opacity, artist_shadow.enabled, - artist_shadow.color, artist_shadow.opacity, - artist_shadow.radius, artist_background.enabled, - artist_background.color, artist_background.opacity, - song_title, song_font.family, song_font.style, song_font.size, - song_font.color, song_font.opacity, song_shadow.enabled, - song_shadow.color, song_shadow.opacity, song_shadow.radius, - song_background.enabled, song_background.color, - song_background.opacity, background_color, background_opacity, - generate_audio_visualizer_button, audio_visualizer_color, - audio_visualizer_opacity, audio_visualizer_drawing, - audio_visualizer_num_rows, audio_visualizer_num_columns, - audio_visualizer_min_size, audio_visualizer_max_size], - outputs=[video_output]) - save_video_button.click(video_processing.save_video_to_disk, - inputs=[video_output, video_name, video_suffix], outputs=[]) + artist_font_display.font.family, + artist_font_display.font.style, artist_font_display.font.size, + artist_font_display.font.color, + artist_font_display.font.opacity, + artist_font_display.drop_shadow.enabled, + artist_font_display.drop_shadow.color, + artist_font_display.drop_shadow.opacity, + artist_font_display.drop_shadow.radius, + artist_font_display.background.enabled, + artist_font_display.background.color, + artist_font_display.background.opacity, + song_title, song_font_display.font.family, + song_font_display.font.style, song_font_display.font.size, + song_font_display.font.color, song_font_display.font.opacity, + song_font_display.drop_shadow.enabled, + song_font_display.drop_shadow.color, + song_font_display.drop_shadow.opacity, + song_font_display.drop_shadow.radius, + song_font_display.background.enabled, + song_font_display.background.color, + song_font_display.background.opacity, + background_color_opacity.color, + background_color_opacity.opacity, + generate_audio_visualizer_button, + audio_visualizer_color_opacity.color, + audio_visualizer_color_opacity.opacity, + audio_visualizer_drawing, visualizer_overlay_checkbox, + audio_visualizer_amount.row, audio_visualizer_amount.col, + audio_visualizer_dot_size.min, audio_visualizer_dot_size.max], + outputs=[video_data.video]) + video_data.save.click(video_processing.save_video_to_disk, inputs=[video_data.video, video_data.name, + video_data.suffix], outputs=[]) return cover_image diff --git a/ui/music/utils.py b/ui/music/utils.py index 10726b9..1ffdb90 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -7,13 +7,14 @@ import time import tempfile from typing import List, Dict, Optional +from dataclasses import dataclass import cv2 from moviepy.editor import AudioFileClip import numpy as np import librosa from api import chatgpt as chatgpt_api from processing import image as image_processing -from utils import progress, visualizer, font_manager, image as image_utils, dataclasses +from utils import progress, visualizer, font_manager, image as image_utils, dataclasses as 
local_dataclasses


 def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray):
@@ -45,16 +46,25 @@ def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]]
     return downsampled_frequency_loudness, downsampled_times


-def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, audio_length: int, fps: int,
-                                audio_visualizer: dataclasses.RGBOpacity, dot_size: dataclasses.MinMax,
-                                dot_count: dataclasses.RowCol, visualizer_drawing: Optional[str] = None) -> str:
-    print("Generating audio visualizer...")
+@dataclass
+class AudioVisualizerDotData:
+    """
+    A dataclass representing the data for the audio visualizer's dots.
+    """
+    size: local_dataclasses.MinMax
+    count: local_dataclasses.RowCol
+    color: local_dataclasses.RGBColor
+    opacity: int
+    visualizer_drawing: Optional[str] = None
+    visualizer_drawing_overlap: bool = False

-    audio_visualizer_color_and_opacity = image_utils.get_rgba(audio_visualizer.color, audio_visualizer.opacity)

+def _audio_visualizer_generator(frame_size: local_dataclasses.Size, audio_path: str, audio_length: int, fps: int,
+                                dot_data: AudioVisualizerDotData) -> str:
+    print("Generating audio visualizer...")
     custom_drawing = None
-    if visualizer_drawing is not None and visualizer_drawing != "":
-        custom_drawing = cv2.imread(visualizer_drawing, cv2.IMREAD_UNCHANGED)
+    if dot_data.visualizer_drawing is not None and dot_data.visualizer_drawing != "":
+        custom_drawing = cv2.imread(dot_data.visualizer_drawing, cv2.IMREAD_UNCHANGED)
         if custom_drawing.shape[2] == 3:
             custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGR2RGBA)
         else:
@@ -65,9 +75,9 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a
     total_iterations = len(times)
     start_time = time.time()
-    vis = visualizer.Visualizer(size=dataclasses.Size(frame_size.width, frame_size.height),
-                                dot_size=dot_size, color=audio_visualizer_color_and_opacity,
-                                dot_count=dataclasses.RowCol(dot_count.row, dot_count.col))
+    vis = visualizer.Visualizer(size=local_dataclasses.Size(frame_size.width, frame_size.height),
+                                dot_size=dot_data.size, color=image_utils.get_rgba(dot_data.color, dot_data.opacity),
+                                dot_count=local_dataclasses.RowCol(dot_data.count.row, dot_data.count.col))
     vis.initialize_static_values()
     temp_visualizer_images_dir = tempfile.mkdtemp()
     os.makedirs(temp_visualizer_images_dir, exist_ok=True)
@@ -75,7 +85,8 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a
         if time_point > audio_length:
             break
         frame = frame_cache.copy()
-        vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing)
+        vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing,
+                            custom_drawing_overlap=dot_data.visualizer_drawing_overlap)
         frame_np = np.array(frame)
         frame_np = cv2.cvtColor(frame_np, cv2.COLOR_RGBA2BGRA)
         frame_filename = f'{temp_visualizer_images_dir}/frame_{i:05d}.png'
@@ -87,23 +98,133 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a
     return temp_visualizer_images_dir


-def create_music_video(
+def _get_video_background(image_path: str, frame_size: local_dataclasses.Size,
+                          background_overlay_color_opacity: local_dataclasses.RGBOpacity) -> np.ndarray:
+    """
+    Gets the background for the video, which is a Gaussian-blurred version of the cover image stretched with a color
+    overlay.
+    :param image_path: The path to the image to use as the background.
+    :param frame_size: The size of the frame to use for the background.
+    :param background_overlay_color_opacity: The color and opacity to use for the background overlay.
+    :return: The composited background as a BGRA numpy array.
+    """
+    background = cv2.imread(image_path)
+    background = cv2.resize(background, (frame_size.width, frame_size.height))
+    background = cv2.GaussianBlur(background, (49, 49), 0)
+    if background.shape[2] == 3:
+        background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA)
+    overlay = np.full((frame_size.height, frame_size.width, 4),
+                      image_utils.get_bgra(background_overlay_color_opacity.color,
+                                           background_overlay_color_opacity.opacity),
+                      dtype=np.uint8)
+    alpha_overlay = overlay[:, :, 3] / 255.0
+    alpha_background = background[:, :, 3] / 255.0
+    for c in range(0, 3):
+        background[:, :, c] = (alpha_overlay * overlay[:, :, c] +
+                               alpha_background * (1 - alpha_overlay) * background[:, :, c])
+    background[:, :, 3] = (alpha_overlay + alpha_background * (1 - alpha_overlay)) * 255
+    return background
+
+
+def _generate_final_video(background_image_path: str, visualizer_frames_dir: Optional[str], cover_image_path: str,
+                          audio_path: str, fps: int) -> str:
+    """
+    Generates the final video using the given parameters with ffmpeg.
+    :param background_image_path: The path to the background image to use for the video.
+    :param visualizer_frames_dir: The path to the directory containing the audio visualizer frames.
+    :param cover_image_path: The path to the cover image to use for the video.
+    :param audio_path: The path to the audio file to use for the video.
+    :param fps: The frames per second to use for the video.
+    :return: The path to the generated video file.
+    """
+    temp_final_video_path = tempfile.mktemp(suffix=".mp4")
+
+    audio_clip = AudioFileClip(audio_path)
+    ffmpeg_commands = [
+        "ffmpeg", "-y",
+        "-loop", "1",
+        "-i", background_image_path,
+    ]
+
+    if visualizer_frames_dir is not None:
+        ffmpeg_commands.extend([
+            "-framerate", str(fps),
+            "-i", f'{visualizer_frames_dir}/frame_%05d.png',
+        ])
+        filter_complex = "[0][1]overlay=format=auto[bg];[bg][2]overlay=format=auto"
+    else:
+        filter_complex = "[0][1]overlay=format=auto"
+
+    ffmpeg_commands.extend([
+        "-framerate", str(fps),
+        "-i", cover_image_path,
+        "-i", audio_path,
+        "-filter_complex", filter_complex,
+        "-map", "3:a" if visualizer_frames_dir is not None else "2:a",
+        "-c:v", "libx264",
+        "-c:a", "aac",
+        "-strict", "experimental",
+        "-t", str(audio_clip.duration),
+        "-hide_banner",
+        "-framerate", str(fps),
+        '-pix_fmt', 'yuv420p',
+        temp_final_video_path
+    ])
+    print("Generating final video...")

+    duration_regex = re.compile(r"Duration: (\d\d):(\d\d):(\d\d)\.\d\d")
+    time_regex = re.compile(r"time=(\d\d):(\d\d):(\d\d)\.\d\d")
+
+    ffmpeg_start_time = time.time()
+    curr_duration = local_dataclasses.Time(hours=0, minutes=0, seconds=0)  # in case "time=" precedes "Duration:"
+    with subprocess.Popen(ffmpeg_commands, stderr=subprocess.PIPE, text=True) as ffmpeg_process:
+        for line in ffmpeg_process.stderr:
+            # Extract total duration of the video
+            duration_match = duration_regex.search(line)
+            if duration_match:
+                duration_match_groups = duration_match.groups()
+                curr_duration = local_dataclasses.Time(
+                    hours=int(duration_match_groups[0]),
+                    minutes=int(duration_match_groups[1]),
+                    seconds=int(duration_match_groups[2])
+                )
+
+            # Extract current time of encoding
+            time_match = time_regex.search(line)
+            if time_match and int(curr_duration) > 0:
+                time_match_groups = time_match.groups()
+                curr_time = local_dataclasses.Time(
+                    hours=int(time_match_groups[0]),
+                    minutes=int(time_match_groups[1]),
+                    seconds=int(time_match_groups[2])
+                )
+                progress.print_progress_bar(int(curr_time), int(curr_duration), start_time=ffmpeg_start_time)
+
+    progress.print_progress_bar(100, 100, 
end='\n', start_time=ffmpeg_start_time)
+
+    return temp_final_video_path
+
+
+def create_music_video(  # pylint: disable=too-many-locals
         image_path: str, audio_path: str, fps: int,
         artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int,
-        artist_font_color: dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool,
-        artist_shadow_color: dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int,
-        artist_background_enabled: bool, artist_background_color: dataclasses.RGBColor, artist_background_opacity: int,
-        song: str, song_font_type: str, song_font_style: str, song_font_size: int,
-        song_font_color: dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool,
-        song_shadow_color: dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int,
-        song_background_enabled: bool, song_background_color: dataclasses.RGBColor, song_background_opacity: int,
-        background_color: dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66,
-        generate_audio_visualizer: bool = False, audio_visualizer_color: dataclasses.RGBColor = (255, 255, 255),
+        artist_font_color: local_dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool,
+        artist_shadow_color: local_dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int,
+        artist_background_enabled: bool, artist_background_color: local_dataclasses.RGBColor,
+        artist_background_opacity: int, song: str, song_font_type: str, song_font_style: str, song_font_size: int,
+        song_font_color: local_dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool,
+        song_shadow_color: local_dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int,
+        song_background_enabled: bool, song_background_color: local_dataclasses.RGBColor, song_background_opacity: int,
+        background_color: local_dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66,
+        generate_audio_visualizer: bool = False, audio_visualizer_color: local_dataclasses.RGBColor = (255, 255, 255),
         audio_visualizer_opacity: int = 100, visualizer_drawing: Optional[str] = None,
-        audio_visualizer_num_rows: int = 90, audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1,
+        visualizer_drawing_overlap: bool = False, audio_visualizer_num_rows: int = 90,
+        audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1,
         audio_visualizer_max_size: int = 7) -> Optional[str]:
     """
     Creates a music video using the given parameters.
+    :param visualizer_drawing_overlap: Whether overlapping visualizer drawings should be alpha-blended onto one
+        another. This is only noticeable on images with transparency and is a slow process, so if your visualizer
+        drawings are not transparent, it is recommended to leave this False.
     :param image_path: The path to the image to use as the cover + background for the video.
     :param audio_path: The path to the audio file to use for the video.
     :param fps: The frames per second to use for the video.
@@ -145,29 +266,22 @@ def create_music_video(
     :param audio_visualizer_max_size: The maximum size to use for the audio visualizer's drawings (peak loudness).
     :return: The path to the generated video, or None if there was an error. 
""" - if image_path is None: - print("No cover image for the video.") - return None - if audio_path is None: - print("No audio to add to the video.") + if image_path is None or audio_path is None: + print("No cover image and/or audio for the video.") return None # Could probably expand to 4k, but unnecessary for this type of music video # Maybe in a future iteration it could be worth it - frame_size = dataclasses.Size(1920, 1080) + frame_size = local_dataclasses.Size(1920, 1080) # Set up cover - cover = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) - if cover.shape[2] == 3: - cover = cv2.cvtColor(cover, cv2.COLOR_BGR2RGBA) - else: - cover = cv2.cvtColor(cover, cv2.COLOR_BGRA2RGBA) + cover = image_utils.open_image_as_rgba(image_path) # Create canvas with 4 channels (RGBA) canvas = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) # Calculate dimensions for resizing the cover to fit within the canvas while maintaining its aspect ratio - cover_size = dataclasses.Size(cover.shape[1], cover.shape[0]) + cover_size = local_dataclasses.Size(cover.shape[1], cover.shape[0]) resize_factor = min(frame_size.width / cover_size.width, frame_size.height / cover_size.height) resize_factor *= (7 / 10) cover_size.width = int(cover_size.width * resize_factor) @@ -183,32 +297,28 @@ def create_music_video( audio_clip = AudioFileClip(audio_path) # Add video background - background = cv2.imread(image_path) - background = cv2.resize(background, (frame_size.width, frame_size.height)) - background = cv2.GaussianBlur(background, (49, 49), 0) - if background.shape[2] == 3: - background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA) - background_color_overlay = image_utils.get_bgra(background_color, background_opacity) - overlay = np.full((frame_size.height, frame_size.width, 4), background_color_overlay, dtype=np.uint8) - alpha_overlay = overlay[:, :, 3] / 255.0 - alpha_background = background[:, :, 3] / 255.0 - for c in range(0, 3): - background[:, :, c] = (alpha_overlay * overlay[:, :, c] + - alpha_background * (1 - alpha_overlay) * background[:, :, c]) - background[:, :, 3] = (alpha_overlay + alpha_background * (1 - alpha_overlay)) * 255 + background = _get_video_background(image_path, frame_size, + local_dataclasses.RGBOpacity(background_color, background_opacity)) background_bgr = cv2.cvtColor(background, cv2.COLOR_BGRA2BGR) tmp_background_image_path = tempfile.mktemp(suffix=".png") cv2.imwrite(tmp_background_image_path, background_bgr) + temp_visualizer_images_dir = None if generate_audio_visualizer: temp_visualizer_images_dir = _audio_visualizer_generator(frame_size, audio_path, audio_clip.duration, fps, - dataclasses.RGBOpacity(audio_visualizer_color, - audio_visualizer_opacity), - dataclasses.MinMax(audio_visualizer_min_size, - audio_visualizer_max_size), - dataclasses.RowCol(audio_visualizer_num_rows, - audio_visualizer_num_columns), - visualizer_drawing=visualizer_drawing) + AudioVisualizerDotData( + size=local_dataclasses.MinMax( + audio_visualizer_min_size, + audio_visualizer_max_size), + color=audio_visualizer_color, + opacity=audio_visualizer_opacity, + count=local_dataclasses.RowCol( + audio_visualizer_num_rows, + audio_visualizer_num_columns), + visualizer_drawing=visualizer_drawing, + visualizer_drawing_overlap=\ + visualizer_drawing_overlap) + ) # Add text font_families = font_manager.get_fonts() @@ -240,96 +350,19 @@ def create_music_video( artist_shadow_opacity), show_background=artist_background_enabled, background_color=image_utils.get_rgba( - artist_background_color, 
artist_background_opacity)) - - text_np = np.array(text_canvas) - np_canvas = np.array(canvas) - # Normalize the alpha channels - alpha_text = text_np[:, :, 3] / 255.0 - alpha_canvas = np_canvas[:, :, 3] / 255.0 - alpha_final = alpha_text + alpha_canvas * (1 - alpha_text) - - canvas_final = np.zeros_like(np_canvas) - # alpha blend - for c in range(3): # Loop over color (non-alpha) channels - canvas_final[:, :, c] = (alpha_text * text_np[:, :, c] + alpha_canvas * (1 - alpha_text) * - np_canvas[:, :, c]) / alpha_final - canvas_final[:, :, 3] = alpha_final * 255 - canvas_final[:, :, :3][alpha_final == 0] = 0 + artist_background_color, artist_background_opacity)) + canvas_final = image_utils.blend_alphas(np.array(text_canvas), np.array(canvas)) temp_canvas_image_path = tempfile.mktemp(suffix=".png") # Convert to BGR for OpenCV canvas_final = cv2.cvtColor(canvas_final, cv2.COLOR_RGBA2BGRA) cv2.imwrite(temp_canvas_image_path, canvas_final) - temp_final_video_path = tempfile.mktemp(suffix=".mp4") - - # set up the background video commands - ffmpeg_commands = [ - "ffmpeg", "-y", - "-loop", "1", - "-i", tmp_background_image_path, - ] + temp_final_video_path = _generate_final_video(tmp_background_image_path, temp_visualizer_images_dir, + temp_canvas_image_path, audio_path, fps) - if generate_audio_visualizer: - ffmpeg_commands.extend([ - "-framerate", str(fps), - "-i", f'{temp_visualizer_images_dir}/frame_%05d.png', - ]) - filter_complex = "[0][1]overlay=format=auto[bg];[bg][2]overlay=format=auto" - audio_input_map = "3:a" - else: - filter_complex = "[0][1]overlay=format=auto" - audio_input_map = "2:a" - - ffmpeg_commands.extend([ - "-framerate", str(fps), - "-i", temp_canvas_image_path, - "-i", audio_path, - "-filter_complex", filter_complex, - "-map", audio_input_map, - "-c:v", "libx264", - "-c:a", "aac", - "-strict", "experimental", - "-t", str(audio_clip.duration), - "-hide_banner", - "-framerate", str(fps), - '-pix_fmt', 'yuv420p', - temp_final_video_path - ]) - print("Generating final video...") - ffmpeg_process = subprocess.Popen(ffmpeg_commands, stderr=subprocess.PIPE, text=True) - - duration_regex = re.compile(r"Duration: (\d\d):(\d\d):(\d\d)\.\d\d") - time_regex = re.compile(r"time=(\d\d):(\d\d):(\d\d)\.\d\d") - total_duration_in_seconds = 0 - - ffmpeg_start_time = time.time() - while True: - line = ffmpeg_process.stderr.readline() - if not line: - break - - # Extract total duration of the video - duration_match = duration_regex.search(line) - if duration_match: - hours, minutes, seconds = map(int, duration_match.groups()) - total_duration_in_seconds = hours * 3600 + minutes * 60 + seconds - - # Extract current time of encoding - time_match = time_regex.search(line) - if time_match and total_duration_in_seconds > 0: - hours, minutes, seconds = map(int, time_match.groups()) - current_time = hours * 3600 + minutes * 60 + seconds - progress.print_progress_bar(current_time, total_duration_in_seconds, start_time=ffmpeg_start_time) - - ffmpeg_process.wait() - if ffmpeg_process.returncode != 0: - raise subprocess.CalledProcessError(ffmpeg_process.returncode, ffmpeg_commands) - progress.print_progress_bar(100, 100, end='\n', start_time=ffmpeg_start_time) - print("Done generating final video!\n") # clean up the original frames - if generate_audio_visualizer: + if temp_visualizer_images_dir is not None: for file in os.listdir(temp_visualizer_images_dir): os.remove(os.path.join(temp_visualizer_images_dir, file)) os.rmdir(temp_visualizer_images_dir) @@ -353,11 +386,13 @@ def 
generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[ return chatgpt_api.url_to_gradio_image_name(image_url) +# pylint: disable=too-many-locals def process(image_path: str, artist: str, song: str, - af_family: str, af_style: str, afs: int, afc: dataclasses.RGBColor, afo: int, ase: bool, - asc: dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: dataclasses.RGBColor, abo: int, - sf_family: str, sf_style: str, sfs: int, sfc: dataclasses.RGBColor, sfo: int, sse: bool, - ssc: dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: dataclasses.RGBColor, sbo: int) \ + af_family: str, af_style: str, afs: int, afc: local_dataclasses.RGBColor, afo: int, ase: bool, + asc: local_dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: local_dataclasses.RGBColor, + abo: int, sf_family: str, sf_style: str, sfs: int, sfc: local_dataclasses.RGBColor, sfo: int, sse: bool, + ssc: local_dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: local_dataclasses.RGBColor, + sbo: int) \ -> Optional[np.ndarray]: """ Processes the image at the given path (by adding the requested text) and returns the processed image. diff --git a/utils/dataclasses.py b/utils/dataclasses.py index 77c3531..0755858 100644 --- a/utils/dataclasses.py +++ b/utils/dataclasses.py @@ -59,8 +59,17 @@ class MinMax: """ A dataclass representing a minimum and maximum value. """ - min: int - max: int + min: Union[int, float] + max: Union[int, float] + + +@dataclass +class MinMaxGradioComponents: + """ + A dataclass representing the components of a minimum and maximum value editor. + """ + min: gr.Number + max: gr.Number @dataclass @@ -72,6 +81,15 @@ class RowCol: col: int +@dataclass +class RowColGradioComponents: + """ + A dataclass representing the components of a row and column editor. + """ + row: gr.Number + col: gr.Number + + @dataclass class FontGradioComponents: """ @@ -105,4 +123,50 @@ class FontBackgroundGradioComponents: opacity: gr.Slider +@dataclass +class FontDisplayGradioComponents: + """ + A dataclass representing the components of how to display the font. + """ + font: FontGradioComponents + drop_shadow: FontDropShadowGradioComponents + background: FontBackgroundGradioComponents + + +@dataclass +class ColorOpacityGradioComponents: + """ + A dataclass representing the components of the color and opacity editor. + """ + color: gr.ColorPicker + opacity: gr.Slider + + +@dataclass +class VideoOutputGradioComponents: + """ + A dataclass representing the components of the video output. + """ + video: gr.Video + name: gr.Textbox + suffix: gr.Dropdown + save: gr.Button + + +@dataclass +class Time: + """ + A dataclass representing a time. + """ + hours: int + minutes: int + seconds: int + + def __int__(self) -> int: + """ + Returns the time in seconds. + """ + return self.hours * 3600 + self.minutes * 60 + self.seconds + + RGBColor = Union[str, tuple[int, int, int]] diff --git a/utils/gradio.py b/utils/gradio.py index 54353ff..fae5a02 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -5,18 +5,18 @@ from utils import font_manager, dataclasses -def render_color_opacity_picker(default_name_label: str = "Font") -> tuple[gr.ColorPicker, gr.Slider]: +def render_color_opacity_picker(default_name_label: str = "Font") -> dataclasses.ColorOpacityGradioComponents: """ Renders a color picker with the appropriate styling. :param default_name_label: The default name label to use. - :return: A tuple containing the color and opacity components. 
+ :return: A class containing the color and opacity components. """ with gr.Group(): with gr.Row(): color = gr.ColorPicker(label=f"{default_name_label} Color", scale=1, interactive=True) opacity = gr.Slider(0, 100, value=100, label="Opacity", scale=2, interactive=True) - return color, opacity + return dataclasses.ColorOpacityGradioComponents(color, opacity) def bind_checkbox_to_visibility(checkbox: gr.Checkbox, group: gr.Group): @@ -58,10 +58,11 @@ def update_font_styles(selected_font_family): font_family.change(update_font_styles, inputs=[font_family], outputs=[font_style]) with gr.Group(): - font_color, font_opacity = render_color_opacity_picker() + font_color_opacity = render_color_opacity_picker() font_size = gr.Number(default_font_size, label="Font Size", interactive=True) - return dataclasses.FontGradioComponents(font_family, font_style, font_color, font_opacity, font_size) + return dataclasses.FontGradioComponents(font_family, font_style, font_color_opacity.color, + font_color_opacity.opacity, font_size) def render_tool_description(description: str): diff --git a/utils/image.py b/utils/image.py index b070a04..42f59e4 100644 --- a/utils/image.py +++ b/utils/image.py @@ -2,6 +2,8 @@ This file contains functions for image processing. """ from typing import Tuple +import cv2 +import numpy as np from utils import dataclasses @@ -43,3 +45,45 @@ def get_bgra(color: dataclasses.RGBColor, opacity: int) -> Tuple[int, int, int, color = tuple(int(color[i:i + 2], 16) for i in (0, 2, 4)) return color[2], color[1], color[0], get_alpha_from_opacity(opacity) + + +def open_image_as_rgba(image_path: str) -> np.ndarray: + """ + Opens an image as RGBA. + :param image_path: The path to the image. + :return: The image as RGBA. + """ + img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) + if img.shape[2] == 3: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA) + else: + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) + + return img + + +def blend_alphas(a: np.ndarray, b: np.ndarray) -> np.ndarray: + """ + Blends two images together using alpha blending. + :param a: The first image. + :param b: The second image. + :return: The blended image. + """ + if a.shape != b.shape: + raise ValueError("both images must have the same shape to blend alphas") + if a.shape[2] != 4 or b.shape[2] != 4: + raise ValueError("both images must have 4 channels to blend alphas") + + alpha_text = a[:, :, 3] / 255.0 + alpha_canvas = b[:, :, 3] / 255.0 + alpha_final = alpha_text + alpha_canvas * (1 - alpha_text) + + final = np.zeros_like(b) + # alpha blend + for c in range(3): # Loop over color (non-alpha) channels + final[:, :, c] = (alpha_text * a[:, :, c] + alpha_canvas * (1 - alpha_text) * + b[:, :, c]) / alpha_final + final[:, :, 3] = alpha_final * 255 + final[:, :, :3][alpha_final == 0] = 0 + + return final diff --git a/utils/visualizer.py b/utils/visualizer.py index 8e53285..2beb839 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -4,7 +4,7 @@ from typing import Dict, Optional import numpy as np import cv2 -from utils import dataclasses +from utils import dataclasses, image as image_utils class Visualizer: @@ -12,6 +12,7 @@ class Visualizer: This class is used to draw the visualizer on the canvas. Will be replaced with a more general solution in the future to allow for more customization. 
""" + def __init__(self, dot_size: dataclasses.MinMax, color, dot_count: dataclasses.RowCol, size: dataclasses.Size): self.dot_size = dot_size self.color = color @@ -34,14 +35,11 @@ def initialize_static_values(self: "Visualizer") -> None: self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count.col) for y in range(self.dot_count.row)] - def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], - custom_drawing: Optional[np.ndarray] = None) -> None: + def _get_loudness(self, frequency_data: Dict[float, float]) -> (dataclasses.MinMax, Dict[int, int]): """ - Draws the visualizer on the canvas (a single frame). - :param canvas: The canvas to draw on. + Calculates the loudness values for each column. :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency. - :param custom_drawing: A custom drawing to use instead of the default circle. - :return: None. + :return: A tuple containing the loudness min/max and the loudness values for each column. """ # Precompute log frequencies freq_keys = np.array(list(frequency_data.keys())) @@ -51,66 +49,111 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict # Find the maximum and minimum loudness values, ignoring -80 dB freq_bands = np.array([frequency_data[key] for key in freq_keys if key > 0]) # Ignore 0 Hz - max_loudness = np.max(freq_bands) filtered_loudness = freq_bands[freq_bands > -80] - min_loudness = np.min(filtered_loudness) if filtered_loudness.size > 0 else -80 + loudness_min_max = dataclasses.MinMax(np.min(filtered_loudness) if filtered_loudness.size > 0 else -80, + np.max(freq_bands)) # Precompute loudness values loudness_values = {} for x in range(self.dot_count.col): - lower_bound = log_freqs[x] - upper_bound = log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 - band_freqs = [freq for freq in freq_keys if lower_bound <= freq < upper_bound] + bounds = { + "lower": log_freqs[x], + "upper": log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 + } + band_freqs = [freq for freq in freq_keys if bounds.get("lower") <= freq < bounds.get("upper")] if not band_freqs: - closest_freq = min(freq_keys, key=lambda f, lb=lower_bound: abs(f - lb)) + closest_freq = min(freq_keys, key=lambda f, lb=bounds.get("lower"): abs(f - lb)) band_freqs = [closest_freq] band_loudness = [frequency_data[freq] for freq in band_freqs] avg_loudness = np.mean(band_loudness) if band_loudness else -80 loudness_values[x] = avg_loudness + return loudness_min_max, loudness_values + + def _calculate_dot_size(self: "Visualizer", column: int, loudness: dataclasses.MinMax, + loudness_values: Dict[int, int]) -> int: + """ + Calculates the dot size for a given column. + :param loudness: The loudness min/max. + :param loudness_values: The loudness values for each column. + :return: The dot size. 
+        """
+        # Scale the loudness to the dot size
+        scaled_loudness = (loudness_values[column] - loudness.min) / (
+            loudness.max - loudness.min) if loudness.max != loudness.min else 0
+        dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min)
+        return min(max(dot_size, self.dot_size.min), self.dot_size.max)
+
+    def _draw_custom_drawing(self: "Visualizer", canvas: np.ndarray, start_pos: dataclasses.Position,
+                             end_pos: dataclasses.Position, img_start_pos: dataclasses.Position,
+                             img_end_pos: dataclasses.Position, dot_size: int,
+                             custom_drawing_overlap: bool) -> np.ndarray:
+        """
+        Computes the slice of the custom drawing to place onto the canvas.
+        :param canvas: The canvas to draw on.
+        :param start_pos: The start position on the canvas.
+        :param end_pos: The end position on the canvas.
+        :param img_start_pos: The start position on the resized image.
+        :param img_end_pos: The end position on the resized image.
+        :param dot_size: The dot size.
+        :param custom_drawing_overlap: Whether overlapped custom drawings should alpha blend.
+        :return: The image slice to write into the canvas region.
+        """
+        drawing_slice = self.cached_resized_drawing[dot_size][img_start_pos.y:img_end_pos.y,
+                                                              img_start_pos.x:img_end_pos.x]
+
+        if custom_drawing_overlap:
+            canvas_slice = canvas[start_pos.y:end_pos.y, start_pos.x:end_pos.x]
+            return image_utils.blend_alphas(canvas_slice, drawing_slice)
+
+        return drawing_slice
+
+    def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float],
+                        custom_drawing: Optional[np.ndarray] = None, custom_drawing_overlap: bool = False) -> None:
+        """
+        Draws the visualizer on the canvas (a single frame).
+        :param custom_drawing_overlap: Whether overlapping custom drawings should be alpha-blended together.
+        :param canvas: The canvas to draw on.
+        :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency.
+        :param custom_drawing: A custom drawing to use instead of the default circle.
+        :return: None. 
+ """ + loudness, loudness_values = self._get_loudness(frequency_data) + cached_dot_sizes = {} for i, (pos_x, pos_y) in enumerate(self.cached_dot_positions): column = i // self.dot_count.row # Ensure the correct column is computed if column not in cached_dot_sizes: - avg_loudness = loudness_values[column] - # Scale the loudness to the dot size - scaled_loudness = (avg_loudness - min_loudness) / ( - max_loudness - min_loudness) if max_loudness != min_loudness else 0 - dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min) - dot_size = min(max(dot_size, self.dot_size.min), self.dot_size.max) - - cached_dot_sizes[column] = dot_size - else: - dot_size = cached_dot_sizes[column] + cached_dot_sizes[column] = self._calculate_dot_size(column, loudness, loudness_values) - # Convert dot size to integer and calculate the center position - dot_size = int(dot_size) - center = (int(pos_x), int(pos_y)) + dot_size = int(cached_dot_sizes[column]) + center_pos = dataclasses.Position(int(pos_x), int(pos_y)) if custom_drawing is not None: if dot_size not in self.cached_resized_drawing: - self.cached_resized_drawing[dot_size] = cv2.resize(custom_drawing, (dot_size, dot_size), - interpolation=cv2.INTER_LANCZOS4) - resized_custom_drawing = self.cached_resized_drawing[dot_size] + if dot_size == 0: + self.cached_resized_drawing[dot_size] = np.zeros((1, 1, 4), dtype=np.uint8) + else: + self.cached_resized_drawing[dot_size] = cv2.resize(custom_drawing, (dot_size, dot_size), + interpolation=cv2.INTER_LANCZOS4) - center_x, center_y = int(pos_x), int(pos_y) half_dot_size = dot_size // 2 - # Calculate bounds on the canvas - start_x = max(center_x - half_dot_size, 0) - end_x = min(center_x + half_dot_size, canvas.shape[1]) - start_y = max(center_y - half_dot_size, 0) - end_y = min(center_y + half_dot_size, canvas.shape[0]) + start_pos = dataclasses.Position(max(center_pos.x - half_dot_size, 0), + max(center_pos.y - half_dot_size, 0)) + end_pos = dataclasses.Position(min(center_pos.x + half_dot_size, canvas.shape[1]), min( + center_pos.y + half_dot_size, canvas.shape[0])) # Calculate corresponding bounds on the resized image - img_start_x = max(half_dot_size - (center_x - start_x), 0) - img_end_x = img_start_x + (end_x - start_x) - img_start_y = max(half_dot_size - (center_y - start_y), 0) - img_end_y = img_start_y + (end_y - start_y) - - # Place the image slice onto the canvas - canvas[start_y:end_y, start_x:end_x] = resized_custom_drawing[img_start_y:img_end_y, - img_start_x:img_end_x] + img_start_pos = dataclasses.Position(max(half_dot_size - (center_pos.x - start_pos.x), 0), + max(half_dot_size - (center_pos.y - start_pos.y), 0)) + img_end_pos = dataclasses.Position(img_start_pos.x + (end_pos.x - start_pos.x), + img_start_pos.y + (end_pos.y - start_pos.y)) + + canvas[start_pos.y:end_pos.y, start_pos.x:end_pos.x] = self._draw_custom_drawing(canvas, start_pos, + end_pos, img_start_pos, + img_end_pos, dot_size, + custom_drawing_overlap) else: - cv2.circle(canvas, center, dot_size // 2, self.color, -1) + cv2.circle(canvas, (center_pos.x, center_pos.y), dot_size // 2, self.color, -1)
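
Note on the compositing used above: the helper introduced in utils/image.py (blend_alphas) is the core of both the visualizer overlap mode and the video background overlay. The following is a minimal, self-contained sketch of the same "over" operator for anyone reviewing the math outside the app; the np.where guard against division by zero is this sketch's own addition (blend_alphas instead zeroes fully transparent pixels after dividing), and the name blend_over is hypothetical.

    import numpy as np

    def blend_over(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Composite RGBA image `a` over RGBA image `b` (Porter-Duff "over")."""
        alpha_a = a[:, :, 3] / 255.0
        alpha_b = b[:, :, 3] / 255.0
        alpha_out = alpha_a + alpha_b * (1 - alpha_a)
        out = np.zeros_like(b)
        with np.errstate(invalid="ignore", divide="ignore"):
            for c in range(3):  # color channels; alpha is handled separately below
                out[:, :, c] = np.where(
                    alpha_out > 0,
                    (alpha_a * a[:, :, c] + alpha_b * (1 - alpha_a) * b[:, :, c]) / alpha_out,
                    0)
        out[:, :, 3] = alpha_out * 255
        return out

    # An opaque red pixel composited over a fully transparent canvas stays opaque red:
    red = np.zeros((1, 1, 4), dtype=np.uint8)
    red[..., 0], red[..., 3] = 255, 255
    clear = np.zeros((1, 1, 4), dtype=np.uint8)
    assert tuple(blend_over(red, clear)[0, 0]) == (255, 0, 0, 255)

This per-channel division runs for every dot of every frame when the new overlap checkbox is enabled, which is why the patch warns that the mode is slow and gates it off by default.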