From 04a622b36de81be339d76bd7aa060f2a2649b722 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Tue, 16 Jan 2024 19:42:27 -0500 Subject: [PATCH] 100% Linter --- .github/workflows/pylint.yml | 4 +- .pylintrc | 1 + processing/image.py | 42 +++-- processing/video.py | 6 +- requirements.txt | 4 +- ui/listicles/interface.py | 68 +++++--- ui/listicles/utils.py | 1 + ui/music/interface.py | 116 ++++++++----- ui/music/utils.py | 317 +++++++++++++++++++---------------- utils/dataclasses.py | 68 +++++++- utils/gradio.py | 11 +- utils/image.py | 44 +++++ utils/visualizer.py | 131 ++++++++++----- 13 files changed, 530 insertions(+), 283 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index aa949d3..e6dde2a 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -1,11 +1,11 @@ name: Python linter on: [push] jobs: - build: + lint: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"] + python-version: ["3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/.pylintrc b/.pylintrc index 2083c87..b390428 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,6 +1,7 @@ [MAIN] max-line-length=120 max-attributes=10 +max-locals=20 # Currently, this is added because gradio Inputs don't support passing tuples/dataclasses/etc. as arguments, meaning I # can't shorten some methods that take a lot of arguments. disable=too-many-arguments diff --git a/processing/image.py b/processing/image.py index eb97bc1..2c35dfb 100644 --- a/processing/image.py +++ b/processing/image.py @@ -35,9 +35,7 @@ def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponents, - dataclasses.FontDropShadowGradioComponents, - dataclasses.FontBackgroundGradioComponents): +def render_text_editor_parameters(name: str) -> dataclasses.FontDisplayGradioComponents: """ Renders the text editor parameters. :param name: The name of the text editor parameters. This is used as the label for the accordion. 
@@ -49,18 +47,21 @@ def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponent with gr.Group(): drop_shadow_enabled = gr.Checkbox(False, label="Enable Drop Shadow", interactive=True) with gr.Group(visible=drop_shadow_enabled.value) as additional_options: - drop_shadow_color, drop_shadow_opacity = gru.render_color_opacity_picker() + drop_shadow_color_opacity = gru.render_color_opacity_picker() drop_shadow_radius = gr.Number(0, label="Shadow Radius") gru.bind_checkbox_to_visibility(drop_shadow_enabled, additional_options) with gr.Group(): background_enabled = gr.Checkbox(False, label="Enable Background", interactive=True) with gr.Group(visible=background_enabled.value) as additional_options: - background_color, background_opacity = gru.render_color_opacity_picker() + background_color_opacity = gru.render_color_opacity_picker() gru.bind_checkbox_to_visibility(background_enabled, additional_options) - return (font_data, dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color, - drop_shadow_opacity, drop_shadow_radius), - dataclasses.FontBackgroundGradioComponents(background_enabled, background_color, background_opacity)) + drop_shadow_data = dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color_opacity.color, + drop_shadow_color_opacity.opacity, drop_shadow_radius) + background_data = dataclasses.FontBackgroundGradioComponents(background_enabled, background_color_opacity.color, + background_color_opacity.opacity) + + return dataclasses.FontDisplayGradioComponents(font_data, drop_shadow_data, background_data) def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], text: str, font: ImageFont, @@ -216,7 +217,23 @@ def save_image_to_disk(image_path: str, name: Optional[str] = None, save_dir: st return save_dir -# Function to add text to an image with custom font, size, and wrapping +def _get_lines(text: str, max_width: Optional[int] = None) -> list[str]: + """ + Gets the lines of text from a string. + :param text: The text to get the lines from. + :param max_width: The maximum width of the text before wrapping. + :return: A list of lines. 
+ """ + if max_width: # Prepare for text wrapping if max_width is provided + wrapped_text = textwrap.fill(text, width=max_width) + else: + wrapped_text = text + + return wrapped_text.split('\n') + + +# A lot of the reported variables come from the parameters +# pylint: disable=too-many-locals def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[int, int], font_path: str, font_size: int, font_color: Tuple[int, int, int, int] = (255, 255, 255, 255), shadow_color: Tuple[int, int, int, int] = (255, 255, 255, 255), @@ -256,12 +273,7 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - if max_width: # Prepare for text wrapping if max_width is provided - wrapped_text = textwrap.fill(text, width=max_width) - else: - wrapped_text = text - - lines = wrapped_text.split('\n') + lines = _get_lines(text, max_width) y_offset = 0 # max_line_width = 0 # Keep track of the widest line diff --git a/processing/video.py b/processing/video.py index 3d19e59..cbb4036 100644 --- a/processing/video.py +++ b/processing/video.py @@ -8,13 +8,13 @@ from typing import Optional, Literal import gradio as gr from moviepy.editor import VideoFileClip -from utils import path_handler +from utils import path_handler, dataclasses VIDEO_FOLDER = "videos" default_path = os.path.join(path_handler.get_default_path(), VIDEO_FOLDER) -def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): +def render_video_output() -> dataclasses.VideoOutputGradioComponents: """ Creates and returns a set of Gradio interface components for video output. @@ -32,7 +32,7 @@ def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): video_suffix = gr.Dropdown([".mp4", ".mov"], value=".mp4", label="File Type", allow_custom_value=False) save_video_button = gr.Button("Save To Disk", variant="primary") - return video_output, video_name, video_suffix, save_video_button + return dataclasses.VideoOutputGradioComponents(video_output, video_name, video_suffix, save_video_button) def save_video_to_disk(video_path: str, name: Optional[str] = None, video_suffix: Literal[".mp4", ".mov"] = ".mp4", diff --git a/requirements.txt b/requirements.txt index 7922ce1..7614d6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ gradio~=4.12.0 inflect~=7.0.0 openai~=1.6.1 -numpy~=1.23.5 -Pillow~=8.4.0 +numpy~=1.26.3 +Pillow~=10.2.0 opencv-python-headless~=4.8.1.78 fonttools~=4.47.0 moviepy~=1.0.3 diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index bb15613..87d386a 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -81,11 +81,11 @@ def set_json(json_file): with gr.Column(scale=3): gr.Markdown("# Parameters") with gr.Row(equal_height=False): - name_font, name_shadow, name_background = image_processing.render_text_editor_parameters("Name") - desc_font, desc_shadow, desc_background = image_processing.render_text_editor_parameters("Description") + name_font_display = image_processing.render_text_editor_parameters("Name") + desc_font_display = image_processing.render_text_editor_parameters("Description") with gr.Row(equal_height=False): - asc_font, asc_shadow, asc_background = image_processing.render_text_editor_parameters("Association") - rate_font, rate_shadow, rate_background = image_processing.render_text_editor_parameters("Rating") + asc_font_display = image_processing.render_text_editor_parameters("Association") + rate_font_display = 
image_processing.render_text_editor_parameters("Rating") with gr.Column(scale=1): gr.Markdown("# Output") @@ -99,26 +99,46 @@ def set_json(json_file): save_button.click(image_processing.save_images_to_disk, inputs=[output_preview, image_type], outputs=[]) process_button.click(listicle_utils.process, inputs=[input_batch_images, input_batch_json, - name_font.family, name_font.style, name_font.size, - name_font.color, name_font.opacity, name_shadow.enabled, - name_shadow.color, name_shadow.opacity, name_shadow.radius, - name_background.enabled, name_background.color, - name_background.opacity, - desc_font.family, desc_font.style, desc_font.size, - desc_font.color, desc_font.opacity, desc_shadow.enabled, - desc_shadow.color, desc_shadow.opacity, desc_shadow.radius, - desc_background.enabled, desc_background.color, - desc_background.opacity, - asc_font.family, asc_font.style, asc_font.size, - asc_font.color, asc_font.opacity, asc_shadow.enabled, - asc_shadow.color, asc_shadow.opacity, asc_shadow.radius, - asc_background.enabled, asc_background.color, - asc_background.opacity, - rate_font.family, rate_font.style, rate_font.size, - rate_font.color, rate_font.opacity, rate_shadow.enabled, - rate_shadow.color, rate_shadow.opacity, rate_shadow.radius, - rate_background.enabled, rate_background.color, - rate_background.opacity, + name_font_display.font.family, name_font_display.font.style, + name_font_display.font.size, name_font_display.font.color, + name_font_display.font.opacity, + name_font_display.drop_shadow.enabled, + name_font_display.drop_shadow.color, + name_font_display.drop_shadow.opacity, + name_font_display.drop_shadow.radius, + name_font_display.background.enabled, + name_font_display.background.color, + name_font_display.background.opacity, + desc_font_display.font.family, desc_font_display.font.style, + desc_font_display.font.size, desc_font_display.font.color, + desc_font_display.font.opacity, + desc_font_display.drop_shadow.enabled, + desc_font_display.drop_shadow.color, + desc_font_display.drop_shadow.opacity, + desc_font_display.drop_shadow.radius, + desc_font_display.background.enabled, + desc_font_display.background.color, + desc_font_display.background.opacity, + asc_font_display.font.family, asc_font_display.font.style, + asc_font_display.font.size, asc_font_display.font.color, + asc_font_display.font.opacity, + asc_font_display.drop_shadow.enabled, + asc_font_display.drop_shadow.color, + asc_font_display.drop_shadow.opacity, + asc_font_display.drop_shadow.radius, + asc_font_display.background.enabled, + asc_font_display.background.color, + asc_font_display.background.opacity, + rate_font_display.font.family, rate_font_display.font.style, + rate_font_display.font.size, rate_font_display.font.color, + rate_font_display.font.opacity, + rate_font_display.drop_shadow.enabled, + rate_font_display.drop_shadow.color, + rate_font_display.drop_shadow.opacity, + rate_font_display.drop_shadow.radius, + rate_font_display.background.enabled, + rate_font_display.background.color, + rate_font_display.background.opacity, ], outputs=[output_preview]) return input_batch_images, input_batch_json diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index 747c6c4..01a7898 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -11,6 +11,7 @@ import api.chatgpt as chatgpt_api +# pylint: disable=too-many-locals def process(image_files: list[Any], json_data: str, nf_family: str, nf_style: str, nfs: int, nfc: dataclasses.RGBColor, nfo: int, nse: bool, nsc: dataclasses.RGBColor, nso: 
int, nsr, nbe: bool, nbc: dataclasses.RGBColor, nbo: int, diff --git a/ui/music/interface.py b/ui/music/interface.py index 8808022..c6e199b 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -7,6 +7,7 @@ import processing.video as video_processing import processing.image as image_processing import ui.components.openai as openai_components +from utils import dataclasses def render_music_section() -> None: @@ -71,7 +72,7 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): display component for displaying the cover image before processing, and an image display component for displaying the cover image after processing. """ - with (gr.Column()): + with gr.Column(): gr.Markdown("## Input") with gr.Group(): input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", @@ -81,13 +82,11 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): with gr.Row(equal_height=False): with gr.Group(): artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - artist_font, artist_shadow, artist_background = image_processing.render_text_editor_parameters( - "Artist Text Parameters") + artist_font_display = image_processing.render_text_editor_parameters("Artist Text Parameters") with gr.Group(): song_name = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - song_font, song_shadow, song_background = \ - image_processing.render_text_editor_parameters("Song Text Parameters") + song_font_display = image_processing.render_text_editor_parameters("Song Text Parameters") process_button = gr.Button("Process", variant="primary") @@ -98,14 +97,20 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): send_to_create_video_button = gr.Button("Send Image to 'Create Music Video'", variant="secondary") process_button.click(process, inputs=[input_image, artist_name, song_name, - artist_font.family, artist_font.style, artist_font.size, artist_font.color, - artist_font.opacity, artist_shadow.enabled, artist_shadow.color, - artist_shadow.opacity, artist_shadow.radius, artist_background.enabled, - artist_background.color, artist_background.opacity, song_font.family, - song_font.style, song_font.size, song_font.color, song_font.opacity, - song_shadow.enabled, song_shadow.color, song_shadow.opacity, - song_shadow.radius, song_background.enabled, song_background.color, - song_background.opacity], + artist_font_display.font.family, artist_font_display.font.style, + artist_font_display.font.size, artist_font_display.font.color, + artist_font_display.font.opacity, artist_font_display.drop_shadow.enabled, + artist_font_display.drop_shadow.color, + artist_font_display.drop_shadow.opacity, + artist_font_display.drop_shadow.radius, + artist_font_display.background.enabled, + artist_font_display.background.color, artist_font_display.background.opacity, + song_font_display.font.family, song_font_display.font.style, + song_font_display.font.size, song_font_display.font.color, + song_font_display.font.opacity, song_font_display.drop_shadow.enabled, + song_font_display.drop_shadow.color, song_font_display.drop_shadow.opacity, + song_font_display.drop_shadow.radius, song_font_display.background.enabled, + song_font_display.background.color, song_font_display.background.opacity], outputs=[image_output]) save_image_button.click(image_processing.save_image_to_disk, inputs=[image_output, image_name, image_suffix], outputs=[]) @@ -124,14 +129,13 @@ def render_music_video_creation() -> gr.Image: show_share_button=False, show_download_button=False, 
scale=2, image_mode="RGBA") audio_filepath = gr.File(label="Audio", file_types=["audio"], scale=1, height=100) with gr.Column(): - background_color, background_opacity = gru.render_color_opacity_picker(default_name_label="Background") + background_color_opacity = gru.render_color_opacity_picker(default_name_label="Background") with gr.Group(): artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - artist_font, artist_shadow, artist_background = \ - image_processing.render_text_editor_parameters("Text Parameters") + artist_font_display = image_processing.render_text_editor_parameters("Text Parameters") with gr.Group(): song_title = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - song_font, song_shadow, song_background = image_processing.render_text_editor_parameters("Text Parameters") + song_font_display = image_processing.render_text_editor_parameters("Text Parameters") with gr.Column(): # Defaulting to 1. It's a still image, but may expand by adding some effects (grain, and not sure what else) fps = gr.Number(value=1, label="FPS", minimum=1, maximum=144) @@ -140,48 +144,70 @@ def render_music_video_creation() -> gr.Image: generate_audio_visualizer_button = gr.Checkbox(value=False, label="Generate Audio Visualizer", interactive=True) with gr.Group() as audio_visualizer_group: - audio_visualizer_color, audio_visualizer_opacity = \ - gru.render_color_opacity_picker("Audio Visualizer") + audio_visualizer_color_opacity = gru.render_color_opacity_picker("Audio Visualizer") with gr.Group(): with gr.Row(): - audio_visualizer_num_rows = gr.Number(value=90, label="Number of Rows", - minimum=1, maximum=100) - audio_visualizer_num_columns = gr.Number(value=65, label="Number of Columns", - minimum=1, maximum=100) + audio_visualizer_amount = dataclasses.RowColGradioComponents( + row=gr.Number(value=90, label="Number of Rows", minimum=1, + maximum=100), + col=gr.Number(value=65, label="Number of Columns", minimum=1, + maximum=100) + ) with gr.Row(): - audio_visualizer_min_size = gr.Number(value=1, label="Minimum Size", minimum=1, maximum=100) - audio_visualizer_max_size = gr.Number(value=7, label="Maximum Size", minimum=1, maximum=200) - # Must be a PNG file to support transparency. The idea for this is more-so to have shapes that can - # be rendered for the visualizer, and ideally they have transparent backgrounds, so using RGBA. + audio_visualizer_dot_size = dataclasses.MinMaxGradioComponents( + min=gr.Number(value=1, label="Minimum Size", minimum=1, maximum=100), + max=gr.Number(value=7, label="Maximum Size", minimum=1, maximum=200) + ) audio_visualizer_drawing = gr.Image(label="Visualizer Drawing (png)", type="filepath", sources=["upload"], show_share_button=False, show_download_button=False, scale=2, height=150, image_mode="RGBA") + visualizer_overlay_checkbox = gr.Checkbox(value=False, label="Overlay Visualizer on One Another", + info="If checked, alpha-blending will be applied, which " + "is noticeable on larger PNGs where each drawing " + "overlaps. This is only important for transparent " + "images and is very slow. 
If the image is not " + "transparent, leave this unchecked.") gru.bind_checkbox_to_visibility(generate_audio_visualizer_button, audio_visualizer_group) create_video_button = gr.Button("Create Music Video", variant="primary") gr.Markdown("## Output") with gr.Group(): - video_output, video_name, video_suffix, save_video_button = video_processing.render_video_output() + video_data = video_processing.render_video_output() create_video_button.click(create_music_video, inputs=[cover_image, audio_filepath, fps, artist_name, - artist_font.family, artist_font.style, artist_font.size, - artist_font.color, artist_font.opacity, artist_shadow.enabled, - artist_shadow.color, artist_shadow.opacity, - artist_shadow.radius, artist_background.enabled, - artist_background.color, artist_background.opacity, - song_title, song_font.family, song_font.style, song_font.size, - song_font.color, song_font.opacity, song_shadow.enabled, - song_shadow.color, song_shadow.opacity, song_shadow.radius, - song_background.enabled, song_background.color, - song_background.opacity, background_color, background_opacity, - generate_audio_visualizer_button, audio_visualizer_color, - audio_visualizer_opacity, audio_visualizer_drawing, - audio_visualizer_num_rows, audio_visualizer_num_columns, - audio_visualizer_min_size, audio_visualizer_max_size], - outputs=[video_output]) - save_video_button.click(video_processing.save_video_to_disk, - inputs=[video_output, video_name, video_suffix], outputs=[]) + artist_font_display.font.family, + artist_font_display.font.style, artist_font_display.font.size, + artist_font_display.font.color, + artist_font_display.font.opacity, + artist_font_display.drop_shadow.enabled, + artist_font_display.drop_shadow.color, + artist_font_display.drop_shadow.opacity, + artist_font_display.drop_shadow.radius, + artist_font_display.background.enabled, + artist_font_display.background.color, + artist_font_display.background.opacity, + song_title, song_font_display.font.family, + song_font_display.font.style, song_font_display.font.size, + song_font_display.font.color, song_font_display.font.opacity, + song_font_display.drop_shadow.enabled, + song_font_display.drop_shadow.color, + song_font_display.drop_shadow.opacity, + song_font_display.drop_shadow.radius, + song_font_display.background.enabled, + song_font_display.background.color, + song_font_display.background.opacity, + background_color_opacity.color, + background_color_opacity.opacity, + generate_audio_visualizer_button, + audio_visualizer_color_opacity.color, + audio_visualizer_color_opacity.opacity, + audio_visualizer_drawing, visualizer_overlay_checkbox, + audio_visualizer_amount.row, audio_visualizer_amount.col, + audio_visualizer_dot_size.min, audio_visualizer_dot_size.max], + outputs=[video_data.video]) + video_data.save.click(video_processing.save_video_to_disk, inputs=[video_data.video, video_data.name, + video_data.suffix], outputs=[]) return cover_image diff --git a/ui/music/utils.py b/ui/music/utils.py index 10726b9..1ffdb90 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -7,13 +7,14 @@ import time import tempfile from typing import List, Dict, Optional +from dataclasses import dataclass import cv2 from moviepy.editor import AudioFileClip import numpy as np import librosa from api import chatgpt as chatgpt_api from processing import image as image_processing -from utils import progress, visualizer, font_manager, image as image_utils, dataclasses +from utils import progress, visualizer, font_manager, image as image_utils, dataclasses as 
local_dataclasses def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): @@ -45,16 +46,25 @@ def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]] return downsampled_frequency_loudness, downsampled_times -def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, audio_length: int, fps: int, - audio_visualizer: dataclasses.RGBOpacity, dot_size: dataclasses.MinMax, - dot_count: dataclasses.RowCol, visualizer_drawing: Optional[str] = None) -> str: - print("Generating audio visualizer...") +@dataclass +class AudioVisualizerDotData: + """ + A dataclass representing the data for the audio visualizer's dots. + """ + size: local_dataclasses.MinMax + count: local_dataclasses.RowCol + color: local_dataclasses.RGBColor + opacity: int + visualizer_drawing: Optional[str] = None + visualizer_drawing_overlap: bool = False - audio_visualizer_color_and_opacity = image_utils.get_rgba(audio_visualizer.color, audio_visualizer.opacity) +def _audio_visualizer_generator(frame_size: local_dataclasses.Size, audio_path: str, audio_length: int, fps: int, + dot_data: AudioVisualizerDotData) -> str: + print("Generating audio visualizer...") custom_drawing = None - if visualizer_drawing is not None and visualizer_drawing != "": - custom_drawing = cv2.imread(visualizer_drawing, cv2.IMREAD_UNCHANGED) + if dot_data.visualizer_drawing is not None and dot_data.visualizer_drawing != "": + custom_drawing = cv2.imread(dot_data.visualizer_drawing, cv2.IMREAD_UNCHANGED) if custom_drawing.shape[2] == 3: custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGR2RGBA) else: @@ -65,9 +75,9 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a total_iterations = len(times) start_time = time.time() - vis = visualizer.Visualizer(size=dataclasses.Size(frame_size.width, frame_size.height), - dot_size=dot_size, color=audio_visualizer_color_and_opacity, - dot_count=dataclasses.RowCol(dot_count.row, dot_count.col)) + vis = visualizer.Visualizer(size=local_dataclasses.Size(frame_size.width, frame_size.height), + dot_size=dot_data.size, color=image_utils.get_rgba(dot_data.color, dot_data.opacity), + dot_count=local_dataclasses.RowCol(dot_data.count.row, dot_data.count.col)) vis.initialize_static_values() temp_visualizer_images_dir = tempfile.mkdtemp() os.makedirs(temp_visualizer_images_dir, exist_ok=True) @@ -75,7 +85,8 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a if time_point > audio_length: break frame = frame_cache.copy() - vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing) + vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing, + custom_drawing_overlap=dot_data.visualizer_drawing_overlap) frame_np = np.array(frame) frame_np = cv2.cvtColor(frame_np, cv2.COLOR_RGBA2BGRA) frame_filename = f'{temp_visualizer_images_dir}/frame_{i:05d}.png' @@ -87,23 +98,133 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a return temp_visualizer_images_dir -def create_music_video( +def _get_video_background(image_path: str, frame_size: local_dataclasses.Size, + background_overlay_color_opacity: local_dataclasses.RGBOpacity) -> np.ndarray: + """ + Gets the background for the video, which is a gaussian blurred version of the cover image stretched with a color + overlay. + :param image_path: The path to the image to use background. + :param frame_size: The size of the frame to use for the background. 
+ :param background_overlay_color_opacity: The color and opacity to use for the background overlay. + :return: + """ + background = cv2.imread(image_path) + background = cv2.resize(background, (frame_size.width, frame_size.height)) + background = cv2.GaussianBlur(background, (49, 49), 0) + if background.shape[2] == 3: + background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA) + overlay = np.full((frame_size.height, frame_size.width, 4), + image_utils.get_bgra(background_overlay_color_opacity.color, + background_overlay_color_opacity.opacity), + dtype=np.uint8) + alpha_overlay = overlay[:, :, 3] / 255.0 + alpha_background = background[:, :, 3] / 255.0 + for c in range(0, 3): + background[:, :, c] = (alpha_overlay * overlay[:, :, c] + + alpha_background * (1 - alpha_overlay) * background[:, :, c]) + background[:, :, 3] = (alpha_overlay + alpha_background * (1 - alpha_overlay)) * 255 + return background + + +def _generate_final_video(background_image_path: str, visualizer_frames_dir: Optional[str], cover_image_path: str, + audio_path: str, fps: int) -> str: + """ + Generates the final video using the given parameters with ffmpeg. + :param background_image_path: The path to the background image to use for the video. + :param visualizer_frames_dir: The path to the directory containing the audio visualizer frames. + :param cover_image_path: The path to the cover image to use for the video. + :param audio_path: The path to the audio file to use for the video. + :param fps: The frames per second to use for the video. + :return: + """ + temp_final_video_path = tempfile.mktemp(suffix=".mp4") + + audio_clip = AudioFileClip(audio_path) + ffmpeg_commands = [ + "ffmpeg", "-y", + "-loop", "1", + "-i", background_image_path, + ] + + if visualizer_frames_dir is not None: + ffmpeg_commands.extend([ + "-framerate", str(fps), + "-i", f'{visualizer_frames_dir}/frame_%05d.png', + ]) + filter_complex = "[0][1]overlay=format=auto[bg];[bg][2]overlay=format=auto" + else: + filter_complex = "[0][1]overlay=format=auto" + + ffmpeg_commands.extend([ + "-framerate", str(fps), + "-i", cover_image_path, + "-i", audio_path, + "-filter_complex", filter_complex, + "-map", "3:a" if visualizer_frames_dir is not None else "2:a", + "-c:v", "libx264", + "-c:a", "aac", + "-strict", "experimental", + "-t", str(audio_clip.duration), + "-hide_banner", + "-framerate", str(fps), + '-pix_fmt', 'yuv420p', + temp_final_video_path + ]) + print("Generating final video...") + + duration_regex = re.compile(r"Duration: (\d\d):(\d\d):(\d\d)\.\d\d") + time_regex = re.compile(r"time=(\d\d):(\d\d):(\d\d)\.\d\d") + + ffmpeg_start_time = time.time() + with subprocess.Popen(ffmpeg_commands, stderr=subprocess.PIPE, text=True) as ffmpeg_process: + for line in ffmpeg_process.stderr: + # Extract total duration of the video + duration_match = duration_regex.search(line) + if duration_match: + duration_match_groups = duration_match.groups() + curr_duration = local_dataclasses.Time( + hours=int(duration_match_groups[0]), + minutes=int(duration_match_groups[1]), + seconds=int(duration_match_groups[2]) + ) + + # Extract current time of encoding + time_match = time_regex.search(line) + if time_match and int(curr_duration) > 0: + time_match_groups = time_match.groups() + curr_time = local_dataclasses.Time( + hours=int(time_match_groups[0]), + minutes=int(time_match_groups[1]), + seconds=int(time_match_groups[2]) + ) + progress.print_progress_bar(int(curr_time), int(curr_duration), start_time=ffmpeg_start_time) + + progress.print_progress_bar(100, 100, 
end='\n', start_time=ffmpeg_start_time) + + return temp_final_video_path + + +def create_music_video( # pylint: disable=too-many-locals image_path: str, audio_path: str, fps: int, artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int, - artist_font_color: dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool, - artist_shadow_color: dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int, - artist_background_enabled: bool, artist_background_color: dataclasses.RGBColor, artist_background_opacity: int, - song: str, song_font_type: str, song_font_style: str, song_font_size: int, - song_font_color: dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool, - song_shadow_color: dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int, - song_background_enabled: bool, song_background_color: dataclasses.RGBColor, song_background_opacity: int, - background_color: dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66, - generate_audio_visualizer: bool = False, audio_visualizer_color: dataclasses.RGBColor = (255, 255, 255), + artist_font_color: local_dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool, + artist_shadow_color: local_dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int, + artist_background_enabled: bool, artist_background_color: local_dataclasses.RGBColor, + artist_background_opacity: int, song: str, song_font_type: str, song_font_style: str, song_font_size: int, + song_font_color: local_dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool, + song_shadow_color: local_dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int, + song_background_enabled: bool, song_background_color: local_dataclasses.RGBColor, song_background_opacity: int, + background_color: local_dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66, + generate_audio_visualizer: bool = False, audio_visualizer_color: local_dataclasses.RGBColor = (255, 255, 255), audio_visualizer_opacity: int = 100, visualizer_drawing: Optional[str] = None, - audio_visualizer_num_rows: int = 90, audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1, + visualizer_drawing_overlap: bool = False, audio_visualizer_num_rows: int = 90, + audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1, audio_visualizer_max_size: int = 7) -> Optional[str]: """ Creates a music video using the given parameters. + :param visualizer_drawing_overlap: Whether to overlap the visualizer drawings onto one-another with alpha-blending. + This is only noticeable on images with transparency and is a slow process, so if your visualizer drawings are + not transparent, it is recommended to set this to False. :param image_path: The path to the image to use as the cover + background for the video. :param audio_path: The path to the audio file to use for the video. :param fps: The frames per second to use for the video. @@ -145,29 +266,22 @@ def create_music_video( :param audio_visualizer_max_size: The maximum size to use for the audio visualizer's drawings (peak loudness). :return: The path to the generated video, or None if there was an error. 
""" - if image_path is None: - print("No cover image for the video.") - return None - if audio_path is None: - print("No audio to add to the video.") + if image_path is None or audio_path is None: + print("No cover image and/or audio for the video.") return None # Could probably expand to 4k, but unnecessary for this type of music video # Maybe in a future iteration it could be worth it - frame_size = dataclasses.Size(1920, 1080) + frame_size = local_dataclasses.Size(1920, 1080) # Set up cover - cover = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) - if cover.shape[2] == 3: - cover = cv2.cvtColor(cover, cv2.COLOR_BGR2RGBA) - else: - cover = cv2.cvtColor(cover, cv2.COLOR_BGRA2RGBA) + cover = image_utils.open_image_as_rgba(image_path) # Create canvas with 4 channels (RGBA) canvas = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) # Calculate dimensions for resizing the cover to fit within the canvas while maintaining its aspect ratio - cover_size = dataclasses.Size(cover.shape[1], cover.shape[0]) + cover_size = local_dataclasses.Size(cover.shape[1], cover.shape[0]) resize_factor = min(frame_size.width / cover_size.width, frame_size.height / cover_size.height) resize_factor *= (7 / 10) cover_size.width = int(cover_size.width * resize_factor) @@ -183,32 +297,28 @@ def create_music_video( audio_clip = AudioFileClip(audio_path) # Add video background - background = cv2.imread(image_path) - background = cv2.resize(background, (frame_size.width, frame_size.height)) - background = cv2.GaussianBlur(background, (49, 49), 0) - if background.shape[2] == 3: - background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA) - background_color_overlay = image_utils.get_bgra(background_color, background_opacity) - overlay = np.full((frame_size.height, frame_size.width, 4), background_color_overlay, dtype=np.uint8) - alpha_overlay = overlay[:, :, 3] / 255.0 - alpha_background = background[:, :, 3] / 255.0 - for c in range(0, 3): - background[:, :, c] = (alpha_overlay * overlay[:, :, c] + - alpha_background * (1 - alpha_overlay) * background[:, :, c]) - background[:, :, 3] = (alpha_overlay + alpha_background * (1 - alpha_overlay)) * 255 + background = _get_video_background(image_path, frame_size, + local_dataclasses.RGBOpacity(background_color, background_opacity)) background_bgr = cv2.cvtColor(background, cv2.COLOR_BGRA2BGR) tmp_background_image_path = tempfile.mktemp(suffix=".png") cv2.imwrite(tmp_background_image_path, background_bgr) + temp_visualizer_images_dir = None if generate_audio_visualizer: temp_visualizer_images_dir = _audio_visualizer_generator(frame_size, audio_path, audio_clip.duration, fps, - dataclasses.RGBOpacity(audio_visualizer_color, - audio_visualizer_opacity), - dataclasses.MinMax(audio_visualizer_min_size, - audio_visualizer_max_size), - dataclasses.RowCol(audio_visualizer_num_rows, - audio_visualizer_num_columns), - visualizer_drawing=visualizer_drawing) + AudioVisualizerDotData( + size=local_dataclasses.MinMax( + audio_visualizer_min_size, + audio_visualizer_max_size), + color=audio_visualizer_color, + opacity=audio_visualizer_opacity, + count=local_dataclasses.RowCol( + audio_visualizer_num_rows, + audio_visualizer_num_columns), + visualizer_drawing=visualizer_drawing, + visualizer_drawing_overlap=\ + visualizer_drawing_overlap) + ) # Add text font_families = font_manager.get_fonts() @@ -240,96 +350,19 @@ def create_music_video( artist_shadow_opacity), show_background=artist_background_enabled, background_color=image_utils.get_rgba( - artist_background_color, 
artist_background_opacity)) - - text_np = np.array(text_canvas) - np_canvas = np.array(canvas) - # Normalize the alpha channels - alpha_text = text_np[:, :, 3] / 255.0 - alpha_canvas = np_canvas[:, :, 3] / 255.0 - alpha_final = alpha_text + alpha_canvas * (1 - alpha_text) - - canvas_final = np.zeros_like(np_canvas) - # alpha blend - for c in range(3): # Loop over color (non-alpha) channels - canvas_final[:, :, c] = (alpha_text * text_np[:, :, c] + alpha_canvas * (1 - alpha_text) * - np_canvas[:, :, c]) / alpha_final - canvas_final[:, :, 3] = alpha_final * 255 - canvas_final[:, :, :3][alpha_final == 0] = 0 + artist_background_color, artist_background_opacity)) + canvas_final = image_utils.blend_alphas(np.array(text_canvas), np.array(canvas)) temp_canvas_image_path = tempfile.mktemp(suffix=".png") # Convert to BGR for OpenCV canvas_final = cv2.cvtColor(canvas_final, cv2.COLOR_RGBA2BGRA) cv2.imwrite(temp_canvas_image_path, canvas_final) - temp_final_video_path = tempfile.mktemp(suffix=".mp4") - - # set up the background video commands - ffmpeg_commands = [ - "ffmpeg", "-y", - "-loop", "1", - "-i", tmp_background_image_path, - ] + temp_final_video_path = _generate_final_video(tmp_background_image_path, temp_visualizer_images_dir, + temp_canvas_image_path, audio_path, fps) - if generate_audio_visualizer: - ffmpeg_commands.extend([ - "-framerate", str(fps), - "-i", f'{temp_visualizer_images_dir}/frame_%05d.png', - ]) - filter_complex = "[0][1]overlay=format=auto[bg];[bg][2]overlay=format=auto" - audio_input_map = "3:a" - else: - filter_complex = "[0][1]overlay=format=auto" - audio_input_map = "2:a" - - ffmpeg_commands.extend([ - "-framerate", str(fps), - "-i", temp_canvas_image_path, - "-i", audio_path, - "-filter_complex", filter_complex, - "-map", audio_input_map, - "-c:v", "libx264", - "-c:a", "aac", - "-strict", "experimental", - "-t", str(audio_clip.duration), - "-hide_banner", - "-framerate", str(fps), - '-pix_fmt', 'yuv420p', - temp_final_video_path - ]) - print("Generating final video...") - ffmpeg_process = subprocess.Popen(ffmpeg_commands, stderr=subprocess.PIPE, text=True) - - duration_regex = re.compile(r"Duration: (\d\d):(\d\d):(\d\d)\.\d\d") - time_regex = re.compile(r"time=(\d\d):(\d\d):(\d\d)\.\d\d") - total_duration_in_seconds = 0 - - ffmpeg_start_time = time.time() - while True: - line = ffmpeg_process.stderr.readline() - if not line: - break - - # Extract total duration of the video - duration_match = duration_regex.search(line) - if duration_match: - hours, minutes, seconds = map(int, duration_match.groups()) - total_duration_in_seconds = hours * 3600 + minutes * 60 + seconds - - # Extract current time of encoding - time_match = time_regex.search(line) - if time_match and total_duration_in_seconds > 0: - hours, minutes, seconds = map(int, time_match.groups()) - current_time = hours * 3600 + minutes * 60 + seconds - progress.print_progress_bar(current_time, total_duration_in_seconds, start_time=ffmpeg_start_time) - - ffmpeg_process.wait() - if ffmpeg_process.returncode != 0: - raise subprocess.CalledProcessError(ffmpeg_process.returncode, ffmpeg_commands) - progress.print_progress_bar(100, 100, end='\n', start_time=ffmpeg_start_time) - print("Done generating final video!\n") # clean up the original frames - if generate_audio_visualizer: + if temp_visualizer_images_dir is not None: for file in os.listdir(temp_visualizer_images_dir): os.remove(os.path.join(temp_visualizer_images_dir, file)) os.rmdir(temp_visualizer_images_dir) @@ -353,11 +386,13 @@ def 
generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[ return chatgpt_api.url_to_gradio_image_name(image_url) +# pylint: disable=too-many-locals def process(image_path: str, artist: str, song: str, - af_family: str, af_style: str, afs: int, afc: dataclasses.RGBColor, afo: int, ase: bool, - asc: dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: dataclasses.RGBColor, abo: int, - sf_family: str, sf_style: str, sfs: int, sfc: dataclasses.RGBColor, sfo: int, sse: bool, - ssc: dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: dataclasses.RGBColor, sbo: int) \ + af_family: str, af_style: str, afs: int, afc: local_dataclasses.RGBColor, afo: int, ase: bool, + asc: local_dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: local_dataclasses.RGBColor, + abo: int, sf_family: str, sf_style: str, sfs: int, sfc: local_dataclasses.RGBColor, sfo: int, sse: bool, + ssc: local_dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: local_dataclasses.RGBColor, + sbo: int) \ -> Optional[np.ndarray]: """ Processes the image at the given path (by adding the requested text) and returns the processed image. diff --git a/utils/dataclasses.py b/utils/dataclasses.py index 77c3531..0755858 100644 --- a/utils/dataclasses.py +++ b/utils/dataclasses.py @@ -59,8 +59,17 @@ class MinMax: """ A dataclass representing a minimum and maximum value. """ - min: int - max: int + min: Union[int, float] + max: Union[int, float] + + +@dataclass +class MinMaxGradioComponents: + """ + A dataclass representing the components of a minimum and maximum value editor. + """ + min: gr.Number + max: gr.Number @dataclass @@ -72,6 +81,15 @@ class RowCol: col: int +@dataclass +class RowColGradioComponents: + """ + A dataclass representing the components of a row and column editor. + """ + row: gr.Number + col: gr.Number + + @dataclass class FontGradioComponents: """ @@ -105,4 +123,50 @@ class FontBackgroundGradioComponents: opacity: gr.Slider +@dataclass +class FontDisplayGradioComponents: + """ + A dataclass representing the components of how to display the font. + """ + font: FontGradioComponents + drop_shadow: FontDropShadowGradioComponents + background: FontBackgroundGradioComponents + + +@dataclass +class ColorOpacityGradioComponents: + """ + A dataclass representing the components of the color and opacity editor. + """ + color: gr.ColorPicker + opacity: gr.Slider + + +@dataclass +class VideoOutputGradioComponents: + """ + A dataclass representing the components of the video output. + """ + video: gr.Video + name: gr.Textbox + suffix: gr.Dropdown + save: gr.Button + + +@dataclass +class Time: + """ + A dataclass representing a time. + """ + hours: int + minutes: int + seconds: int + + def __int__(self) -> int: + """ + Returns the time in seconds. + """ + return self.hours * 3600 + self.minutes * 60 + self.seconds + + RGBColor = Union[str, tuple[int, int, int]] diff --git a/utils/gradio.py b/utils/gradio.py index 54353ff..fae5a02 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -5,18 +5,18 @@ from utils import font_manager, dataclasses -def render_color_opacity_picker(default_name_label: str = "Font") -> tuple[gr.ColorPicker, gr.Slider]: +def render_color_opacity_picker(default_name_label: str = "Font") -> dataclasses.ColorOpacityGradioComponents: """ Renders a color picker with the appropriate styling. :param default_name_label: The default name label to use. - :return: A tuple containing the color and opacity components. 
+ :return: A class containing the color and opacity components. """ with gr.Group(): with gr.Row(): color = gr.ColorPicker(label=f"{default_name_label} Color", scale=1, interactive=True) opacity = gr.Slider(0, 100, value=100, label="Opacity", scale=2, interactive=True) - return color, opacity + return dataclasses.ColorOpacityGradioComponents(color, opacity) def bind_checkbox_to_visibility(checkbox: gr.Checkbox, group: gr.Group): @@ -58,10 +58,11 @@ def update_font_styles(selected_font_family): font_family.change(update_font_styles, inputs=[font_family], outputs=[font_style]) with gr.Group(): - font_color, font_opacity = render_color_opacity_picker() + font_color_opacity = render_color_opacity_picker() font_size = gr.Number(default_font_size, label="Font Size", interactive=True) - return dataclasses.FontGradioComponents(font_family, font_style, font_color, font_opacity, font_size) + return dataclasses.FontGradioComponents(font_family, font_style, font_color_opacity.color, + font_color_opacity.opacity, font_size) def render_tool_description(description: str): diff --git a/utils/image.py b/utils/image.py index b070a04..42f59e4 100644 --- a/utils/image.py +++ b/utils/image.py @@ -2,6 +2,8 @@ This file contains functions for image processing. """ from typing import Tuple +import cv2 +import numpy as np from utils import dataclasses @@ -43,3 +45,45 @@ def get_bgra(color: dataclasses.RGBColor, opacity: int) -> Tuple[int, int, int, color = tuple(int(color[i:i + 2], 16) for i in (0, 2, 4)) return color[2], color[1], color[0], get_alpha_from_opacity(opacity) + + +def open_image_as_rgba(image_path: str) -> np.ndarray: + """ + Opens an image as RGBA. + :param image_path: The path to the image. + :return: The image as RGBA. + """ + img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) + if img.shape[2] == 3: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA) + else: + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) + + return img + + +def blend_alphas(a: np.ndarray, b: np.ndarray) -> np.ndarray: + """ + Blends two images together using alpha blending. + :param a: The first image. + :param b: The second image. + :return: The blended image. + """ + if a.shape != b.shape: + raise ValueError("both images must have the same shape to blend alphas") + if a.shape[2] != 4 or b.shape[2] != 4: + raise ValueError("both images must have 4 channels to blend alphas") + + alpha_text = a[:, :, 3] / 255.0 + alpha_canvas = b[:, :, 3] / 255.0 + alpha_final = alpha_text + alpha_canvas * (1 - alpha_text) + + final = np.zeros_like(b) + # alpha blend + for c in range(3): # Loop over color (non-alpha) channels + final[:, :, c] = (alpha_text * a[:, :, c] + alpha_canvas * (1 - alpha_text) * + b[:, :, c]) / alpha_final + final[:, :, 3] = alpha_final * 255 + final[:, :, :3][alpha_final == 0] = 0 + + return final diff --git a/utils/visualizer.py b/utils/visualizer.py index 8e53285..2beb839 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -4,7 +4,7 @@ from typing import Dict, Optional import numpy as np import cv2 -from utils import dataclasses +from utils import dataclasses, image as image_utils class Visualizer: @@ -12,6 +12,7 @@ class Visualizer: This class is used to draw the visualizer on the canvas. Will be replaced with a more general solution in the future to allow for more customization. 
""" + def __init__(self, dot_size: dataclasses.MinMax, color, dot_count: dataclasses.RowCol, size: dataclasses.Size): self.dot_size = dot_size self.color = color @@ -34,14 +35,11 @@ def initialize_static_values(self: "Visualizer") -> None: self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count.col) for y in range(self.dot_count.row)] - def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], - custom_drawing: Optional[np.ndarray] = None) -> None: + def _get_loudness(self, frequency_data: Dict[float, float]) -> (dataclasses.MinMax, Dict[int, int]): """ - Draws the visualizer on the canvas (a single frame). - :param canvas: The canvas to draw on. + Calculates the loudness values for each column. :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency. - :param custom_drawing: A custom drawing to use instead of the default circle. - :return: None. + :return: A tuple containing the loudness min/max and the loudness values for each column. """ # Precompute log frequencies freq_keys = np.array(list(frequency_data.keys())) @@ -51,66 +49,111 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict # Find the maximum and minimum loudness values, ignoring -80 dB freq_bands = np.array([frequency_data[key] for key in freq_keys if key > 0]) # Ignore 0 Hz - max_loudness = np.max(freq_bands) filtered_loudness = freq_bands[freq_bands > -80] - min_loudness = np.min(filtered_loudness) if filtered_loudness.size > 0 else -80 + loudness_min_max = dataclasses.MinMax(np.min(filtered_loudness) if filtered_loudness.size > 0 else -80, + np.max(freq_bands)) # Precompute loudness values loudness_values = {} for x in range(self.dot_count.col): - lower_bound = log_freqs[x] - upper_bound = log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 - band_freqs = [freq for freq in freq_keys if lower_bound <= freq < upper_bound] + bounds = { + "lower": log_freqs[x], + "upper": log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 + } + band_freqs = [freq for freq in freq_keys if bounds.get("lower") <= freq < bounds.get("upper")] if not band_freqs: - closest_freq = min(freq_keys, key=lambda f, lb=lower_bound: abs(f - lb)) + closest_freq = min(freq_keys, key=lambda f, lb=bounds.get("lower"): abs(f - lb)) band_freqs = [closest_freq] band_loudness = [frequency_data[freq] for freq in band_freqs] avg_loudness = np.mean(band_loudness) if band_loudness else -80 loudness_values[x] = avg_loudness + return loudness_min_max, loudness_values + + def _calculate_dot_size(self: "Visualizer", column: int, loudness: dataclasses.MinMax, + loudness_values: Dict[int, int]) -> int: + """ + Calculates the dot size for a given column. + :param loudness: The loudness min/max. + :param loudness_values: The loudness values for each column. + :return: The dot size. 
+ """ + # Scale the loudness to the dot size + scaled_loudness = (loudness_values[column] - loudness.min) / ( + loudness.max - loudness.min) if loudness.max != loudness.min else 0 + dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min) + return min(max(dot_size, self.dot_size.min), self.dot_size.max) + + def _draw_custom_drawing(self: "Visualizer", canvas: np.ndarray, start_pos: dataclasses.Position, + end_pos: dataclasses.Position, img_start_pos: dataclasses.Position, + img_end_pos: dataclasses.Position, dot_size: int, + custom_drawing_overlap: bool) -> np.ndarray: + """ + Draws the custom drawing on the canvas. + :param canvas: The canvas to draw on. + :param start_pos: The start position on the canvas. + :param end_pos: The end position on the canvas. + :param img_start_pos: The start position on the resized image. + :param img_end_pos: The end position on the resized image. + :param dot_size: The dot size. + :param custom_drawing_overlap: Whether overlapped custom drawings should alpha blend. + :return: The canvas with the custom drawing drawn on it. + """ + drawing_slice = self.cached_resized_drawing[dot_size][img_start_pos.y:img_end_pos.y, + img_start_pos.x:img_end_pos.x] + + if custom_drawing_overlap: + canvas_slice = canvas[start_pos.y:end_pos.y, start_pos.x:end_pos.x] + return image_utils.blend_alphas(canvas_slice, drawing_slice) + + return drawing_slice + + def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], + custom_drawing: Optional[np.ndarray] = None, custom_drawing_overlap: bool = False) -> None: + """ + Draws the visualizer on the canvas (a single frame). + :param custom_drawing_overlap: Whether to overlap the custom drawing should alpha blend when overlapping. + :param canvas: The canvas to draw on. + :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency. + :param custom_drawing: A custom drawing to use instead of the default circle. + :return: None. 
+ """ + loudness, loudness_values = self._get_loudness(frequency_data) + cached_dot_sizes = {} for i, (pos_x, pos_y) in enumerate(self.cached_dot_positions): column = i // self.dot_count.row # Ensure the correct column is computed if column not in cached_dot_sizes: - avg_loudness = loudness_values[column] - # Scale the loudness to the dot size - scaled_loudness = (avg_loudness - min_loudness) / ( - max_loudness - min_loudness) if max_loudness != min_loudness else 0 - dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min) - dot_size = min(max(dot_size, self.dot_size.min), self.dot_size.max) - - cached_dot_sizes[column] = dot_size - else: - dot_size = cached_dot_sizes[column] + cached_dot_sizes[column] = self._calculate_dot_size(column, loudness, loudness_values) - # Convert dot size to integer and calculate the center position - dot_size = int(dot_size) - center = (int(pos_x), int(pos_y)) + dot_size = int(cached_dot_sizes[column]) + center_pos = dataclasses.Position(int(pos_x), int(pos_y)) if custom_drawing is not None: if dot_size not in self.cached_resized_drawing: - self.cached_resized_drawing[dot_size] = cv2.resize(custom_drawing, (dot_size, dot_size), - interpolation=cv2.INTER_LANCZOS4) - resized_custom_drawing = self.cached_resized_drawing[dot_size] + if dot_size == 0: + self.cached_resized_drawing[dot_size] = np.zeros((1, 1, 4), dtype=np.uint8) + else: + self.cached_resized_drawing[dot_size] = cv2.resize(custom_drawing, (dot_size, dot_size), + interpolation=cv2.INTER_LANCZOS4) - center_x, center_y = int(pos_x), int(pos_y) half_dot_size = dot_size // 2 - # Calculate bounds on the canvas - start_x = max(center_x - half_dot_size, 0) - end_x = min(center_x + half_dot_size, canvas.shape[1]) - start_y = max(center_y - half_dot_size, 0) - end_y = min(center_y + half_dot_size, canvas.shape[0]) + start_pos = dataclasses.Position(max(center_pos.x - half_dot_size, 0), + max(center_pos.y - half_dot_size, 0)) + end_pos = dataclasses.Position(min(center_pos.x + half_dot_size, canvas.shape[1]), min( + center_pos.y + half_dot_size, canvas.shape[0])) # Calculate corresponding bounds on the resized image - img_start_x = max(half_dot_size - (center_x - start_x), 0) - img_end_x = img_start_x + (end_x - start_x) - img_start_y = max(half_dot_size - (center_y - start_y), 0) - img_end_y = img_start_y + (end_y - start_y) - - # Place the image slice onto the canvas - canvas[start_y:end_y, start_x:end_x] = resized_custom_drawing[img_start_y:img_end_y, - img_start_x:img_end_x] + img_start_pos = dataclasses.Position(max(half_dot_size - (center_pos.x - start_pos.x), 0), + max(half_dot_size - (center_pos.y - start_pos.y), 0)) + img_end_pos = dataclasses.Position(img_start_pos.x + (end_pos.x - start_pos.x), + img_start_pos.y + (end_pos.y - start_pos.y)) + + canvas[start_pos.y:end_pos.y, start_pos.x:end_pos.x] = self._draw_custom_drawing(canvas, start_pos, + end_pos, img_start_pos, + img_end_pos, dot_size, + custom_drawing_overlap) else: - cv2.circle(canvas, center, dot_size // 2, self.color, -1) + cv2.circle(canvas, (center_pos.x, center_pos.y), dot_size // 2, self.color, -1)