From 04a622b36de81be339d76bd7aa060f2a2649b722 Mon Sep 17 00:00:00 2001 From: Fabian Gonzalez Date: Tue, 16 Jan 2024 19:42:27 -0500 Subject: [PATCH] 100% Linter --- .github/workflows/pylint.yml | 4 +- .pylintrc | 1 + processing/image.py | 42 +++-- processing/video.py | 6 +- requirements.txt | 4 +- ui/listicles/interface.py | 68 +++++--- ui/listicles/utils.py | 1 + ui/music/interface.py | 116 ++++++++----- ui/music/utils.py | 317 +++++++++++++++++++---------------- utils/dataclasses.py | 68 +++++++- utils/gradio.py | 11 +- utils/image.py | 44 +++++ utils/visualizer.py | 131 ++++++++++----- 13 files changed, 530 insertions(+), 283 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index aa949d3..e6dde2a 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -1,11 +1,11 @@ name: Python linter on: [push] jobs: - build: + lint: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11"] + python-version: ["3.11"] steps: - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} diff --git a/.pylintrc b/.pylintrc index 2083c87..b390428 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,6 +1,7 @@ [MAIN] max-line-length=120 max-attributes=10 +max-locals=20 # Currently, this is added because gradio Inputs don't support passing tuples/dataclasses/etc. as arguments, meaning I # can't shorten some methods that take a lot of arguments. disable=too-many-arguments diff --git a/processing/image.py b/processing/image.py index eb97bc1..2c35dfb 100644 --- a/processing/image.py +++ b/processing/image.py @@ -35,9 +35,7 @@ def render_image_output() -> (gr.Image, gr.Textbox, gr.Dropdown, gr.Button): return image_output, image_name, image_suffix, save_image_button -def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponents, - dataclasses.FontDropShadowGradioComponents, - dataclasses.FontBackgroundGradioComponents): +def render_text_editor_parameters(name: str) -> dataclasses.FontDisplayGradioComponents: """ Renders the text editor parameters. :param name: The name of the text editor parameters. This is used as the label for the accordion. 
@@ -49,18 +47,21 @@ def render_text_editor_parameters(name: str) -> (dataclasses.FontGradioComponent with gr.Group(): drop_shadow_enabled = gr.Checkbox(False, label="Enable Drop Shadow", interactive=True) with gr.Group(visible=drop_shadow_enabled.value) as additional_options: - drop_shadow_color, drop_shadow_opacity = gru.render_color_opacity_picker() + drop_shadow_color_opacity = gru.render_color_opacity_picker() drop_shadow_radius = gr.Number(0, label="Shadow Radius") gru.bind_checkbox_to_visibility(drop_shadow_enabled, additional_options) with gr.Group(): background_enabled = gr.Checkbox(False, label="Enable Background", interactive=True) with gr.Group(visible=background_enabled.value) as additional_options: - background_color, background_opacity = gru.render_color_opacity_picker() + background_color_opacity = gru.render_color_opacity_picker() gru.bind_checkbox_to_visibility(background_enabled, additional_options) - return (font_data, dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color, - drop_shadow_opacity, drop_shadow_radius), - dataclasses.FontBackgroundGradioComponents(background_enabled, background_color, background_opacity)) + drop_shadow_data = dataclasses.FontDropShadowGradioComponents(drop_shadow_enabled, drop_shadow_color_opacity.color, + drop_shadow_color_opacity.opacity, drop_shadow_radius) + background_data = dataclasses.FontBackgroundGradioComponents(background_enabled, background_color_opacity.color, + background_color_opacity.opacity) + + return dataclasses.FontDisplayGradioComponents(font_data, drop_shadow_data, background_data) def add_background(image_pil: Image, draw: ImageDraw, position: tuple[int, int], text: str, font: ImageFont, @@ -216,7 +217,23 @@ def save_image_to_disk(image_path: str, name: Optional[str] = None, save_dir: st return save_dir -# Function to add text to an image with custom font, size, and wrapping +def _get_lines(text: str, max_width: Optional[int] = None) -> list[str]: + """ + Gets the lines of text from a string. + :param text: The text to get the lines from. + :param max_width: The maximum width of the text before wrapping. + :return: A list of lines. 
+ """ + if max_width: # Prepare for text wrapping if max_width is provided + wrapped_text = textwrap.fill(text, width=max_width) + else: + wrapped_text = text + + return wrapped_text.split('\n') + + +# A lot of the reported variables come from the parameters +# pylint: disable=too-many-locals def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[int, int], font_path: str, font_size: int, font_color: Tuple[int, int, int, int] = (255, 255, 255, 255), shadow_color: Tuple[int, int, int, int] = (255, 255, 255, 255), @@ -256,12 +273,7 @@ def add_text(image: Union[Image.Image, np.ndarray], text: str, position: Tuple[i font = ImageFont.truetype(font_path, font_size) draw = ImageDraw.Draw(txt_layer) - if max_width: # Prepare for text wrapping if max_width is provided - wrapped_text = textwrap.fill(text, width=max_width) - else: - wrapped_text = text - - lines = wrapped_text.split('\n') + lines = _get_lines(text, max_width) y_offset = 0 # max_line_width = 0 # Keep track of the widest line diff --git a/processing/video.py b/processing/video.py index 3d19e59..cbb4036 100644 --- a/processing/video.py +++ b/processing/video.py @@ -8,13 +8,13 @@ from typing import Optional, Literal import gradio as gr from moviepy.editor import VideoFileClip -from utils import path_handler +from utils import path_handler, dataclasses VIDEO_FOLDER = "videos" default_path = os.path.join(path_handler.get_default_path(), VIDEO_FOLDER) -def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): +def render_video_output() -> dataclasses.VideoOutputGradioComponents: """ Creates and returns a set of Gradio interface components for video output. @@ -32,7 +32,7 @@ def render_video_output() -> (gr.Video, gr.Textbox, gr.Dropdown, gr.Button): video_suffix = gr.Dropdown([".mp4", ".mov"], value=".mp4", label="File Type", allow_custom_value=False) save_video_button = gr.Button("Save To Disk", variant="primary") - return video_output, video_name, video_suffix, save_video_button + return dataclasses.VideoOutputGradioComponents(video_output, video_name, video_suffix, save_video_button) def save_video_to_disk(video_path: str, name: Optional[str] = None, video_suffix: Literal[".mp4", ".mov"] = ".mp4", diff --git a/requirements.txt b/requirements.txt index 7922ce1..7614d6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ gradio~=4.12.0 inflect~=7.0.0 openai~=1.6.1 -numpy~=1.23.5 -Pillow~=8.4.0 +numpy~=1.26.3 +Pillow~=10.2.0 opencv-python-headless~=4.8.1.78 fonttools~=4.47.0 moviepy~=1.0.3 diff --git a/ui/listicles/interface.py b/ui/listicles/interface.py index bb15613..87d386a 100644 --- a/ui/listicles/interface.py +++ b/ui/listicles/interface.py @@ -81,11 +81,11 @@ def set_json(json_file): with gr.Column(scale=3): gr.Markdown("# Parameters") with gr.Row(equal_height=False): - name_font, name_shadow, name_background = image_processing.render_text_editor_parameters("Name") - desc_font, desc_shadow, desc_background = image_processing.render_text_editor_parameters("Description") + name_font_display = image_processing.render_text_editor_parameters("Name") + desc_font_display = image_processing.render_text_editor_parameters("Description") with gr.Row(equal_height=False): - asc_font, asc_shadow, asc_background = image_processing.render_text_editor_parameters("Association") - rate_font, rate_shadow, rate_background = image_processing.render_text_editor_parameters("Rating") + asc_font_display = image_processing.render_text_editor_parameters("Association") + rate_font_display = 
image_processing.render_text_editor_parameters("Rating") with gr.Column(scale=1): gr.Markdown("# Output") @@ -99,26 +99,46 @@ def set_json(json_file): save_button.click(image_processing.save_images_to_disk, inputs=[output_preview, image_type], outputs=[]) process_button.click(listicle_utils.process, inputs=[input_batch_images, input_batch_json, - name_font.family, name_font.style, name_font.size, - name_font.color, name_font.opacity, name_shadow.enabled, - name_shadow.color, name_shadow.opacity, name_shadow.radius, - name_background.enabled, name_background.color, - name_background.opacity, - desc_font.family, desc_font.style, desc_font.size, - desc_font.color, desc_font.opacity, desc_shadow.enabled, - desc_shadow.color, desc_shadow.opacity, desc_shadow.radius, - desc_background.enabled, desc_background.color, - desc_background.opacity, - asc_font.family, asc_font.style, asc_font.size, - asc_font.color, asc_font.opacity, asc_shadow.enabled, - asc_shadow.color, asc_shadow.opacity, asc_shadow.radius, - asc_background.enabled, asc_background.color, - asc_background.opacity, - rate_font.family, rate_font.style, rate_font.size, - rate_font.color, rate_font.opacity, rate_shadow.enabled, - rate_shadow.color, rate_shadow.opacity, rate_shadow.radius, - rate_background.enabled, rate_background.color, - rate_background.opacity, + name_font_display.font.family, name_font_display.font.style, + name_font_display.font.size, name_font_display.font.color, + name_font_display.font.opacity, + name_font_display.drop_shadow.enabled, + name_font_display.drop_shadow.color, + name_font_display.drop_shadow.opacity, + name_font_display.drop_shadow.radius, + name_font_display.background.enabled, + name_font_display.background.color, + name_font_display.background.opacity, + desc_font_display.font.family, desc_font_display.font.style, + desc_font_display.font.size, desc_font_display.font.color, + desc_font_display.font.opacity, + desc_font_display.drop_shadow.enabled, + desc_font_display.drop_shadow.color, + desc_font_display.drop_shadow.opacity, + desc_font_display.drop_shadow.radius, + desc_font_display.background.enabled, + desc_font_display.background.color, + desc_font_display.background.opacity, + asc_font_display.font.family, asc_font_display.font.style, + asc_font_display.font.size, asc_font_display.font.color, + asc_font_display.font.opacity, + asc_font_display.drop_shadow.enabled, + asc_font_display.drop_shadow.color, + asc_font_display.drop_shadow.opacity, + asc_font_display.drop_shadow.radius, + asc_font_display.background.enabled, + asc_font_display.background.color, + asc_font_display.background.opacity, + rate_font_display.font.family, rate_font_display.font.style, + rate_font_display.font.size, rate_font_display.font.color, + rate_font_display.font.opacity, + rate_font_display.drop_shadow.enabled, + rate_font_display.drop_shadow.color, + rate_font_display.drop_shadow.opacity, + rate_font_display.drop_shadow.radius, + rate_font_display.background.enabled, + rate_font_display.background.color, + rate_font_display.background.opacity, ], outputs=[output_preview]) return input_batch_images, input_batch_json diff --git a/ui/listicles/utils.py b/ui/listicles/utils.py index 747c6c4..01a7898 100644 --- a/ui/listicles/utils.py +++ b/ui/listicles/utils.py @@ -11,6 +11,7 @@ import api.chatgpt as chatgpt_api +# pylint: disable=too-many-locals def process(image_files: list[Any], json_data: str, nf_family: str, nf_style: str, nfs: int, nfc: dataclasses.RGBColor, nfo: int, nse: bool, nsc: dataclasses.RGBColor, nso: 
int, nsr, nbe: bool, nbc: dataclasses.RGBColor, nbo: int, diff --git a/ui/music/interface.py b/ui/music/interface.py index 8808022..c6e199b 100644 --- a/ui/music/interface.py +++ b/ui/music/interface.py @@ -7,6 +7,7 @@ import processing.video as video_processing import processing.image as image_processing import ui.components.openai as openai_components +from utils import dataclasses def render_music_section() -> None: @@ -71,7 +72,7 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): display component for displaying the cover image before processing, and an image display component for displaying the cover image after processing. """ - with (gr.Column()): + with gr.Column(): gr.Markdown("## Input") with gr.Group(): input_image = gr.Image(sources=["upload"], label="Cover Image (png)", type="filepath", @@ -81,13 +82,11 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): with gr.Row(equal_height=False): with gr.Group(): artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - artist_font, artist_shadow, artist_background = image_processing.render_text_editor_parameters( - "Artist Text Parameters") + artist_font_display = image_processing.render_text_editor_parameters("Artist Text Parameters") with gr.Group(): song_name = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - song_font, song_shadow, song_background = \ - image_processing.render_text_editor_parameters("Song Text Parameters") + song_font_display = image_processing.render_text_editor_parameters("Song Text Parameters") process_button = gr.Button("Process", variant="primary") @@ -98,14 +97,20 @@ def render_process_cover() -> (gr.Button, gr.Image, gr.Image): send_to_create_video_button = gr.Button("Send Image to 'Create Music Video'", variant="secondary") process_button.click(process, inputs=[input_image, artist_name, song_name, - artist_font.family, artist_font.style, artist_font.size, artist_font.color, - artist_font.opacity, artist_shadow.enabled, artist_shadow.color, - artist_shadow.opacity, artist_shadow.radius, artist_background.enabled, - artist_background.color, artist_background.opacity, song_font.family, - song_font.style, song_font.size, song_font.color, song_font.opacity, - song_shadow.enabled, song_shadow.color, song_shadow.opacity, - song_shadow.radius, song_background.enabled, song_background.color, - song_background.opacity], + artist_font_display.font.family, artist_font_display.font.style, + artist_font_display.font.size, artist_font_display.font.color, + artist_font_display.font.opacity, artist_font_display.drop_shadow.enabled, + artist_font_display.drop_shadow.color, + artist_font_display.drop_shadow.opacity, + artist_font_display.drop_shadow.radius, + artist_font_display.background.enabled, + artist_font_display.background.color, artist_font_display.background.opacity, + song_font_display.font.family, song_font_display.font.style, + song_font_display.font.size, song_font_display.font.color, + song_font_display.font.opacity, song_font_display.drop_shadow.enabled, + song_font_display.drop_shadow.color, song_font_display.drop_shadow.opacity, + song_font_display.drop_shadow.radius, song_font_display.background.enabled, + song_font_display.background.color, song_font_display.background.opacity], outputs=[image_output]) save_image_button.click(image_processing.save_image_to_disk, inputs=[image_output, image_name, image_suffix], outputs=[]) @@ -124,14 +129,13 @@ def render_music_video_creation() -> gr.Image: show_share_button=False, show_download_button=False, 
scale=2, image_mode="RGBA") audio_filepath = gr.File(label="Audio", file_types=["audio"], scale=1, height=100) with gr.Column(): - background_color, background_opacity = gru.render_color_opacity_picker(default_name_label="Background") + background_color_opacity = gru.render_color_opacity_picker(default_name_label="Background") with gr.Group(): artist_name = gr.Textbox(label="Artist Name", lines=1, max_lines=1, scale=1) - artist_font, artist_shadow, artist_background = \ - image_processing.render_text_editor_parameters("Text Parameters") + artist_font_display = image_processing.render_text_editor_parameters("Text Parameters") with gr.Group(): song_title = gr.Textbox(label="Song Title", lines=1, max_lines=1, scale=2) - song_font, song_shadow, song_background = image_processing.render_text_editor_parameters("Text Parameters") + song_font_display = image_processing.render_text_editor_parameters("Text Parameters") with gr.Column(): # Defaulting to 1. It's a still image, but may expand by adding some effects (grain, and not sure what else) fps = gr.Number(value=1, label="FPS", minimum=1, maximum=144) @@ -140,48 +144,70 @@ def render_music_video_creation() -> gr.Image: generate_audio_visualizer_button = gr.Checkbox(value=False, label="Generate Audio Visualizer", interactive=True) with gr.Group() as audio_visualizer_group: - audio_visualizer_color, audio_visualizer_opacity = \ - gru.render_color_opacity_picker("Audio Visualizer") + audio_visualizer_color_opacity = gru.render_color_opacity_picker("Audio Visualizer") with gr.Group(): with gr.Row(): - audio_visualizer_num_rows = gr.Number(value=90, label="Number of Rows", - minimum=1, maximum=100) - audio_visualizer_num_columns = gr.Number(value=65, label="Number of Columns", - minimum=1, maximum=100) + audio_visualizer_amount = dataclasses.RowColGradioComponents( + row=gr.Number(value=90, label="Number of Rows", minimum=1, + maximum=100), + col=gr.Number(value=65, label="Number of Columns", minimum=1, + maximum=100) + ) with gr.Row(): - audio_visualizer_min_size = gr.Number(value=1, label="Minimum Size", minimum=1, maximum=100) - audio_visualizer_max_size = gr.Number(value=7, label="Maximum Size", minimum=1, maximum=200) - # Must be a PNG file to support transparency. The idea for this is more-so to have shapes that can - # be rendered for the visualizer, and ideally they have transparent backgrounds, so using RGBA. + audio_visualizer_dot_size = dataclasses.MinMaxGradioComponents( + min=gr.Number(value=1, label="Minimum Size", minimum=1, maximum=100), + max=gr.Number(value=7, label="Maximum Size", minimum=1, maximum=200) + ) audio_visualizer_drawing = gr.Image(label="Visualizer Drawing (png)", type="filepath", sources=["upload"], show_share_button=False, show_download_button=False, scale=2, height=150, image_mode="RGBA") + visualizer_overlay_checkbox = gr.Checkbox(value=False, label="Overlay Visualizer on One Another", + info="If checked, alpha-blending will be applied, which " + "is noticeable on larger PNGs where each drawing " + "overlaps. This is only important for transparent " + "images and is very slow. 
If the image is not " + "transparent, leave this unchecked.") gru.bind_checkbox_to_visibility(generate_audio_visualizer_button, audio_visualizer_group) create_video_button = gr.Button("Create Music Video", variant="primary") gr.Markdown("## Output") with gr.Group(): - video_output, video_name, video_suffix, save_video_button = video_processing.render_video_output() + video_data = video_processing.render_video_output() create_video_button.click(create_music_video, inputs=[cover_image, audio_filepath, fps, artist_name, - artist_font.family, artist_font.style, artist_font.size, - artist_font.color, artist_font.opacity, artist_shadow.enabled, - artist_shadow.color, artist_shadow.opacity, - artist_shadow.radius, artist_background.enabled, - artist_background.color, artist_background.opacity, - song_title, song_font.family, song_font.style, song_font.size, - song_font.color, song_font.opacity, song_shadow.enabled, - song_shadow.color, song_shadow.opacity, song_shadow.radius, - song_background.enabled, song_background.color, - song_background.opacity, background_color, background_opacity, - generate_audio_visualizer_button, audio_visualizer_color, - audio_visualizer_opacity, audio_visualizer_drawing, - audio_visualizer_num_rows, audio_visualizer_num_columns, - audio_visualizer_min_size, audio_visualizer_max_size], - outputs=[video_output]) - save_video_button.click(video_processing.save_video_to_disk, - inputs=[video_output, video_name, video_suffix], outputs=[]) + artist_font_display.font.family, + artist_font_display.font.style, artist_font_display.font.size, + artist_font_display.font.color, + artist_font_display.font.opacity, + artist_font_display.drop_shadow.enabled, + artist_font_display.drop_shadow.color, + artist_font_display.drop_shadow.opacity, + artist_font_display.drop_shadow.radius, + artist_font_display.background.enabled, + artist_font_display.background.color, + artist_font_display.background.opacity, + song_title, song_font_display.font.family, + song_font_display.font.style, song_font_display.font.size, + song_font_display.font.color, song_font_display.font.opacity, + song_font_display.drop_shadow.enabled, + song_font_display.drop_shadow.color, + song_font_display.drop_shadow.opacity, + song_font_display.drop_shadow.radius, + song_font_display.background.enabled, + song_font_display.background.color, + song_font_display.background.opacity, + background_color_opacity.color, + background_color_opacity.opacity, + generate_audio_visualizer_button, + audio_visualizer_color_opacity.color, + audio_visualizer_color_opacity.opacity, + audio_visualizer_drawing, visualizer_overlay_checkbox, + audio_visualizer_amount.row, audio_visualizer_amount.col, + audio_visualizer_dot_size.min, audio_visualizer_dot_size.max], + outputs=[video_data.video]) + video_data.save.click(video_processing.save_video_to_disk, inputs=[video_data.video, video_data.name, + video_data.suffix], outputs=[]) return cover_image diff --git a/ui/music/utils.py b/ui/music/utils.py index 10726b9..1ffdb90 100644 --- a/ui/music/utils.py +++ b/ui/music/utils.py @@ -7,13 +7,14 @@ import time import tempfile from typing import List, Dict, Optional +from dataclasses import dataclass import cv2 from moviepy.editor import AudioFileClip import numpy as np import librosa from api import chatgpt as chatgpt_api from processing import image as image_processing -from utils import progress, visualizer, font_manager, image as image_utils, dataclasses +from utils import progress, visualizer, font_manager, image as image_utils, dataclasses as 
local_dataclasses def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]], np.ndarray): @@ -45,16 +46,25 @@ def analyze_audio(audio_path: str, target_fps: int) -> (List[Dict[float, float]] return downsampled_frequency_loudness, downsampled_times -def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, audio_length: int, fps: int, - audio_visualizer: dataclasses.RGBOpacity, dot_size: dataclasses.MinMax, - dot_count: dataclasses.RowCol, visualizer_drawing: Optional[str] = None) -> str: - print("Generating audio visualizer...") +@dataclass +class AudioVisualizerDotData: + """ + A dataclass representing the data for the audio visualizer's dots. + """ + size: local_dataclasses.MinMax + count: local_dataclasses.RowCol + color: local_dataclasses.RGBColor + opacity: int + visualizer_drawing: Optional[str] = None + visualizer_drawing_overlap: bool = False - audio_visualizer_color_and_opacity = image_utils.get_rgba(audio_visualizer.color, audio_visualizer.opacity) +def _audio_visualizer_generator(frame_size: local_dataclasses.Size, audio_path: str, audio_length: int, fps: int, + dot_data: AudioVisualizerDotData) -> str: + print("Generating audio visualizer...") custom_drawing = None - if visualizer_drawing is not None and visualizer_drawing != "": - custom_drawing = cv2.imread(visualizer_drawing, cv2.IMREAD_UNCHANGED) + if dot_data.visualizer_drawing is not None and dot_data.visualizer_drawing != "": + custom_drawing = cv2.imread(dot_data.visualizer_drawing, cv2.IMREAD_UNCHANGED) if custom_drawing.shape[2] == 3: custom_drawing = cv2.cvtColor(custom_drawing, cv2.COLOR_BGR2RGBA) else: @@ -65,9 +75,9 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a total_iterations = len(times) start_time = time.time() - vis = visualizer.Visualizer(size=dataclasses.Size(frame_size.width, frame_size.height), - dot_size=dot_size, color=audio_visualizer_color_and_opacity, - dot_count=dataclasses.RowCol(dot_count.row, dot_count.col)) + vis = visualizer.Visualizer(size=local_dataclasses.Size(frame_size.width, frame_size.height), + dot_size=dot_data.size, color=image_utils.get_rgba(dot_data.color, dot_data.opacity), + dot_count=local_dataclasses.RowCol(dot_data.count.row, dot_data.count.col)) vis.initialize_static_values() temp_visualizer_images_dir = tempfile.mkdtemp() os.makedirs(temp_visualizer_images_dir, exist_ok=True) @@ -75,7 +85,8 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a if time_point > audio_length: break frame = frame_cache.copy() - vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing) + vis.draw_visualizer(frame, frequency_loudness[i], custom_drawing=custom_drawing, + custom_drawing_overlap=dot_data.visualizer_drawing_overlap) frame_np = np.array(frame) frame_np = cv2.cvtColor(frame_np, cv2.COLOR_RGBA2BGRA) frame_filename = f'{temp_visualizer_images_dir}/frame_{i:05d}.png' @@ -87,23 +98,133 @@ def _audio_visualizer_generator(frame_size: dataclasses.Size, audio_path: str, a return temp_visualizer_images_dir -def create_music_video( +def _get_video_background(image_path: str, frame_size: local_dataclasses.Size, + background_overlay_color_opacity: local_dataclasses.RGBOpacity) -> np.ndarray: + """ + Gets the background for the video, which is a gaussian blurred version of the cover image stretched with a color + overlay. + :param image_path: The path to the image to use background. + :param frame_size: The size of the frame to use for the background. 
+ :param background_overlay_color_opacity: The color and opacity to use for the background overlay. + :return: + """ + background = cv2.imread(image_path) + background = cv2.resize(background, (frame_size.width, frame_size.height)) + background = cv2.GaussianBlur(background, (49, 49), 0) + if background.shape[2] == 3: + background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA) + overlay = np.full((frame_size.height, frame_size.width, 4), + image_utils.get_bgra(background_overlay_color_opacity.color, + background_overlay_color_opacity.opacity), + dtype=np.uint8) + alpha_overlay = overlay[:, :, 3] / 255.0 + alpha_background = background[:, :, 3] / 255.0 + for c in range(0, 3): + background[:, :, c] = (alpha_overlay * overlay[:, :, c] + + alpha_background * (1 - alpha_overlay) * background[:, :, c]) + background[:, :, 3] = (alpha_overlay + alpha_background * (1 - alpha_overlay)) * 255 + return background + + +def _generate_final_video(background_image_path: str, visualizer_frames_dir: Optional[str], cover_image_path: str, + audio_path: str, fps: int) -> str: + """ + Generates the final video using the given parameters with ffmpeg. + :param background_image_path: The path to the background image to use for the video. + :param visualizer_frames_dir: The path to the directory containing the audio visualizer frames. + :param cover_image_path: The path to the cover image to use for the video. + :param audio_path: The path to the audio file to use for the video. + :param fps: The frames per second to use for the video. + :return: + """ + temp_final_video_path = tempfile.mktemp(suffix=".mp4") + + audio_clip = AudioFileClip(audio_path) + ffmpeg_commands = [ + "ffmpeg", "-y", + "-loop", "1", + "-i", background_image_path, + ] + + if visualizer_frames_dir is not None: + ffmpeg_commands.extend([ + "-framerate", str(fps), + "-i", f'{visualizer_frames_dir}/frame_%05d.png', + ]) + filter_complex = "[0][1]overlay=format=auto[bg];[bg][2]overlay=format=auto" + else: + filter_complex = "[0][1]overlay=format=auto" + + ffmpeg_commands.extend([ + "-framerate", str(fps), + "-i", cover_image_path, + "-i", audio_path, + "-filter_complex", filter_complex, + "-map", "3:a" if visualizer_frames_dir is not None else "2:a", + "-c:v", "libx264", + "-c:a", "aac", + "-strict", "experimental", + "-t", str(audio_clip.duration), + "-hide_banner", + "-framerate", str(fps), + '-pix_fmt', 'yuv420p', + temp_final_video_path + ]) + print("Generating final video...") + + duration_regex = re.compile(r"Duration: (\d\d):(\d\d):(\d\d)\.\d\d") + time_regex = re.compile(r"time=(\d\d):(\d\d):(\d\d)\.\d\d") + + ffmpeg_start_time = time.time() + with subprocess.Popen(ffmpeg_commands, stderr=subprocess.PIPE, text=True) as ffmpeg_process: + for line in ffmpeg_process.stderr: + # Extract total duration of the video + duration_match = duration_regex.search(line) + if duration_match: + duration_match_groups = duration_match.groups() + curr_duration = local_dataclasses.Time( + hours=int(duration_match_groups[0]), + minutes=int(duration_match_groups[1]), + seconds=int(duration_match_groups[2]) + ) + + # Extract current time of encoding + time_match = time_regex.search(line) + if time_match and int(curr_duration) > 0: + time_match_groups = time_match.groups() + curr_time = local_dataclasses.Time( + hours=int(time_match_groups[0]), + minutes=int(time_match_groups[1]), + seconds=int(time_match_groups[2]) + ) + progress.print_progress_bar(int(curr_time), int(curr_duration), start_time=ffmpeg_start_time) + + progress.print_progress_bar(100, 100, 
end='\n', start_time=ffmpeg_start_time) + + return temp_final_video_path + + +def create_music_video( # pylint: disable=too-many-locals image_path: str, audio_path: str, fps: int, artist: str, artist_font_type: str, artist_font_style: str, artist_font_size: int, - artist_font_color: dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool, - artist_shadow_color: dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int, - artist_background_enabled: bool, artist_background_color: dataclasses.RGBColor, artist_background_opacity: int, - song: str, song_font_type: str, song_font_style: str, song_font_size: int, - song_font_color: dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool, - song_shadow_color: dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int, - song_background_enabled: bool, song_background_color: dataclasses.RGBColor, song_background_opacity: int, - background_color: dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66, - generate_audio_visualizer: bool = False, audio_visualizer_color: dataclasses.RGBColor = (255, 255, 255), + artist_font_color: local_dataclasses.RGBColor, artist_font_opacity: int, artist_shadow_enabled: bool, + artist_shadow_color: local_dataclasses.RGBColor, artist_shadow_opacity: int, artist_shadow_radius: int, + artist_background_enabled: bool, artist_background_color: local_dataclasses.RGBColor, + artist_background_opacity: int, song: str, song_font_type: str, song_font_style: str, song_font_size: int, + song_font_color: local_dataclasses.RGBColor, song_font_opacity: int, song_shadow_enabled: bool, + song_shadow_color: local_dataclasses.RGBColor, song_shadow_opacity: int, song_shadow_radius: int, + song_background_enabled: bool, song_background_color: local_dataclasses.RGBColor, song_background_opacity: int, + background_color: local_dataclasses.RGBColor = (0, 0, 0), background_opacity: int = 66, + generate_audio_visualizer: bool = False, audio_visualizer_color: local_dataclasses.RGBColor = (255, 255, 255), audio_visualizer_opacity: int = 100, visualizer_drawing: Optional[str] = None, - audio_visualizer_num_rows: int = 90, audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1, + visualizer_drawing_overlap: bool = False, audio_visualizer_num_rows: int = 90, + audio_visualizer_num_columns: int = 65, audio_visualizer_min_size: int = 1, audio_visualizer_max_size: int = 7) -> Optional[str]: """ Creates a music video using the given parameters. + :param visualizer_drawing_overlap: Whether to overlap the visualizer drawings onto one-another with alpha-blending. + This is only noticeable on images with transparency and is a slow process, so if your visualizer drawings are + not transparent, it is recommended to set this to False. :param image_path: The path to the image to use as the cover + background for the video. :param audio_path: The path to the audio file to use for the video. :param fps: The frames per second to use for the video. @@ -145,29 +266,22 @@ def create_music_video( :param audio_visualizer_max_size: The maximum size to use for the audio visualizer's drawings (peak loudness). :return: The path to the generated video, or None if there was an error. 
""" - if image_path is None: - print("No cover image for the video.") - return None - if audio_path is None: - print("No audio to add to the video.") + if image_path is None or audio_path is None: + print("No cover image and/or audio for the video.") return None # Could probably expand to 4k, but unnecessary for this type of music video # Maybe in a future iteration it could be worth it - frame_size = dataclasses.Size(1920, 1080) + frame_size = local_dataclasses.Size(1920, 1080) # Set up cover - cover = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) - if cover.shape[2] == 3: - cover = cv2.cvtColor(cover, cv2.COLOR_BGR2RGBA) - else: - cover = cv2.cvtColor(cover, cv2.COLOR_BGRA2RGBA) + cover = image_utils.open_image_as_rgba(image_path) # Create canvas with 4 channels (RGBA) canvas = np.zeros((frame_size.height, frame_size.width, 4), dtype=np.uint8) # Calculate dimensions for resizing the cover to fit within the canvas while maintaining its aspect ratio - cover_size = dataclasses.Size(cover.shape[1], cover.shape[0]) + cover_size = local_dataclasses.Size(cover.shape[1], cover.shape[0]) resize_factor = min(frame_size.width / cover_size.width, frame_size.height / cover_size.height) resize_factor *= (7 / 10) cover_size.width = int(cover_size.width * resize_factor) @@ -183,32 +297,28 @@ def create_music_video( audio_clip = AudioFileClip(audio_path) # Add video background - background = cv2.imread(image_path) - background = cv2.resize(background, (frame_size.width, frame_size.height)) - background = cv2.GaussianBlur(background, (49, 49), 0) - if background.shape[2] == 3: - background = cv2.cvtColor(background, cv2.COLOR_BGR2BGRA) - background_color_overlay = image_utils.get_bgra(background_color, background_opacity) - overlay = np.full((frame_size.height, frame_size.width, 4), background_color_overlay, dtype=np.uint8) - alpha_overlay = overlay[:, :, 3] / 255.0 - alpha_background = background[:, :, 3] / 255.0 - for c in range(0, 3): - background[:, :, c] = (alpha_overlay * overlay[:, :, c] + - alpha_background * (1 - alpha_overlay) * background[:, :, c]) - background[:, :, 3] = (alpha_overlay + alpha_background * (1 - alpha_overlay)) * 255 + background = _get_video_background(image_path, frame_size, + local_dataclasses.RGBOpacity(background_color, background_opacity)) background_bgr = cv2.cvtColor(background, cv2.COLOR_BGRA2BGR) tmp_background_image_path = tempfile.mktemp(suffix=".png") cv2.imwrite(tmp_background_image_path, background_bgr) + temp_visualizer_images_dir = None if generate_audio_visualizer: temp_visualizer_images_dir = _audio_visualizer_generator(frame_size, audio_path, audio_clip.duration, fps, - dataclasses.RGBOpacity(audio_visualizer_color, - audio_visualizer_opacity), - dataclasses.MinMax(audio_visualizer_min_size, - audio_visualizer_max_size), - dataclasses.RowCol(audio_visualizer_num_rows, - audio_visualizer_num_columns), - visualizer_drawing=visualizer_drawing) + AudioVisualizerDotData( + size=local_dataclasses.MinMax( + audio_visualizer_min_size, + audio_visualizer_max_size), + color=audio_visualizer_color, + opacity=audio_visualizer_opacity, + count=local_dataclasses.RowCol( + audio_visualizer_num_rows, + audio_visualizer_num_columns), + visualizer_drawing=visualizer_drawing, + visualizer_drawing_overlap=\ + visualizer_drawing_overlap) + ) # Add text font_families = font_manager.get_fonts() @@ -240,96 +350,19 @@ def create_music_video( artist_shadow_opacity), show_background=artist_background_enabled, background_color=image_utils.get_rgba( - artist_background_color, 
artist_background_opacity)) - - text_np = np.array(text_canvas) - np_canvas = np.array(canvas) - # Normalize the alpha channels - alpha_text = text_np[:, :, 3] / 255.0 - alpha_canvas = np_canvas[:, :, 3] / 255.0 - alpha_final = alpha_text + alpha_canvas * (1 - alpha_text) - - canvas_final = np.zeros_like(np_canvas) - # alpha blend - for c in range(3): # Loop over color (non-alpha) channels - canvas_final[:, :, c] = (alpha_text * text_np[:, :, c] + alpha_canvas * (1 - alpha_text) * - np_canvas[:, :, c]) / alpha_final - canvas_final[:, :, 3] = alpha_final * 255 - canvas_final[:, :, :3][alpha_final == 0] = 0 + artist_background_color, artist_background_opacity)) + canvas_final = image_utils.blend_alphas(np.array(text_canvas), np.array(canvas)) temp_canvas_image_path = tempfile.mktemp(suffix=".png") # Convert to BGR for OpenCV canvas_final = cv2.cvtColor(canvas_final, cv2.COLOR_RGBA2BGRA) cv2.imwrite(temp_canvas_image_path, canvas_final) - temp_final_video_path = tempfile.mktemp(suffix=".mp4") - - # set up the background video commands - ffmpeg_commands = [ - "ffmpeg", "-y", - "-loop", "1", - "-i", tmp_background_image_path, - ] + temp_final_video_path = _generate_final_video(tmp_background_image_path, temp_visualizer_images_dir, + temp_canvas_image_path, audio_path, fps) - if generate_audio_visualizer: - ffmpeg_commands.extend([ - "-framerate", str(fps), - "-i", f'{temp_visualizer_images_dir}/frame_%05d.png', - ]) - filter_complex = "[0][1]overlay=format=auto[bg];[bg][2]overlay=format=auto" - audio_input_map = "3:a" - else: - filter_complex = "[0][1]overlay=format=auto" - audio_input_map = "2:a" - - ffmpeg_commands.extend([ - "-framerate", str(fps), - "-i", temp_canvas_image_path, - "-i", audio_path, - "-filter_complex", filter_complex, - "-map", audio_input_map, - "-c:v", "libx264", - "-c:a", "aac", - "-strict", "experimental", - "-t", str(audio_clip.duration), - "-hide_banner", - "-framerate", str(fps), - '-pix_fmt', 'yuv420p', - temp_final_video_path - ]) - print("Generating final video...") - ffmpeg_process = subprocess.Popen(ffmpeg_commands, stderr=subprocess.PIPE, text=True) - - duration_regex = re.compile(r"Duration: (\d\d):(\d\d):(\d\d)\.\d\d") - time_regex = re.compile(r"time=(\d\d):(\d\d):(\d\d)\.\d\d") - total_duration_in_seconds = 0 - - ffmpeg_start_time = time.time() - while True: - line = ffmpeg_process.stderr.readline() - if not line: - break - - # Extract total duration of the video - duration_match = duration_regex.search(line) - if duration_match: - hours, minutes, seconds = map(int, duration_match.groups()) - total_duration_in_seconds = hours * 3600 + minutes * 60 + seconds - - # Extract current time of encoding - time_match = time_regex.search(line) - if time_match and total_duration_in_seconds > 0: - hours, minutes, seconds = map(int, time_match.groups()) - current_time = hours * 3600 + minutes * 60 + seconds - progress.print_progress_bar(current_time, total_duration_in_seconds, start_time=ffmpeg_start_time) - - ffmpeg_process.wait() - if ffmpeg_process.returncode != 0: - raise subprocess.CalledProcessError(ffmpeg_process.returncode, ffmpeg_commands) - progress.print_progress_bar(100, 100, end='\n', start_time=ffmpeg_start_time) - print("Done generating final video!\n") # clean up the original frames - if generate_audio_visualizer: + if temp_visualizer_images_dir is not None: for file in os.listdir(temp_visualizer_images_dir): os.remove(os.path.join(temp_visualizer_images_dir, file)) os.rmdir(temp_visualizer_images_dir) @@ -353,11 +386,13 @@ def 
generate_cover_image(api_key: str, api_model: str, prompt: str) -> Optional[ return chatgpt_api.url_to_gradio_image_name(image_url) +# pylint: disable=too-many-locals def process(image_path: str, artist: str, song: str, - af_family: str, af_style: str, afs: int, afc: dataclasses.RGBColor, afo: int, ase: bool, - asc: dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: dataclasses.RGBColor, abo: int, - sf_family: str, sf_style: str, sfs: int, sfc: dataclasses.RGBColor, sfo: int, sse: bool, - ssc: dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: dataclasses.RGBColor, sbo: int) \ + af_family: str, af_style: str, afs: int, afc: local_dataclasses.RGBColor, afo: int, ase: bool, + asc: local_dataclasses.RGBColor, aso: int, asr: Optional[int], abe: bool, abc: local_dataclasses.RGBColor, + abo: int, sf_family: str, sf_style: str, sfs: int, sfc: local_dataclasses.RGBColor, sfo: int, sse: bool, + ssc: local_dataclasses.RGBColor, sso: int, ssr: Optional[int], sbe: bool, sbc: local_dataclasses.RGBColor, + sbo: int) \ -> Optional[np.ndarray]: """ Processes the image at the given path (by adding the requested text) and returns the processed image. diff --git a/utils/dataclasses.py b/utils/dataclasses.py index 77c3531..0755858 100644 --- a/utils/dataclasses.py +++ b/utils/dataclasses.py @@ -59,8 +59,17 @@ class MinMax: """ A dataclass representing a minimum and maximum value. """ - min: int - max: int + min: Union[int, float] + max: Union[int, float] + + +@dataclass +class MinMaxGradioComponents: + """ + A dataclass representing the components of a minimum and maximum value editor. + """ + min: gr.Number + max: gr.Number @dataclass @@ -72,6 +81,15 @@ class RowCol: col: int +@dataclass +class RowColGradioComponents: + """ + A dataclass representing the components of a row and column editor. + """ + row: gr.Number + col: gr.Number + + @dataclass class FontGradioComponents: """ @@ -105,4 +123,50 @@ class FontBackgroundGradioComponents: opacity: gr.Slider +@dataclass +class FontDisplayGradioComponents: + """ + A dataclass representing the components of how to display the font. + """ + font: FontGradioComponents + drop_shadow: FontDropShadowGradioComponents + background: FontBackgroundGradioComponents + + +@dataclass +class ColorOpacityGradioComponents: + """ + A dataclass representing the components of the color and opacity editor. + """ + color: gr.ColorPicker + opacity: gr.Slider + + +@dataclass +class VideoOutputGradioComponents: + """ + A dataclass representing the components of the video output. + """ + video: gr.Video + name: gr.Textbox + suffix: gr.Dropdown + save: gr.Button + + +@dataclass +class Time: + """ + A dataclass representing a time. + """ + hours: int + minutes: int + seconds: int + + def __int__(self) -> int: + """ + Returns the time in seconds. + """ + return self.hours * 3600 + self.minutes * 60 + self.seconds + + RGBColor = Union[str, tuple[int, int, int]] diff --git a/utils/gradio.py b/utils/gradio.py index 54353ff..fae5a02 100644 --- a/utils/gradio.py +++ b/utils/gradio.py @@ -5,18 +5,18 @@ from utils import font_manager, dataclasses -def render_color_opacity_picker(default_name_label: str = "Font") -> tuple[gr.ColorPicker, gr.Slider]: +def render_color_opacity_picker(default_name_label: str = "Font") -> dataclasses.ColorOpacityGradioComponents: """ Renders a color picker with the appropriate styling. :param default_name_label: The default name label to use. - :return: A tuple containing the color and opacity components. 
+ :return: A class containing the color and opacity components. """ with gr.Group(): with gr.Row(): color = gr.ColorPicker(label=f"{default_name_label} Color", scale=1, interactive=True) opacity = gr.Slider(0, 100, value=100, label="Opacity", scale=2, interactive=True) - return color, opacity + return dataclasses.ColorOpacityGradioComponents(color, opacity) def bind_checkbox_to_visibility(checkbox: gr.Checkbox, group: gr.Group): @@ -58,10 +58,11 @@ def update_font_styles(selected_font_family): font_family.change(update_font_styles, inputs=[font_family], outputs=[font_style]) with gr.Group(): - font_color, font_opacity = render_color_opacity_picker() + font_color_opacity = render_color_opacity_picker() font_size = gr.Number(default_font_size, label="Font Size", interactive=True) - return dataclasses.FontGradioComponents(font_family, font_style, font_color, font_opacity, font_size) + return dataclasses.FontGradioComponents(font_family, font_style, font_color_opacity.color, + font_color_opacity.opacity, font_size) def render_tool_description(description: str): diff --git a/utils/image.py b/utils/image.py index b070a04..42f59e4 100644 --- a/utils/image.py +++ b/utils/image.py @@ -2,6 +2,8 @@ This file contains functions for image processing. """ from typing import Tuple +import cv2 +import numpy as np from utils import dataclasses @@ -43,3 +45,45 @@ def get_bgra(color: dataclasses.RGBColor, opacity: int) -> Tuple[int, int, int, color = tuple(int(color[i:i + 2], 16) for i in (0, 2, 4)) return color[2], color[1], color[0], get_alpha_from_opacity(opacity) + + +def open_image_as_rgba(image_path: str) -> np.ndarray: + """ + Opens an image as RGBA. + :param image_path: The path to the image. + :return: The image as RGBA. + """ + img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED) + if img.shape[2] == 3: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA) + else: + img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGBA) + + return img + + +def blend_alphas(a: np.ndarray, b: np.ndarray) -> np.ndarray: + """ + Blends two images together using alpha blending. + :param a: The first image. + :param b: The second image. + :return: The blended image. + """ + if a.shape != b.shape: + raise ValueError("both images must have the same shape to blend alphas") + if a.shape[2] != 4 or b.shape[2] != 4: + raise ValueError("both images must have 4 channels to blend alphas") + + alpha_text = a[:, :, 3] / 255.0 + alpha_canvas = b[:, :, 3] / 255.0 + alpha_final = alpha_text + alpha_canvas * (1 - alpha_text) + + final = np.zeros_like(b) + # alpha blend + for c in range(3): # Loop over color (non-alpha) channels + final[:, :, c] = (alpha_text * a[:, :, c] + alpha_canvas * (1 - alpha_text) * + b[:, :, c]) / alpha_final + final[:, :, 3] = alpha_final * 255 + final[:, :, :3][alpha_final == 0] = 0 + + return final diff --git a/utils/visualizer.py b/utils/visualizer.py index 8e53285..2beb839 100644 --- a/utils/visualizer.py +++ b/utils/visualizer.py @@ -4,7 +4,7 @@ from typing import Dict, Optional import numpy as np import cv2 -from utils import dataclasses +from utils import dataclasses, image as image_utils class Visualizer: @@ -12,6 +12,7 @@ class Visualizer: This class is used to draw the visualizer on the canvas. Will be replaced with a more general solution in the future to allow for more customization. 
""" + def __init__(self, dot_size: dataclasses.MinMax, color, dot_count: dataclasses.RowCol, size: dataclasses.Size): self.dot_size = dot_size self.color = color @@ -34,14 +35,11 @@ def initialize_static_values(self: "Visualizer") -> None: self.cached_dot_positions = [(grid_x[y, x], grid_y[y, x]) for x in range(self.dot_count.col) for y in range(self.dot_count.row)] - def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], - custom_drawing: Optional[np.ndarray] = None) -> None: + def _get_loudness(self, frequency_data: Dict[float, float]) -> (dataclasses.MinMax, Dict[int, int]): """ - Draws the visualizer on the canvas (a single frame). - :param canvas: The canvas to draw on. + Calculates the loudness values for each column. :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency. - :param custom_drawing: A custom drawing to use instead of the default circle. - :return: None. + :return: A tuple containing the loudness min/max and the loudness values for each column. """ # Precompute log frequencies freq_keys = np.array(list(frequency_data.keys())) @@ -51,66 +49,111 @@ def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict # Find the maximum and minimum loudness values, ignoring -80 dB freq_bands = np.array([frequency_data[key] for key in freq_keys if key > 0]) # Ignore 0 Hz - max_loudness = np.max(freq_bands) filtered_loudness = freq_bands[freq_bands > -80] - min_loudness = np.min(filtered_loudness) if filtered_loudness.size > 0 else -80 + loudness_min_max = dataclasses.MinMax(np.min(filtered_loudness) if filtered_loudness.size > 0 else -80, + np.max(freq_bands)) # Precompute loudness values loudness_values = {} for x in range(self.dot_count.col): - lower_bound = log_freqs[x] - upper_bound = log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 - band_freqs = [freq for freq in freq_keys if lower_bound <= freq < upper_bound] + bounds = { + "lower": log_freqs[x], + "upper": log_freqs[x + 1] if x < self.dot_count.col - 1 else end_freq + 1 + } + band_freqs = [freq for freq in freq_keys if bounds.get("lower") <= freq < bounds.get("upper")] if not band_freqs: - closest_freq = min(freq_keys, key=lambda f, lb=lower_bound: abs(f - lb)) + closest_freq = min(freq_keys, key=lambda f, lb=bounds.get("lower"): abs(f - lb)) band_freqs = [closest_freq] band_loudness = [frequency_data[freq] for freq in band_freqs] avg_loudness = np.mean(band_loudness) if band_loudness else -80 loudness_values[x] = avg_loudness + return loudness_min_max, loudness_values + + def _calculate_dot_size(self: "Visualizer", column: int, loudness: dataclasses.MinMax, + loudness_values: Dict[int, int]) -> int: + """ + Calculates the dot size for a given column. + :param loudness: The loudness min/max. + :param loudness_values: The loudness values for each column. + :return: The dot size. 
+ """ + # Scale the loudness to the dot size + scaled_loudness = (loudness_values[column] - loudness.min) / ( + loudness.max - loudness.min) if loudness.max != loudness.min else 0 + dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min) + return min(max(dot_size, self.dot_size.min), self.dot_size.max) + + def _draw_custom_drawing(self: "Visualizer", canvas: np.ndarray, start_pos: dataclasses.Position, + end_pos: dataclasses.Position, img_start_pos: dataclasses.Position, + img_end_pos: dataclasses.Position, dot_size: int, + custom_drawing_overlap: bool) -> np.ndarray: + """ + Draws the custom drawing on the canvas. + :param canvas: The canvas to draw on. + :param start_pos: The start position on the canvas. + :param end_pos: The end position on the canvas. + :param img_start_pos: The start position on the resized image. + :param img_end_pos: The end position on the resized image. + :param dot_size: The dot size. + :param custom_drawing_overlap: Whether overlapped custom drawings should alpha blend. + :return: The canvas with the custom drawing drawn on it. + """ + drawing_slice = self.cached_resized_drawing[dot_size][img_start_pos.y:img_end_pos.y, + img_start_pos.x:img_end_pos.x] + + if custom_drawing_overlap: + canvas_slice = canvas[start_pos.y:end_pos.y, start_pos.x:end_pos.x] + return image_utils.blend_alphas(canvas_slice, drawing_slice) + + return drawing_slice + + def draw_visualizer(self: "Visualizer", canvas: np.ndarray, frequency_data: Dict[float, float], + custom_drawing: Optional[np.ndarray] = None, custom_drawing_overlap: bool = False) -> None: + """ + Draws the visualizer on the canvas (a single frame). + :param custom_drawing_overlap: Whether to overlap the custom drawing should alpha blend when overlapping. + :param canvas: The canvas to draw on. + :param frequency_data: The frequency data to use for drawing which correlates to the loudness + frequency. + :param custom_drawing: A custom drawing to use instead of the default circle. + :return: None. 
+ """ + loudness, loudness_values = self._get_loudness(frequency_data) + cached_dot_sizes = {} for i, (pos_x, pos_y) in enumerate(self.cached_dot_positions): column = i // self.dot_count.row # Ensure the correct column is computed if column not in cached_dot_sizes: - avg_loudness = loudness_values[column] - # Scale the loudness to the dot size - scaled_loudness = (avg_loudness - min_loudness) / ( - max_loudness - min_loudness) if max_loudness != min_loudness else 0 - dot_size = self.dot_size.min + scaled_loudness * (self.dot_size.max - self.dot_size.min) - dot_size = min(max(dot_size, self.dot_size.min), self.dot_size.max) - - cached_dot_sizes[column] = dot_size - else: - dot_size = cached_dot_sizes[column] + cached_dot_sizes[column] = self._calculate_dot_size(column, loudness, loudness_values) - # Convert dot size to integer and calculate the center position - dot_size = int(dot_size) - center = (int(pos_x), int(pos_y)) + dot_size = int(cached_dot_sizes[column]) + center_pos = dataclasses.Position(int(pos_x), int(pos_y)) if custom_drawing is not None: if dot_size not in self.cached_resized_drawing: - self.cached_resized_drawing[dot_size] = cv2.resize(custom_drawing, (dot_size, dot_size), - interpolation=cv2.INTER_LANCZOS4) - resized_custom_drawing = self.cached_resized_drawing[dot_size] + if dot_size == 0: + self.cached_resized_drawing[dot_size] = np.zeros((1, 1, 4), dtype=np.uint8) + else: + self.cached_resized_drawing[dot_size] = cv2.resize(custom_drawing, (dot_size, dot_size), + interpolation=cv2.INTER_LANCZOS4) - center_x, center_y = int(pos_x), int(pos_y) half_dot_size = dot_size // 2 - # Calculate bounds on the canvas - start_x = max(center_x - half_dot_size, 0) - end_x = min(center_x + half_dot_size, canvas.shape[1]) - start_y = max(center_y - half_dot_size, 0) - end_y = min(center_y + half_dot_size, canvas.shape[0]) + start_pos = dataclasses.Position(max(center_pos.x - half_dot_size, 0), + max(center_pos.y - half_dot_size, 0)) + end_pos = dataclasses.Position(min(center_pos.x + half_dot_size, canvas.shape[1]), min( + center_pos.y + half_dot_size, canvas.shape[0])) # Calculate corresponding bounds on the resized image - img_start_x = max(half_dot_size - (center_x - start_x), 0) - img_end_x = img_start_x + (end_x - start_x) - img_start_y = max(half_dot_size - (center_y - start_y), 0) - img_end_y = img_start_y + (end_y - start_y) - - # Place the image slice onto the canvas - canvas[start_y:end_y, start_x:end_x] = resized_custom_drawing[img_start_y:img_end_y, - img_start_x:img_end_x] + img_start_pos = dataclasses.Position(max(half_dot_size - (center_pos.x - start_pos.x), 0), + max(half_dot_size - (center_pos.y - start_pos.y), 0)) + img_end_pos = dataclasses.Position(img_start_pos.x + (end_pos.x - start_pos.x), + img_start_pos.y + (end_pos.y - start_pos.y)) + + canvas[start_pos.y:end_pos.y, start_pos.x:end_pos.x] = self._draw_custom_drawing(canvas, start_pos, + end_pos, img_start_pos, + img_end_pos, dot_size, + custom_drawing_overlap) else: - cv2.circle(canvas, center, dot_size // 2, self.color, -1) + cv2.circle(canvas, (center_pos.x, center_pos.y), dot_size // 2, self.color, -1)