From fd8998b770350b96b08fe06a251e58e6cd488bb4 Mon Sep 17 00:00:00 2001 From: clement-pages Date: Tue, 17 Sep 2024 13:58:03 +0200 Subject: [PATCH 1/2] add audio spectrogram on the audio player --- .../backend/gryannote_audio/audio_labeling.py | 5 ++++ gryannote/audio/demo/app.py | 15 +++++------ gryannote/audio/frontend/Index.svelte | 3 +++ .../InteractiveAudioLabeling.svelte | 2 ++ .../player/AudioPlayerWithAnnotation.svelte | 26 +++++++++++++++++-- .../static/StaticAudioLabeling.svelte | 2 ++ 6 files changed, 42 insertions(+), 11 deletions(-) diff --git a/gryannote/audio/backend/gryannote_audio/audio_labeling.py b/gryannote/audio/backend/gryannote_audio/audio_labeling.py index 7db30fd..9ad628f 100644 --- a/gryannote/audio/backend/gryannote_audio/audio_labeling.py +++ b/gryannote/audio/backend/gryannote_audio/audio_labeling.py @@ -102,6 +102,7 @@ def __init__( autoplay: bool = False, show_download_button=True, show_share_button: bool | None = None, + show_spectrogram: bool = True, editable: bool = True, min_length: int | None = None, max_length: int | None = None, @@ -131,6 +132,9 @@ def __init__( show_download_button: If True, will show a download button in the corner of the component for saving audio. If False, icon does not appear. show_share_button: If True, will show a share icon in the corner of the component that allows user to share outputs to Hugging Face Spaces Discussions. If False, icon does not appear. If set to None (default behavior), then the icon appears if this Gradio app is launched on Spaces, but not otherwise. editable: If True, allows users to manipulate the audio file (if the component is interactive). + show_spectrogram: bool, optional + Whether to show audio spectrogram on the interface. + Default to True. min_length: The minimum length of audio (in seconds) that the user can pass into the prediction function. If None, there is no minimum length. max_length: The maximum length of audio (in seconds) that the user can pass into the prediction function. If None, there is no maximum length. waveform_options: A dictionary of options for the waveform display. Options include: waveform_color (str), waveform_progress_color (str), show_controls (bool), skip_length (int). Default is None, which uses the default values for these options. @@ -174,6 +178,7 @@ def __init__( if show_share_button is None else show_share_button ) + self.show_spectrogram = show_spectrogram self.editable = editable diff --git a/gryannote/audio/demo/app.py b/gryannote/audio/demo/app.py index de1a710..d42f953 100644 --- a/gryannote/audio/demo/app.py +++ b/gryannote/audio/demo/app.py @@ -1,17 +1,14 @@ import gradio as gr -from gryannote_audio import AudioLabeling +from gryannote_audio import AudioLabeling, Player from pyannote.audio import Pipeline +from pyannote.database.util import load_rttm -audio_labeling = AudioLabeling(type="filepath", interactive=True) +audio = "/home/clement-pages/gryannote/sample.wav" +annotations = load_rttm("/home/clement-pages/gryannote/sample.rttm")["sample"] +player = AudioLabeling(audio=audio, annotations=annotations, show_spectrogram=False) -def apply_pipeline(audio): - pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1") - annotations = pipeline(audio) - return (audio, annotations) - - -demo = gr.Interface(apply_pipeline, inputs=audio_labeling, outputs=audio_labeling) +demo = gr.Interface(lambda x: x, inputs=None, outputs=player) if __name__ == "__main__": diff --git a/gryannote/audio/frontend/Index.svelte b/gryannote/audio/frontend/Index.svelte index 3701549..87c2592 100644 --- a/gryannote/audio/frontend/Index.svelte +++ b/gryannote/audio/frontend/Index.svelte @@ -24,6 +24,7 @@ export let root: string; export let show_label: boolean; export let show_download_button: boolean = true; + export let show_spectrogram: boolean = true; export let container = true; export let scale: number | null = null; export let min_width: number | undefined = undefined; @@ -133,6 +134,7 @@ i18n={gradio.i18n} {show_label} {show_download_button} + {show_spectrogram} {value} {label} {waveform_settings} @@ -165,6 +167,7 @@ {label} {show_label} {show_download_button} + {show_spectrogram} {value} on:change={({ detail }) => (value = detail)} on:stream={({ detail }) => { diff --git a/gryannote/audio/frontend/interactive/InteractiveAudioLabeling.svelte b/gryannote/audio/frontend/interactive/InteractiveAudioLabeling.svelte index 7fa2c8a..4b7f6f2 100644 --- a/gryannote/audio/frontend/interactive/InteractiveAudioLabeling.svelte +++ b/gryannote/audio/frontend/interactive/InteractiveAudioLabeling.svelte @@ -24,6 +24,7 @@ export let root: string; export let show_label = true; export let show_download_button: boolean = true; + export let show_spectrogram: boolean = true; export let sources: | ["microphone"] | ["upload"] @@ -275,6 +276,7 @@ {value} {label} {i18n} + {show_spectrogram} {waveform_settings} {waveform_options} {editable} diff --git a/gryannote/audio/frontend/player/AudioPlayerWithAnnotation.svelte b/gryannote/audio/frontend/player/AudioPlayerWithAnnotation.svelte index 284d520..8b4f9a2 100644 --- a/gryannote/audio/frontend/player/AudioPlayerWithAnnotation.svelte +++ b/gryannote/audio/frontend/player/AudioPlayerWithAnnotation.svelte @@ -14,6 +14,7 @@ type ButtonEvent, type AxeEvent, } from "wavesurfer.js/dist/plugins/gamepad.js"; + import Spectrogram from "wavesurfer.js/dist/plugins/spectrogram" import WaveformControls from "../shared/WaveformControls.svelte"; import { Empty } from "@gradio/atoms"; import { resolve_wasm_src } from "@gradio/wasm/svelte"; @@ -28,15 +29,17 @@ export let i18n: I18nFormatter; export let interactive = true; export let editable = true; + export let show_spectrogram = true; export let waveform_settings: Record; export let waveform_options: WaveformOptions; export let mode = ""; export let isDialogOpen: boolean; let container: HTMLDivElement; - let waveform: WaveSurfer | undefined; + let waveform: WaveSurfer; let wsRegions: RegionsPlugin; let wsGamepad: GamepadPlugin; + let wsSpectro: Spectrogram; let activeRegion: Region | null = null; let leftRegionHandle: HTMLDivElement | null; let rightRegionHandle: HTMLDivElement | null; @@ -128,6 +131,12 @@ let top = regionColor * (100. / numColors); let height = 100. / numColors; + // if spectrogram is displayed, only the top 50% of the container is available for waveform + if(show_spectrogram){ + top = top / 2; + height = height / 2; + } + // update region alignment style: region.element.style.top = top.toString() + "%"; region.element.style.height = height.toString() + "%"; @@ -618,6 +627,7 @@ onGamepadAxePushed(e); }); } + if(wsRegions === undefined ){ wsRegions = waveform.registerPlugin(RegionsPlugin.create()); if(interactive){ @@ -635,6 +645,14 @@ }); } } + + if(show_spectrogram && !wsSpectro){ + wsSpectro = waveform.registerPlugin(Spectrogram.create({ + labels: true, + splitChannels: true, + })); + } + if (!waveform_settings.autoplay) { waveform?.stop(); } else { @@ -893,7 +911,11 @@ } :global(::part(wrapper)) { - margin-bottom: var(--size-2); + display: flex; + flex-direction: column; + justify-content: space-between; + gap: 20px; + margin-bottom: var(--size-2); } .timestamps { diff --git a/gryannote/audio/frontend/static/StaticAudioLabeling.svelte b/gryannote/audio/frontend/static/StaticAudioLabeling.svelte index 613d692..e96cbeb 100644 --- a/gryannote/audio/frontend/static/StaticAudioLabeling.svelte +++ b/gryannote/audio/frontend/static/StaticAudioLabeling.svelte @@ -14,6 +14,7 @@ export let label: string; export let show_label = true; export let show_download_button: boolean = true; + export let show_spectrogram: boolean = true; export let i18n: I18nFormatter; export let waveform_settings: Record; export let waveform_options: WaveformOptions; @@ -70,6 +71,7 @@ interactive={false} {label} {i18n} + {show_spectrogram} {waveform_settings} {waveform_options} on:pause From de5005a3e570f4d3681744f9af50d65fa4017916 Mon Sep 17 00:00:00 2001 From: clement-pages Date: Tue, 17 Sep 2024 15:16:38 +0200 Subject: [PATCH 2/2] update audio component demo --- gryannote/audio/demo/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gryannote/audio/demo/app.py b/gryannote/audio/demo/app.py index d42f953..1905d05 100644 --- a/gryannote/audio/demo/app.py +++ b/gryannote/audio/demo/app.py @@ -6,7 +6,7 @@ audio = "/home/clement-pages/gryannote/sample.wav" annotations = load_rttm("/home/clement-pages/gryannote/sample.rttm")["sample"] -player = AudioLabeling(audio=audio, annotations=annotations, show_spectrogram=False) +player = AudioLabeling(audio=audio, annotations=annotations) demo = gr.Interface(lambda x: x, inputs=None, outputs=player)