Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add spectrogram on the audio player #76

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions gryannote/audio/backend/gryannote_audio/audio_labeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def __init__(
autoplay: bool = False,
show_download_button=True,
show_share_button: bool | None = None,
show_spectrogram: bool = True,
editable: bool = True,
min_length: int | None = None,
max_length: int | None = None,
Expand Down Expand Up @@ -131,6 +132,9 @@ def __init__(
show_download_button: If True, will show a download button in the corner of the component for saving audio. If False, icon does not appear.
show_share_button: If True, will show a share icon in the corner of the component that allows user to share outputs to Hugging Face Spaces Discussions. If False, icon does not appear. If set to None (default behavior), then the icon appears if this Gradio app is launched on Spaces, but not otherwise.
editable: If True, allows users to manipulate the audio file (if the component is interactive).
show_spectrogram: If True, shows the audio spectrogram on the interface. If False, the spectrogram is not displayed. Defaults to True.
min_length: The minimum length of audio (in seconds) that the user can pass into the prediction function. If None, there is no minimum length.
max_length: The maximum length of audio (in seconds) that the user can pass into the prediction function. If None, there is no maximum length.
waveform_options: A dictionary of options for the waveform display. Options include: waveform_color (str), waveform_progress_color (str), show_controls (bool), skip_length (int). Default is None, which uses the default values for these options.
Expand Down Expand Up @@ -174,6 +178,7 @@ def __init__(
if show_share_button is None
else show_share_button
)
self.show_spectrogram = show_spectrogram

self.editable = editable

Expand Down
15 changes: 6 additions & 9 deletions gryannote/audio/demo/app.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
import gradio as gr
from gryannote_audio import AudioLabeling
from gryannote_audio import AudioLabeling, Player
from pyannote.audio import Pipeline
from pyannote.database.util import load_rttm

audio_labeling = AudioLabeling(type="filepath", interactive=True)
audio = "/home/clement-pages/gryannote/sample.wav"
annotations = load_rttm("/home/clement-pages/gryannote/sample.rttm")["sample"]

player = AudioLabeling(audio=audio, annotations=annotations)

def apply_pipeline(audio):
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1")
annotations = pipeline(audio)
return (audio, annotations)


demo = gr.Interface(apply_pipeline, inputs=audio_labeling, outputs=audio_labeling)
demo = gr.Interface(lambda x: x, inputs=None, outputs=player)


if __name__ == "__main__":
Expand Down
3 changes: 3 additions & 0 deletions gryannote/audio/frontend/Index.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
export let root: string;
export let show_label: boolean;
export let show_download_button: boolean = true;
export let show_spectrogram: boolean = true;
export let container = true;
export let scale: number | null = null;
export let min_width: number | undefined = undefined;
Expand Down Expand Up @@ -133,6 +134,7 @@
i18n={gradio.i18n}
{show_label}
{show_download_button}
{show_spectrogram}
{value}
{label}
{waveform_settings}
Expand Down Expand Up @@ -165,6 +167,7 @@
{label}
{show_label}
{show_download_button}
{show_spectrogram}
{value}
on:change={({ detail }) => (value = detail)}
on:stream={({ detail }) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
export let root: string;
export let show_label = true;
export let show_download_button: boolean = true;
export let show_spectrogram: boolean = true;
export let sources:
| ["microphone"]
| ["upload"]
Expand Down Expand Up @@ -275,6 +276,7 @@
{value}
{label}
{i18n}
{show_spectrogram}
{waveform_settings}
{waveform_options}
{editable}
Expand Down
26 changes: 24 additions & 2 deletions gryannote/audio/frontend/player/AudioPlayerWithAnnotation.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
type ButtonEvent,
type AxeEvent,
} from "wavesurfer.js/dist/plugins/gamepad.js";
import Spectrogram from "wavesurfer.js/dist/plugins/spectrogram"
import WaveformControls from "../shared/WaveformControls.svelte";
import { Empty } from "@gradio/atoms";
import { resolve_wasm_src } from "@gradio/wasm/svelte";
Expand All @@ -28,15 +29,17 @@
export let i18n: I18nFormatter;
export let interactive = true;
export let editable = true;
export let show_spectrogram = true;
export let waveform_settings: Record<string, any>;
export let waveform_options: WaveformOptions;
export let mode = "";
export let isDialogOpen: boolean;

let container: HTMLDivElement;
let waveform: WaveSurfer | undefined;
let waveform: WaveSurfer;
let wsRegions: RegionsPlugin;
let wsGamepad: GamepadPlugin;
let wsSpectro: Spectrogram;
let activeRegion: Region | null = null;
let leftRegionHandle: HTMLDivElement | null;
let rightRegionHandle: HTMLDivElement | null;
Expand Down Expand Up @@ -128,6 +131,12 @@
let top = regionColor * (100. / numColors);
let height = 100. / numColors;

// If the spectrogram is displayed, only the top half of the container is available for the waveform, so halve the region's top offset and height.
if(show_spectrogram){
top = top / 2;
height = height / 2;
}

// update region alignment style:
region.element.style.top = top.toString() + "%";
region.element.style.height = height.toString() + "%";
Expand Down Expand Up @@ -618,6 +627,7 @@
onGamepadAxePushed(e);
});
}

if(wsRegions === undefined ){
wsRegions = waveform.registerPlugin(RegionsPlugin.create());
if(interactive){
Expand All @@ -635,6 +645,14 @@
});
}
}

if(show_spectrogram && !wsSpectro){
wsSpectro = waveform.registerPlugin(Spectrogram.create({
labels: true,
splitChannels: true,
}));
}

if (!waveform_settings.autoplay) {
waveform?.stop();
} else {
Expand Down Expand Up @@ -893,7 +911,11 @@
}

:global(::part(wrapper)) {
margin-bottom: var(--size-2);
display: flex;
flex-direction: column;
justify-content: space-between;
gap: 20px;
margin-bottom: var(--size-2);
}

.timestamps {
Expand Down
2 changes: 2 additions & 0 deletions gryannote/audio/frontend/static/StaticAudioLabeling.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
export let label: string;
export let show_label = true;
export let show_download_button: boolean = true;
export let show_spectrogram: boolean = true;
export let i18n: I18nFormatter;
export let waveform_settings: Record<string, any>;
export let waveform_options: WaveformOptions;
Expand Down Expand Up @@ -70,6 +71,7 @@
interactive={false}
{label}
{i18n}
{show_spectrogram}
{waveform_settings}
{waveform_options}
on:pause
Expand Down