Skip to content

Commit

Permalink
version 0.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
rfilmyer committed Mar 18, 2021
1 parent 40e4c7a commit 2588e30
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 26 deletions.
7 changes: 5 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
[package]
name = "spectrobrowse"
version = "0.1.0"
authors = ["Roger Filmyer <roger.filmyer@gmail.com>"]
authors = ["Roger Filmyer <spectrobrowse@synolect.com>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rodio = "0.13.0"
plotters = "0.3.0"
colorous = "1.0.3"
rustfft = "5.0.1"
ndarray = "0.14.0"
ndarray-stats = "0.4.0"
colorous = "1.0.3"
rayon = "1.5.0"
eyre = "0.6.5"
clap = "2.33.0"
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
# spectrobrowse
Browse through audio clips by viewing their spectrograms

# Current Status
* Usable as a CLI program that writes one spectrogram image per audio file to an output directory. Run with `--help` for details.

# Current Bugs
* Graphs are truncated instead of downscaled

# Current Todos
* Generate FFTs in log scale instead of linear scale
* CLI Ergonomics - command line args (`clap`) and progress bars and feedback (`indicatif`)

# Project Goals
* Load a directory of audio files (ogg vorbis is a requirement here)
* Turn each audio file into a waveform(?)
Expand All @@ -10,7 +20,7 @@ Browse through audio clips by viewing their spectrograms

# Ideas/Todos
## Libraries
* Will [`sonogram`](https://github.com/psiphi75/sonogram) be fast enough or do I have to roll my own with [`RustFFT`](https://github.com/ejmahler/RustFFT)?
* ~Will [`sonogram`](https://github.com/psiphi75/sonogram) be fast enough or do I have to roll my own with [`RustFFT`](https://github.com/ejmahler/RustFFT)?~
* Sonogram takes WAV files. Do I have to use something like [`rodio`](https://github.com/RustAudio/rodio) to convert OGG files?
* How do I generate the webpage (seems like the easiest UI option to start out with)? [`askama`](https://github.com/djc/askama) or `liquid` or something else?
* How messed up is that web page going to be?
Binary file added compare-with-sonogram/cometodaddy-mummymix.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
File renamed without changes
Binary file added compare-with-sonogram/wolf.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified output/Roundstart_MAIN-sharedassets2.assets-54.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added output/ft8 160m.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
17 changes: 11 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ use ndarray_stats::QuantileExt;
use plotters::{coord::Shift, prelude::*};
use rodio::{self, Decoder, source::Spatial};
use rustfft::{FftPlanner, num_complex::Complex};
use std::{error::Error};
use colorous;

pub fn load_soundfile_from_path<P: AsRef<Path>>(path: P) -> Result<Vec<i16>, Box<dyn Error>> {
use eyre::Result;

pub fn load_soundfile_from_path<P: AsRef<Path>>(path: P) -> Result<Vec<i16>, eyre::Report> {
let file = File::open(path)?;

let source = Decoder::new(BufReader::new(file))?;
Expand All @@ -25,7 +26,7 @@ pub fn load_soundfile_from_path<P: AsRef<Path>>(path: P) -> Result<Vec<i16>, Box

}

pub fn compute_spectrogram(waveform: Vec<i16>, window_size: usize, overlap: f64) -> Result<Array2<f32>, Box<dyn Error>> {
pub fn compute_spectrogram(waveform: Vec<i16>, window_size: usize, overlap: f64) -> Result<Array2<f32>, eyre::Report> {
let skip_size = (window_size as f64 * (1f64 - overlap)) as usize;

let waveform = Array::from(waveform);
Expand All @@ -45,8 +46,13 @@ pub fn compute_spectrogram(waveform: Vec<i16>, window_size: usize, overlap: f64)
let fft = planner.plan_fft_forward(window_size);

// Since we have a 2-D array of our windows with shape [window_size, (num_samples / window_size) - 1], we can run an FFT on every row.
let mut scratch_buffer = vec![Complex::new(0f32, 0f32); window_size];
windows.axis_iter_mut(Axis(0))
.for_each(|mut frame| { fft.process(frame.as_slice_mut().unwrap()); });
.for_each(|mut frame| {
if let Some(buffer) = frame.as_slice_mut() {
fft.process_with_scratch(buffer, scratch_buffer.as_mut_slice());
};
});

// Get the real component of those complex numbers we get back from the FFT
let windows = windows.map(|i| i.re);
Expand All @@ -69,8 +75,7 @@ pub fn plot_spectrogram<DB: DrawingBackend>(spectrogram: &Array2<f32>, drawing_a
_ => panic!("Spectrogram is a {}D array, expected a 2D array.
This should never happen (should not be possible to call function with anything but a 2d array)", spectrogram.ndim())
};

println!("Generating a {} wide x {} high image", num_samples, num_freq_bins);
println!("...from a spectrogram with {} samples x {} frequency bins.", num_samples, num_freq_bins);

let spectrogram_cells = drawing_area.split_evenly((num_freq_bins, num_samples));

Expand Down
133 changes: 116 additions & 17 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,125 @@
mod lib;

use std::{error::Error};
use std::{cmp::max, error::Error, ffi::OsStr, fs::read_dir, path::PathBuf};

use rayon::{iter::IntoParallelRefIterator, prelude::*};

use plotters::prelude::*;

use clap::{App, Arg};

fn main() -> Result<(), Box<dyn Error>> {
let waveform = lib::load_soundfile_from_path("sound_files_nodistrib/Roundstart_MAIN-sharedassets2.assets-54.wav")?;
let spectrogram = lib::compute_spectrogram(waveform, 2048, 0.75)?;

let (num_samples, num_freq_bins) = match spectrogram.shape() {
&[num_rows, num_columns] => (num_rows, num_columns),
_ => panic!("Windows is a {}D array, expected a 2D array", spectrogram.ndim())
};
println!("Generating a {} wide x {} high image", num_samples, num_freq_bins);
let image_dimensions: (u32, u32) = (num_samples as u32, num_freq_bins as u32);
let root =
BitMapBackend::new(
"output/Roundstart_MAIN-sharedassets2.assets-54.png",
image_dimensions, // width x height. Worth it if we ever want to resize the graph.
).into_drawing_area();

lib::plot_spectrogram(&spectrogram, &root);
let matches = App::new("Spectrobrowse")
.version("0.1.0")
.author("Roger Filmyer <[email protected]>")
.about("Generates spectrograms from audio files in a directory")
.arg(Arg::with_name("INPUT_DIR")
.help("Input Directory")
.required(true)
.takes_value(true))
.arg(Arg::with_name("output")
.help("Output directory. Defaults to the current directory.")
.short("o")
.default_value(".")
.takes_value(true))
.arg(Arg::with_name("window-size")
.help("Window size (# of frequencies for FFT). Default 2048")
.short("w")
.takes_value(true)
.default_value("2048"))
.arg(Arg::with_name("overlap-ratio")
.help("Window Overlap Ratio (default 0.75). Must be between 0 and 1, exclusive.")
.short("r")
.takes_value(true)
.default_value("0.75"))
.get_matches();

// parsing command line args
let directory_path = matches.value_of_os("INPUT_DIR").unwrap();
let output_path = matches.value_of_os("output").unwrap();
let window_size = matches.value_of("window-size")
.unwrap_or_else(|| panic!("A value must be specified for `window-size` or a default must be configured"))
.parse()
.unwrap();

let overlap = matches.value_of("overlap-ratio")
.unwrap_or_else(|| panic!("A value must be specified for `overlap-ratio` or a default must be configured"))
.parse()?;
if overlap >= 1.0 || overlap <= 0.0 {
panic!("Invalid value for `overlap`: {} (must be greater than 0, less than 1)", overlap);
}
// let directory_path = "sound_files_smaller";
let directory = read_dir(directory_path)?;

let filepaths = directory
.filter_map(|de| {
match de {
Ok(de) => Some(de),
Err(e) => {eprintln!("Error when listing directory: {}", e); None}
}
})
.map(|de| de.path());

// load waveforms
let waveforms = filepaths
.map(move |fp| (fp.clone(), lib::load_soundfile_from_path(fp)))
.filter_map(|(fp, wf)| {
match wf {
Ok(wf) => Some((fp, wf)),
Err(e) => {
let filepath = fp.to_string_lossy();
eprintln!("Problem reading {}: {}", filepath, e);
None
}
}
})
.collect::<Vec<(PathBuf, Vec<i16>)>>(); // I would *love* to not have to collect here but I'll deal with that later

// Multiprocessing begins here:
// FFT calculation
let spectrograms = waveforms
.par_iter()
.map(move |(filepath, waveform)| (filepath.to_owned(), lib::compute_spectrogram(waveform.to_owned(), window_size, overlap)))
.filter_map(|(filepath, spectrogram)| {
match spectrogram {
Ok(s) => Some((filepath, s)),
Err(e) => {
let filepath = filepath.to_string_lossy();
eprintln!("Problem calculating spectrogram for {}: {}", filepath, e);
None
}
}
});

// Graphing
spectrograms.for_each(move |(audio_filepath, spectrogram)| {
let output_path = std::path::Path::new(output_path)
.join(
audio_filepath
.with_extension("png")
.file_name()
.unwrap_or(OsStr::new("output.png"))
);

let (num_samples, num_freq_bins) = match spectrogram.shape() {
&[num_rows, num_columns] => (num_rows, num_columns),
_ => panic!("Windows is a {}D array, expected a 2D array", spectrogram.ndim())
};
let image_height = num_freq_bins;
let image_width = max(image_height, num_samples);
// Eventually I want to replace with this function below, currently I end up truncating the spectrogram
// let image_width = max(image_height, (f32::sqrt(image_height as f32) * f32::sqrt(num_samples as f32)).round() as usize);
println!("Generating a {} wide x {} high image at {}", image_width, image_height, output_path.to_string_lossy());

let image_dimensions: (u32, u32) = (image_width as u32, image_height as u32);
let root =
BitMapBackend::new(
&output_path,
image_dimensions, // width x height. Worth it if we ever want to resize the graph.
).into_drawing_area();

lib::plot_spectrogram(&spectrogram, &root);
});

Ok(())
}

0 comments on commit 2588e30

Please sign in to comment.