Skip to content

Commit

Permalink
Merge pull request #4 from colonelwatch/high-accuracy
Browse files Browse the repository at this point in the history
High accuracy
  • Loading branch information
colonelwatch authored Aug 16, 2022
2 parents 6b820fa + 5084e7b commit 08bb268
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 24 deletions.
20 changes: 11 additions & 9 deletions ESP32-oled-spectrum/ESP32-oled-spectrum.ino
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include "cq_kernel.h"

// End-user constants, adjust depending on your electrical configuration
const int dB_min = 5; // dB, minimum value to display
const int dB_min = 25; // dB, minimum value to display
const int dB_max = 45; // dB, maximum value to display
const int clip_pin = 15; // Connect LED to this pin to get a clipping indicator (TODO: reimplement)
const adc1_channel_t adc_channel = ADC1_CHANNEL_0; // Connect DC-biased line signal to this, see IDF docs for pin nums
Expand All @@ -33,10 +33,11 @@ const int N_samples = 6144; // FFT length, prime factorization should contain as
const int sampling_frequency = 44100; // Hz, I2S sampling frequency
const int max_freq = 14000; // Hz, last CQT center freq to display, ensure CQT kernels aren't degenerated when changing
const int min_freq = 40; // Hz, first CQT center freq to display, ensure CQT kernels aren't degenerated when changing
const float min_val = 0.225; // see Brown CQT paper for explanation
const int calc_rate = 142; // Hz, calcs pinned to this rate, artifacts on tone tests and fails to meet calc_rate if too high
const int N_columns = 64; // number of columns to display
const int col_width = 1; // px, width of each column
const enum window_type window_type = GAUSSIAN; // shape of CQT kernels
const float min_val = 0.02; // see Brown CQT paper for explanation
const int calc_rate = 120; // Hz, calcs pinned to this rate, artifacts on tone tests and fails to meet calc_rate if too high
const int N_columns = 32; // number of columns to display
const int col_width = 2; // px, width of each column
const int screen_width = 128; // px, width of screen
const int screen_height = 64; // px, height of screen

Expand All @@ -47,6 +48,7 @@ struct cq_kernel_cfg cq_cfg = { // accessed before all other tasks are started,
.fmin = min_freq,
.fmax = max_freq,
.fs = sampling_frequency,
.window_type = window_type,
.min_val = min_val
};
cq_kernels_t kernels; // will point to kernels allocated in dynamic memory
Expand Down Expand Up @@ -79,7 +81,7 @@ void screen_Task_routine(void *pvParameters){

while(true){
#ifdef SPI_SSD1306
const int interpolation_factor = 3; // 3x the "frame rate"
const int interpolation_factor = 2; // 2x the "frame rate"
const int update_rate = calc_rate*interpolation_factor;
if(cycle_state == interpolation_factor-1){
while(!colBuffer_swap_ready); // spin-wait until the buffer is ready
Expand Down Expand Up @@ -110,9 +112,9 @@ void screen_Task_routine(void *pvParameters){
// (called zero-order hold) then filtering
if(cycle_state == 0) x *= interpolation_factor;
else x = 0;
// 2nd-order Butterworth IIR with cutoff at 10Hz (426Hz "sampling") as an interpolator
y[i] = 0.004917646918866*x+0.009835293837732*x_1[i]+0.004917646918866*x_2[i] \
+1.792062605350460*y_1[i]-0.811733193025923*y_2[i];
// 2nd-order Butterworth IIR with cutoff at 10Hz (240Hz "sampling") as an interpolator
y[i] = 0.014401440346511*x+0.028802880693022*x_1[i]+0.014401440346511*x_2[i] \
+1.632993161855452*y_1[i]-0.690598923241497*y_2[i];
#else
// 2nd-order Butterworth IIR with cutoff at 10Hz (89Hz "sampling") as a filter
y[i] = 0.081926471866054*x+0.163852943732109*x_1[i]+0.081926471866054*x_2[i] \
Expand Down
34 changes: 26 additions & 8 deletions ESP32-oled-spectrum/cq_kernel.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,31 @@ void _generate_hamming(kiss_fft_scalar window[], int N){
}
}

// TODO: Offer option of placing hamming window at end of kernel for lower latency?
void _generate_kernel(kiss_fft_cpx kernel[], kiss_fftr_cfg cfg, float f, float fmin, float fs, int N){
// Generates window with hamming in the center and zero everywhere else
// Fills window[0..N-1] with a Gaussian curve centered on index N/2.
//
// window: output buffer, must have room for at least N elements
// N:      number of samples to write; the loop does nothing when N <= 0
//
// The standard deviation is sigma*N/2 samples, so with sigma = 0.5 the first sample
// (i = 0) sits two standard deviations from the center and evaluates to exp(-2).
// Original author's note on sigma: 0.5 makes a window accurate to -30dB from peak,
// but a smaller sigma is more accurate.
//
// This is the spelling declared in cq_kernel.h.
void _generate_gaussian(kiss_fft_scalar window[], int N){
    const float sigma = 0.5f;
    const double center = N/2.0;             // curve peaks at the middle of the window
    const double half_width = sigma*N/2.0;   // standard deviation, in samples

    for(int i = 0; i < N; i++){
        const double z = (i-center)/half_width; // distance from center in standard deviations
#ifdef FIXED_POINT // If fixed_point, represent window with integers scaled by SAMP_MAX
        window[i] = SAMP_MAX*exp(-0.5*pow(z, 2));
#else // Else if floating point, represent window as-is (peak value of 1)
        window[i] = exp(-0.5*pow(z, 2));
#endif
    }
}

// Misspelled alias of _generate_gaussian, kept so existing calls that use this
// spelling continue to compile and link.
void _generate_guassian(kiss_fft_scalar window[], int N){
    _generate_gaussian(window, N);
}

void _generate_kernel(kiss_fft_cpx kernel[], kiss_fftr_cfg cfg, enum window_type window_type, float f, float fmin, float fs, int N){
// Generates window in the center and zero everywhere else
float factor = f/fmin;
int hamming_N = N/factor; // Scales inversely with frequency (see CQT paper)
kiss_fft_scalar *time_K = calloc(N, sizeof(kiss_fft_scalar));
_generate_hamming(&time_K[(N-hamming_N)/2], hamming_N);
int N_window = N/factor; // Scales inversely with frequency (see CQT paper)
kiss_fft_scalar *time_K = (kiss_fft_scalar*)calloc(N, sizeof(kiss_fft_scalar));

switch(window_type){
case HAMMING:
_generate_hamming(&time_K[(N-N_window)/2], N_window);
break;
case GAUSSIAN:
_generate_guassian(&time_K[(N-N_window)/2], N_window);
break;
}

// Fills window with f Hz wave sampled at fs Hz
for(int i = 0; i < N; i++) time_K[i] *= cos(2*M_PI*(f/fs)*(i-N/2));
Expand All @@ -48,7 +66,7 @@ void _generate_kernel(kiss_fft_cpx kernel[], kiss_fftr_cfg cfg, float f, float f
kernel[i].i *= factor;
}
#else // Else if floating point, follow CQT paper more exactly (normalize with N before FFT)
for(int i = 0; i < N; i++) time_K[i] /= hamming_N;
for(int i = 0; i < N; i++) time_K[i] /= N_window;
kiss_fftr(cfg, time_K, kernel);
#endif

Expand All @@ -71,7 +89,7 @@ struct sparse_arr* generate_kernels(struct cq_kernel_cfg cfg){
// Clears temp_kernel before calling _generate_kernel on it
for(int i = 0; i < cfg.samples; i++) temp_kernel[i].r = temp_kernel[i].i = 0;

_generate_kernel(temp_kernel, fft_cfg, freq[i], cfg.fmin, cfg.fs, cfg.samples);
_generate_kernel(temp_kernel, fft_cfg, cfg.window_type, freq[i], cfg.fmin, cfg.fs, cfg.samples);

// Counts number of elements with a complex magnitude above cfg.min_val in temp_kernel
int n_elems = 0;
Expand Down
9 changes: 8 additions & 1 deletion ESP32-oled-spectrum/cq_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@
extern "C" {
#endif

// Window shapes selectable for the CQT kernels (stored in cq_kernel_cfg.window_type
// and switched on inside _generate_kernel).
enum window_type{
    HAMMING = 0,   // build each kernel from a Hamming window
    GAUSSIAN = 1   // build each kernel from a Gaussian window
};

/*
* cq_kernel_cfg - holds parameters used by cq_kernel
*
Expand All @@ -34,6 +39,7 @@ struct cq_kernel_cfg{
float fmin;
float fmax;
float fs;
enum window_type window_type;
kiss_fft_scalar min_val; // sparse matrix threshold, see CQT paper
};

Expand All @@ -55,7 +61,8 @@ typedef struct sparse_arr *cq_kernels_t;
// private functions
void _generate_center_freqs(float freq[], int bands, float fmin, float fmax);
void _generate_hamming(kiss_fft_scalar window[], int N);
void _generate_kernel(kiss_fft_cpx K[], kiss_fftr_cfg cfg, float f, float fmin, float fs, int N);
void _generate_gaussian(kiss_fft_scalar window[], int N);
void _generate_kernel(kiss_fft_cpx K[], kiss_fftr_cfg cfg, enum window_type window_type, float f, float fmin, float fs, int N);
kiss_fft_scalar _mag(kiss_fft_cpx x);

// public functions
Expand Down
13 changes: 7 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@

⚠️ **Disclaimer!** ⚠️ This project depends on fixes introduced in the latest (as of writing) ESP32 v2.0.4 core. Upgrade the core through the Arduino boards manager.

ESP32-oled-spectrum is a high-performance, high-resolution audio spectrum visualizer, leveraging what the ESP32 microcontroller and the SSD1306 OLED module uniquely offer. Namely, it's 64 bars of logarithmically-spaced frequencies moving at *426* frames per second, produced from the following:
People generally cannot perceive frequencies below 20 Hz or above 20 kHz, and the frequencies within that range are heard logarithmically. [Many recorded pieces of music have a dynamic range that doesn't exceed 23 dB](https://hub.yamaha.com/audio/music/what-is-dynamic-range-and-why-does-it-matter) (coincidentally the range of a VU meter), and we hear that variation in amplitude logarithmically too. [Rhythms beyond 600 BPM (10 Hz) approach the limits of human perception](https://www.youtube.com/watch?v=h3kqBX1j7f8).

ESP32-oled-spectrum is a project that tries to visually represent all music within these constraints--faithfully--as a high-performance, high-resolution audio spectrum visualizer. It leverages what the ESP32 microcontroller and the SSD1306 OLED module uniquely offer. Namely, it's 32 bars of logarithmically-spaced frequencies moving at ~107 fps, the typical maximum refresh rate of the SSD1306. To accomplish this, it involves the following:

* the I2S and I2C/SPI peripherals, not to mention both cores of the ESP32
* a Constant Q transform ([cq_kernel](https://github.com/colonelwatch/cq_kernel)) computed from a 6144-point FFT ([kissfft](https://github.com/mborgerding/kissfft)), 142 times a second
* the CQT essentially emulates sampling a bank of filters
* post-processing, including a 3x interpolation routine
* a Constant Q transform ([cq_kernel](https://github.com/colonelwatch/cq_kernel)) (used to emulate a bank of bandpass filters with Q of ~4.5 and equal group delay) computed from a 6144-point FFT ([kissfft](https://github.com/mborgerding/kissfft))
* post-processing and filters, including a 2x interpolation routine

So, please give this Arduino sketch an upload yourself. It's a project I started in 2019 and then continued to improve over the years, and now, cameras literally cannot capture how nice this looks! The promised performance is on a SPI SSD1306 OLED module. I also have a routine for the ubiquitous I2C one, still at a respectable 89 frames per second. Below is an okay amplification circuit into pin 36/VP to get started with.
All that said, it's an Arduino sketch, so please give it an upload and see for yourself! The promised performance is on an SPI SSD1306, but I also have a routine for the ubiquitous I2C one that runs at 89 fps. Below is an okay amplification circuit into pin 36/VP to get started with.

![Amplification circuit](/images/amplification.png)

It presents an impedance of at least 5k, which should be okay for line-level and definitely good with a phone. Alternatively, you can feed a line-level signal biased at half of 3.3 volts. You may also add an LED to pin 15 to get a clipping indicator.
It presents an impedance of at least 5k, which should be okay for line-level and definitely good with a phone. Alternatively, you can plug in any signal that is 3V peak-to-peak and DC-biased at 1.65V. You may also add an LED to pin 15 to get a clipping indicator.
Binary file modified images/closeup.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 08bb268

Please sign in to comment.