From 7613968a01e51cfd36e1d04307d323ec1dab438d Mon Sep 17 00:00:00 2001 From: Yair Chuchem Date: Mon, 25 Mar 2024 15:56:47 +0200 Subject: [PATCH 1/2] DRY in FFT engines: legacy spectrum mirroring not part of the engine This makes the FFT engine wrappers simpler, deduplicates code (reducing potential for bugs in wrappers), at the cost of making the FallbackFFT's redundant work done again if the legacy flag is used. --- modules/juce_dsp/frequency/juce_FFT.cpp | 72 +++++++++---------------- 1 file changed, 25 insertions(+), 47 deletions(-) diff --git a/modules/juce_dsp/frequency/juce_FFT.cpp b/modules/juce_dsp/frequency/juce_FFT.cpp index 481b90537303..e142badc72ed 100644 --- a/modules/juce_dsp/frequency/juce_FFT.cpp +++ b/modules/juce_dsp/frequency/juce_FFT.cpp @@ -30,7 +30,7 @@ struct FFT::Instance { virtual ~Instance() = default; virtual void perform (const Complex* input, Complex* output, bool inverse) const noexcept = 0; - virtual void performRealOnlyForwardTransform (float*, bool) const noexcept = 0; + virtual void performRealOnlyForwardTransform (float*) const noexcept = 0; virtual void performRealOnlyInverseTransform (float*) const noexcept = 0; }; @@ -124,7 +124,7 @@ struct FFTFallback final : public FFT::Instance const size_t maxFFTScratchSpaceToAlloca = 256 * 1024; - void performRealOnlyForwardTransform (float* d, bool) const noexcept override + void performRealOnlyForwardTransform (float* d) const noexcept override { if (size == 1) return; @@ -471,7 +471,7 @@ struct AppleFFT final : public FFT::Instance vDSP_vsmul ((float*) output, 1, &factor, (float*) output, 1, static_cast (size << 1)); } - void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override + void performRealOnlyForwardTransform (float* inoutData) const noexcept override { auto size = (1 << order); auto* inout = reinterpret_cast*> (inoutData); @@ -481,7 +481,12 @@ struct AppleFFT final : public FFT::Instance vDSP_fft_zrip (fftSetup, &splitInOut, 2, 
order, kFFTDirection_Forward); vDSP_vsmul (inoutData, 1, &forwardNormalisation, inoutData, 1, static_cast (size << 1)); - mirrorResult (inout, ignoreNegativeFreqs); + // Imaginary part of nyquist and DC frequencies are always zero + // so Apple uses the imaginary part of the DC frequency to store + // the real part of the nyquist frequency + auto* out = reinterpret_cast*> (inoutData); + out[size >> 1] = { out[0].imag(), 0.0 }; + out[0] = { out[0].real(), 0.0 }; } void performRealOnlyInverseTransform (float* inoutData) const noexcept override @@ -503,22 +508,6 @@ struct AppleFFT final : public FFT::Instance private: //============================================================================== - void mirrorResult (Complex* out, bool ignoreNegativeFreqs) const noexcept - { - auto size = (1 << order); - auto i = size >> 1; - - // Imaginary part of nyquist and DC frequencies are always zero - // so Apple uses the imaginary part of the DC frequency to store - // the real part of the nyquist frequency - out[i++] = { out[0].imag(), 0.0 }; - out[0] = { out[0].real(), 0.0 }; - - if (! ignoreNegativeFreqs) - for (; i < size; ++i) - out[i] = std::conj (out[size - i]); - } - static DSPSplitComplex toSplitComplex (Complex* data) noexcept { // this assumes that Complex interleaves real and imaginary parts @@ -684,7 +673,7 @@ struct FFTWImpl : public FFT::Instance } } - void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override + void performRealOnlyForwardTransform (float* inputOutputData) const noexcept override { if (order == 0) return; @@ -692,12 +681,6 @@ struct FFTWImpl : public FFT::Instance auto* out = reinterpret_cast*> (inputOutputData); fftw.execute_r2c_fftw (r2c, inputOutputData, out); - - auto size = (1 << order); - - if (! 
ignoreNegativeFreqs) - for (int i = size >> 1; i < size; ++i) - out[i] = std::conj (out[size - i]); } void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override @@ -784,19 +767,12 @@ struct IntelFFT final : public FFT::Instance DftiComputeForward (c2c, (void*) input, output); } - void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override + void performRealOnlyForwardTransform (float* inputOutputData) const noexcept override { if (order == 0) return; DftiComputeForward (c2r, inputOutputData); - - auto* out = reinterpret_cast*> (inputOutputData); - auto size = (1 << order); - - if (! ignoreNegativeFreqs) - for (int i = size >> 1; i < size; ++i) - out[i] = std::conj (out[size - i]); } void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override @@ -851,19 +827,9 @@ class IntelPerformancePrimitivesFFT final : public FFT::Instance } } - void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override + void performRealOnlyForwardTransform (float* inoutData) const noexcept override { ippsFFTFwd_RToCCS_32f_I (inoutData, real.specPtr, real.workBuf.get()); - - if (order == 0) - return; - - auto* out = reinterpret_cast*> (inoutData); - const auto size = (1 << order); - - if (! ignoreNegativeFreqs) - for (auto i = size >> 1; i < size; ++i) - out[i] = std::conj (out[size - i]); } void performRealOnlyInverseTransform (float* inoutData) const noexcept override @@ -970,7 +936,19 @@ void FFT::perform (const Complex* input, Complex* output, bool inv void FFT::performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept { if (engine != nullptr) - engine->performRealOnlyForwardTransform (inputOutputData, ignoreNegativeFreqs); + engine->performRealOnlyForwardTransform (inputOutputData); + + if (! 
ignoreNegativeFreqs && size != 1) + { + // Preserve compatibility with legacy implementation + // where the redundant negative frequencies were also generated. + + auto* out = reinterpret_cast*> (inputOutputData); + + if (! ignoreNegativeFreqs) + for (auto i = size >> 1; i < size; ++i) + out[i] = std::conj (out[size - i]); + } } void FFT::performRealOnlyInverseTransform (float* inputOutputData) const noexcept From d722c6ba6a6cdf4997b9f0a6d57e701d4a6d79f9 Mon Sep 17 00:00:00 2001 From: Yair Chuchem Date: Mon, 25 Mar 2024 14:47:03 +0200 Subject: [PATCH 2/2] FFTFallback doesn't require real-FFT buffers to be oversized. Ideally rfft would be implemented more efficiently than doing a full FFT, but the FFTFallback is just a slow fill-in for juce::dsp::FFT to still work in the absence of using an external FFT engine. This makes it function like the other engines in not requiring an oversized buffer. --- modules/juce_dsp/frequency/juce_FFT.cpp | 48 ++++++++++++++++--------- modules/juce_dsp/frequency/juce_FFT.h | 29 +++++++-------- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/modules/juce_dsp/frequency/juce_FFT.cpp b/modules/juce_dsp/frequency/juce_FFT.cpp index e142badc72ed..20abb8f9f0f9 100644 --- a/modules/juce_dsp/frequency/juce_FFT.cpp +++ b/modules/juce_dsp/frequency/juce_FFT.cpp @@ -131,16 +131,23 @@ struct FFTFallback final : public FFT::Instance const size_t scratchSize = 16 + (size_t) size * sizeof (Complex); - if (scratchSize < maxFFTScratchSpaceToAlloca) + if (scratchSize * 2 < maxFFTScratchSpaceToAlloca) { JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255) - performRealOnlyForwardTransform (static_cast*> (alloca (scratchSize)), d); + performRealOnlyForwardTransform ( + static_cast*> (alloca (scratchSize)), + static_cast*> (alloca (scratchSize)), + d); JUCE_END_IGNORE_WARNINGS_MSVC } else { - HeapBlock heapSpace (scratchSize); - performRealOnlyForwardTransform (unalignedPointerCast*> (heapSpace.getData()), d); + HeapBlock heapSpaceA (scratchSize); + 
HeapBlock heapSpaceB (scratchSize); + performRealOnlyForwardTransform ( + unalignedPointerCast*> (heapSpaceA.getData()), + unalignedPointerCast*> (heapSpaceB.getData()), + d); } } @@ -154,38 +161,45 @@ struct FFTFallback final : public FFT::Instance if (scratchSize < maxFFTScratchSpaceToAlloca) { JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255) - performRealOnlyInverseTransform (static_cast*> (alloca (scratchSize)), d); + performRealOnlyInverseTransform ( + static_cast*> (alloca (scratchSize)), + static_cast*> (alloca (scratchSize)), + d); JUCE_END_IGNORE_WARNINGS_MSVC } else { - HeapBlock heapSpace (scratchSize); - performRealOnlyInverseTransform (unalignedPointerCast*> (heapSpace.getData()), d); + HeapBlock heapSpaceA (scratchSize); + HeapBlock heapSpaceB (scratchSize); + performRealOnlyInverseTransform ( + unalignedPointerCast*> (heapSpaceA.getData()), + unalignedPointerCast*> (heapSpaceB.getData()), + d); } } - void performRealOnlyForwardTransform (Complex* scratch, float* d) const noexcept + void performRealOnlyForwardTransform (Complex* scratchA, Complex* scratchB, float* d) const noexcept { for (int i = 0; i < size; ++i) - scratch[i] = { d[i], 0 }; + scratchA[i] = { d[i], 0 }; - perform (scratch, reinterpret_cast*> (d), false); + perform (scratchA, scratchB, false); + memcpy (d, scratchB, sizeof(Complex) * ((size_t) size / 2 + 1)); } - void performRealOnlyInverseTransform (Complex* scratch, float* d) const noexcept + void performRealOnlyInverseTransform (Complex* scratchA, Complex* scratchB, float* d) const noexcept { auto* input = reinterpret_cast*> (d); + for (int i = 0; i < size >> 1; ++i) + scratchB[i] = input[i]; for (int i = size >> 1; i < size; ++i) - input[i] = std::conj (input[size - i]); + scratchB[i] = std::conj (input[size - i]); - perform (input, scratch, true); + perform (scratchB, scratchA, true); for (int i = 0; i < size; ++i) - { - d[i] = scratch[i].real(); - d[i + size] = scratch[i].imag(); - } + d[i] = scratchA[i].real(); } 
//============================================================================== diff --git a/modules/juce_dsp/frequency/juce_FFT.h b/modules/juce_dsp/frequency/juce_FFT.h index fd897ca69840..3584ca21c975 100644 --- a/modules/juce_dsp/frequency/juce_FFT.h +++ b/modules/juce_dsp/frequency/juce_FFT.h @@ -67,19 +67,17 @@ class JUCE_API FFT As the coefficients of the negative frequencies (frequencies higher than N/2 or pi) are the complex conjugate of their positive counterparts, - it may not be necessary to calculate them for your particular application. + it is usually unnecessary to calculate them. You can use onlyCalculateNonNegativeFrequencies to let the FFT - engine know that you do not plan on using them. Note that this is only a - hint: some FFT engines (currently only the Fallback engine), will still - calculate the negative frequencies even if onlyCalculateNonNegativeFrequencies - is true. - - The size of the array passed in must be 2 * getSize(), and the first half - should contain your raw input sample data. On return, if - onlyCalculateNonNegativeFrequencies is false, the array will contain size - complex real + imaginary parts data interleaved. If - onlyCalculateNonNegativeFrequencies is true, the array will contain at least - (size / 2) + 1 complex numbers. Both outputs can be passed to + engine know that you do not plan on using them. + + The array size must be large enough for the outputs, + which are of size (2 * getSize()) by default or (2 + getSize()) if enabling onlyCalculateNonNegativeFrequencies. + The first getSize() elements should contain your raw input sample data. + + On return, if onlyCalculateNonNegativeFrequencies is false, the array will contain size complex numbers with real and imaginary parts interleaved. + If onlyCalculateNonNegativeFrequencies is true, the array will contain + (size / 2) + 1 complex numbers. The outputs can be passed to performRealOnlyInverseTransform() in order to convert it back to reals. 
*/ void performRealOnlyForwardTransform (float* inputOutputData, @@ -87,10 +85,9 @@ class JUCE_API FFT /** Performs a reverse operation to data created in performRealOnlyForwardTransform(). - Although performRealOnlyInverseTransform will only use the first ((size / 2) + 1) - complex numbers, the size of the array passed in must still be 2 * getSize(), as some - FFT engines require the extra space for the calculation. On return, the first half of the - array will contain the reconstituted samples. + The array passed in must hold getSize() + 2 floats, + which is the space taken by the ((size / 2) + 1) complex numbers representing the non-negative frequencies of the DFT. + On return, the first getSize() elements of the array will contain the reconstituted samples. */ void performRealOnlyInverseTransform (float* inputOutputData) const noexcept;