Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFFT improvement: remove the double size memory requirement #1367

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 56 additions & 64 deletions modules/juce_dsp/frequency/juce_FFT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ struct FFT::Instance
{
virtual ~Instance() = default;
virtual void perform (const Complex<float>* input, Complex<float>* output, bool inverse) const noexcept = 0;
virtual void performRealOnlyForwardTransform (float*, bool) const noexcept = 0;
virtual void performRealOnlyForwardTransform (float*) const noexcept = 0;
virtual void performRealOnlyInverseTransform (float*) const noexcept = 0;
};

Expand Down Expand Up @@ -124,23 +124,30 @@ struct FFTFallback final : public FFT::Instance

const size_t maxFFTScratchSpaceToAlloca = 256 * 1024;

void performRealOnlyForwardTransform (float* d, bool) const noexcept override
void performRealOnlyForwardTransform (float* d) const noexcept override
{
if (size == 1)
return;

const size_t scratchSize = 16 + (size_t) size * sizeof (Complex<float>);

if (scratchSize < maxFFTScratchSpaceToAlloca)
if (scratchSize * 2 < maxFFTScratchSpaceToAlloca)
{
JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255)
performRealOnlyForwardTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
performRealOnlyForwardTransform (
static_cast<Complex<float>*> (alloca (scratchSize)),
static_cast<Complex<float>*> (alloca (scratchSize)),
d);
JUCE_END_IGNORE_WARNINGS_MSVC
}
else
{
HeapBlock<char> heapSpace (scratchSize);
performRealOnlyForwardTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
HeapBlock<char> heapSpaceA (scratchSize);
HeapBlock<char> heapSpaceB (scratchSize);
performRealOnlyForwardTransform (
unalignedPointerCast<Complex<float>*> (heapSpaceA.getData()),
unalignedPointerCast<Complex<float>*> (heapSpaceB.getData()),
d);
}
}

Expand All @@ -154,38 +161,45 @@ struct FFTFallback final : public FFT::Instance
if (scratchSize < maxFFTScratchSpaceToAlloca)
{
JUCE_BEGIN_IGNORE_WARNINGS_MSVC (6255)
performRealOnlyInverseTransform (static_cast<Complex<float>*> (alloca (scratchSize)), d);
performRealOnlyInverseTransform (
static_cast<Complex<float>*> (alloca (scratchSize)),
static_cast<Complex<float>*> (alloca (scratchSize)),
d);
JUCE_END_IGNORE_WARNINGS_MSVC
}
else
{
HeapBlock<char> heapSpace (scratchSize);
performRealOnlyInverseTransform (unalignedPointerCast<Complex<float>*> (heapSpace.getData()), d);
HeapBlock<char> heapSpaceA (scratchSize);
HeapBlock<char> heapSpaceB (scratchSize);
performRealOnlyInverseTransform (
unalignedPointerCast<Complex<float>*> (heapSpaceA.getData()),
unalignedPointerCast<Complex<float>*> (heapSpaceB.getData()),
d);
}
}

void performRealOnlyForwardTransform (Complex<float>* scratch, float* d) const noexcept
void performRealOnlyForwardTransform (Complex<float>* scratchA, Complex<float>* scratchB, float* d) const noexcept
{
for (int i = 0; i < size; ++i)
scratch[i] = { d[i], 0 };
scratchA[i] = { d[i], 0 };

perform (scratch, reinterpret_cast<Complex<float>*> (d), false);
perform (scratchA, scratchB, false);
memcpy (d, scratchB, sizeof(Complex<float>) * ((size_t) size / 2 + 1));
}

void performRealOnlyInverseTransform (Complex<float>* scratch, float* d) const noexcept
void performRealOnlyInverseTransform (Complex<float>* scratchA, Complex<float>* scratchB, float* d) const noexcept
{
auto* input = reinterpret_cast<Complex<float>*> (d);

for (int i = 0; i < size >> 1; ++i)
scratchB[i] = input[i];
for (int i = size >> 1; i < size; ++i)
input[i] = std::conj (input[size - i]);
scratchB[i] = std::conj (input[size - i]);

perform (input, scratch, true);
perform (scratchB, scratchA, true);

for (int i = 0; i < size; ++i)
{
d[i] = scratch[i].real();
d[i + size] = scratch[i].imag();
}
d[i] = scratchA[i].real();
}

//==============================================================================
Expand Down Expand Up @@ -471,7 +485,7 @@ struct AppleFFT final : public FFT::Instance
vDSP_vsmul ((float*) output, 1, &factor, (float*) output, 1, static_cast<size_t> (size << 1));
}

void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
void performRealOnlyForwardTransform (float* inoutData) const noexcept override
{
auto size = (1 << order);
auto* inout = reinterpret_cast<Complex<float>*> (inoutData);
Expand All @@ -481,7 +495,12 @@ struct AppleFFT final : public FFT::Instance
vDSP_fft_zrip (fftSetup, &splitInOut, 2, order, kFFTDirection_Forward);
vDSP_vsmul (inoutData, 1, &forwardNormalisation, inoutData, 1, static_cast<size_t> (size << 1));

mirrorResult (inout, ignoreNegativeFreqs);
// Imaginary part of nyquist and DC frequencies are always zero
// so Apple uses the imaginary part of the DC frequency to store
// the real part of the nyquist frequency
auto* out = reinterpret_cast<Complex<float>*> (inoutData);
out[size >> 1] = { out[0].imag(), 0.0 };
out[0] = { out[0].real(), 0.0 };
}

void performRealOnlyInverseTransform (float* inoutData) const noexcept override
Expand All @@ -503,22 +522,6 @@ struct AppleFFT final : public FFT::Instance

private:
//==============================================================================
void mirrorResult (Complex<float>* out, bool ignoreNegativeFreqs) const noexcept
{
auto size = (1 << order);
auto i = size >> 1;

// Imaginary part of nyquist and DC frequencies are always zero
// so Apple uses the imaginary part of the DC frequency to store
// the real part of the nyquist frequency
out[i++] = { out[0].imag(), 0.0 };
out[0] = { out[0].real(), 0.0 };

if (! ignoreNegativeFreqs)
for (; i < size; ++i)
out[i] = std::conj (out[size - i]);
}

static DSPSplitComplex toSplitComplex (Complex<float>* data) noexcept
{
// this assumes that Complex interleaves real and imaginary parts
Expand Down Expand Up @@ -684,20 +687,14 @@ struct FFTWImpl : public FFT::Instance
}
}

void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
void performRealOnlyForwardTransform (float* inputOutputData) const noexcept override
{
if (order == 0)
return;

auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);

fftw.execute_r2c_fftw (r2c, inputOutputData, out);

auto size = (1 << order);

if (! ignoreNegativeFreqs)
for (int i = size >> 1; i < size; ++i)
out[i] = std::conj (out[size - i]);
}

void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
Expand Down Expand Up @@ -784,19 +781,12 @@ struct IntelFFT final : public FFT::Instance
DftiComputeForward (c2c, (void*) input, output);
}

void performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept override
void performRealOnlyForwardTransform (float* inputOutputData) const noexcept override
{
if (order == 0)
return;

DftiComputeForward (c2r, inputOutputData);

auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);
auto size = (1 << order);

if (! ignoreNegativeFreqs)
for (int i = size >> 1; i < size; ++i)
out[i] = std::conj (out[size - i]);
}

void performRealOnlyInverseTransform (float* inputOutputData) const noexcept override
Expand Down Expand Up @@ -851,19 +841,9 @@ class IntelPerformancePrimitivesFFT final : public FFT::Instance
}
}

void performRealOnlyForwardTransform (float* inoutData, bool ignoreNegativeFreqs) const noexcept override
void performRealOnlyForwardTransform (float* inoutData) const noexcept override
{
ippsFFTFwd_RToCCS_32f_I (inoutData, real.specPtr, real.workBuf.get());

if (order == 0)
return;

auto* out = reinterpret_cast<Complex<float>*> (inoutData);
const auto size = (1 << order);

if (! ignoreNegativeFreqs)
for (auto i = size >> 1; i < size; ++i)
out[i] = std::conj (out[size - i]);
}

void performRealOnlyInverseTransform (float* inoutData) const noexcept override
Expand Down Expand Up @@ -970,7 +950,19 @@ void FFT::perform (const Complex<float>* input, Complex<float>* output, bool inv
void FFT::performRealOnlyForwardTransform (float* inputOutputData, bool ignoreNegativeFreqs) const noexcept
{
if (engine != nullptr)
engine->performRealOnlyForwardTransform (inputOutputData, ignoreNegativeFreqs);
engine->performRealOnlyForwardTransform (inputOutputData);

if (! ignoreNegativeFreqs && size != 1)
{
// Preserve compatibility with legacy implementation
// where the redundant negative frequencies were also generated.

auto* out = reinterpret_cast<Complex<float>*> (inputOutputData);

if (! ignoreNegativeFreqs)
for (auto i = size >> 1; i < size; ++i)
out[i] = std::conj (out[size - i]);
}
}

void FFT::performRealOnlyInverseTransform (float* inputOutputData) const noexcept
Expand Down
29 changes: 13 additions & 16 deletions modules/juce_dsp/frequency/juce_FFT.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,30 +67,27 @@ class JUCE_API FFT

As the coefficients of the negative frequencies (frequencies higher than
N/2 or pi) are the complex conjugate of their positive counterparts,
it may not be necessary to calculate them for your particular application.
it is usually unnecessary to calculate them.
You can use onlyCalculateNonNegativeFrequencies to let the FFT
engine know that you do not plan on using them. Note that this is only a
hint: some FFT engines (currently only the Fallback engine), will still
calculate the negative frequencies even if onlyCalculateNonNegativeFrequencies
is true.

The size of the array passed in must be 2 * getSize(), and the first half
should contain your raw input sample data. On return, if
onlyCalculateNonNegativeFrequencies is false, the array will contain size
complex real + imaginary parts data interleaved. If
onlyCalculateNonNegativeFrequencies is true, the array will contain at least
(size / 2) + 1 complex numbers. Both outputs can be passed to
engine know that you do not plan on using them.

The array must be large enough to hold the output: (2 * getSize()) floats by default,
or (getSize() + 2) floats if onlyCalculateNonNegativeFrequencies is true.
The first getSize() elements should contain your raw input sample data.

On return, the array will contain interleaved complex data (real and imaginary parts):
getSize() complex numbers by default, or (size / 2) + 1 complex numbers if
onlyCalculateNonNegativeFrequencies is true. Either output can be passed to
performRealOnlyInverseTransform() in order to convert it back to reals.
*/
void performRealOnlyForwardTransform (float* inputOutputData,
bool onlyCalculateNonNegativeFrequencies = false) const noexcept;

/** Performs a reverse operation to data created in performRealOnlyForwardTransform().

Although performRealOnlyInverseTransform will only use the first ((size / 2) + 1)
complex numbers, the size of the array passed in must still be 2 * getSize(), as some
FFT engines require the extra space for the calculation. On return, the first half of the
array will contain the reconstituted samples.
The array passed in must hold at least getSize() + 2 floats, i.e. the
((size / 2) + 1) complex numbers representing the non-negative frequencies of the DFT.
On return, the first getSize() elements of the array will contain the reconstituted samples.
*/
void performRealOnlyInverseTransform (float* inputOutputData) const noexcept;

Expand Down