diff --git a/libraries/lib-fft/RealFFTf.cpp b/libraries/lib-fft/RealFFTf.cpp index d0c7efcbb9c6..0be77406c2ec 100644 --- a/libraries/lib-fft/RealFFTf.cpp +++ b/libraries/lib-fft/RealFFTf.cpp @@ -83,13 +83,6 @@ HFFT InitializeFFT(size_t fftlen) h->SinTable[h->BitReversed[i]+1]=(fft_type)-cos(2*M_PI*i/(2*h->Points)); } -#ifdef EXPERIMENTAL_EQ_SSE_THREADED - // NEW SSE FFT routines work on live data - for(size_t i = 0; i < 32; i++) - if((1 << i) & fftlen) - h->pow2Bits = i; -#endif - return h; } diff --git a/libraries/lib-fft/RealFFTf.h b/libraries/lib-fft/RealFFTf.h index 7a0ee682e6aa..86a53d6ae2ad 100644 --- a/libraries/lib-fft/RealFFTf.h +++ b/libraries/lib-fft/RealFFTf.h @@ -8,9 +8,6 @@ struct FFTParam { ArrayOf BitReversed; ArrayOf SinTable; size_t Points; -#ifdef EXPERIMENTAL_EQ_SSE_THREADED - int pow2Bits; -#endif }; struct FFT_API FFTDeleter{ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4d3f715966ae..48a2ecdbdf4a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -352,8 +352,6 @@ list( APPEND SOURCES effects/EffectUIServices.h effects/Equalization.cpp effects/Equalization.h - effects/Equalization48x.cpp - effects/Equalization48x.h effects/EqualizationBandSliders.cpp effects/EqualizationBandSliders.h effects/EqualizationCurves.cpp diff --git a/src/Experimental.cmake b/src/Experimental.cmake index 5449718e4c04..6d3619393f67 100644 --- a/src/Experimental.cmake +++ b/src/Experimental.cmake @@ -28,9 +28,6 @@ ]] set( EXPERIMENTAL_OPTIONS_LIST - # ACH 08 Jan 2014 - # EQ accelerated code - #EQ_SSE_THREADED ) # Now define the flags diff --git a/src/effects/Equalization48x.cpp b/src/effects/Equalization48x.cpp deleted file mode 100644 index 80236de62f75..000000000000 --- a/src/effects/Equalization48x.cpp +++ /dev/null @@ -1,1318 +0,0 @@ -/********************************************************************** - - Audacity: A Digital Audio Editor - - EffectEqualization.cpp - - Andrew Hallendorff - -*******************************************************************//** - - \file Equalization48x.cpp - \brief Fast SSE based implementation of equalization. - -*//****************************************************************/ - - -#include "Equalization48x.h" - -#ifdef EXPERIMENTAL_EQ_SSE_THREADED -#include "../Project.h" -#include "Equalization.h" -#include "WaveClip.h" -#include "WaveTrack.h" -#include "../float_cast.h" -#include - -#include // for wxUSE_* macros - -#if wxUSE_TOOLTIPS -#include -#endif - -#include - -#include "../RealFFTf48x.h" - -#ifndef USE_SSE2 -#define USE_SSE2 -#endif - -#include - -#ifdef __WXMSW__ -#include -#endif - -#include -#include - -#ifdef _WIN32 - -// Windows -#include -#define cpuid __cpuid - -#else - -// GCC Inline Assembly -void cpuid(int CPUInfo[4],int InfoType){ - __asm__ __volatile__ ( - "cpuid": - "=a" (CPUInfo[0]), - "=b" (CPUInfo[1]), - "=c" (CPUInfo[2]), - "=d" (CPUInfo[3]) : - "a" (InfoType) - ); -} - -#endif - -bool sMathCapsInitialized = false; - -MathCaps sMathCaps; - -// dirty switcher -int sMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED; - -void EffectEqualization48x::SetMathPath(int mathPath) { sMathPath=mathPath; }; - -int EffectEqualization48x::GetMathPath() { return sMathPath; }; - -void EffectEqualization48x::AddMathPathOption(int mathPath) { sMathPath|=mathPath; }; - -void EffectEqualization48x::RemoveMathPathOption(int mathPath) { sMathPath&=~mathPath; }; - -MathCaps *EffectEqualization48x::GetMathCaps() -{ - if(!sMathCapsInitialized) - { - sMathCapsInitialized=true; - sMathCaps.x64 = false; - sMathCaps.MMX = false; - sMathCaps.SSE = false; - sMathCaps.SSE2 = false; - sMathCaps.SSE3 = false; - sMathCaps.SSSE3 = false; - sMathCaps.SSE41 = false; - sMathCaps.SSE42 = false; - sMathCaps.SSE4a = false; - sMathCaps.AVX = false; - sMathCaps.XOP = false; - sMathCaps.FMA3 = false; - sMathCaps.FMA4 = false; - - int info[4]; - cpuid(info, 0); - int nIds = info[0]; - - cpuid(info, 0x80000000); - int nExIds = info[0]; - - // Detect Instruction Set - if (nIds >= 1){ - cpuid(info,0x00000001); - sMathCaps.MMX = (info[3] & ((int)1 << 23)) != 0; - sMathCaps.SSE = (info[3] & ((int)1 << 25)) != 0; - sMathCaps.SSE2 = (info[3] & ((int)1 << 26)) != 0; - sMathCaps.SSE3 = (info[2] & ((int)1 << 0)) != 0; - - sMathCaps.SSSE3 = (info[2] & ((int)1 << 9)) != 0; - sMathCaps.SSE41 = (info[2] & ((int)1 << 19)) != 0; - sMathCaps.SSE42 = (info[2] & ((int)1 << 20)) != 0; - - sMathCaps.AVX = (info[2] & ((int)1 << 28)) != 0; - sMathCaps.FMA3 = (info[2] & ((int)1 << 12)) != 0; - } - - if (nExIds >= 0x80000001){ - cpuid(info,0x80000001); - sMathCaps.x64 = (info[3] & ((int)1 << 29)) != 0; - sMathCaps.SSE4a = (info[2] & ((int)1 << 6)) != 0; - sMathCaps.FMA4 = (info[2] & ((int)1 << 16)) != 0; - sMathCaps.XOP = (info[2] & ((int)1 << 11)) != 0; - } - if(sMathCaps.SSE) - sMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED; // we are starting on. - } - return &sMathCaps; -}; - -void * malloc_simd(const size_t size) -{ -#if defined WIN32 // WIN32 - return _aligned_malloc(size, 16); -#elif defined __linux__ // Linux - return memalign (16, size); -#elif defined __MACH__ // Mac OS X - return malloc(size); -#else // other (use valloc for page-aligned memory) - return valloc(size); -#endif -} - -void free_simd::operator() (void* mem) const -{ -#if defined WIN32 // WIN32 - _aligned_free(mem); -#else - free(mem); -#endif -} - -EffectEqualization48x::EffectEqualization48x(): - mThreadCount(0),mFilterSize(0),mWindowSize(0),mBlockSize(0),mWorkerDataCount(0),mBlocksPerBuffer(20), - mScratchBufferSize(0),mSubBufferSize(0),mThreaded(false), - mBenching(false),mBufferCount(0) -{ -} - -EffectEqualization48x::~EffectEqualization48x() -{ -} - -bool EffectEqualization48x::AllocateBuffersWorkers(int nThreads) -{ - if(mBigBuffer) - FreeBuffersWorkers(); - mFilterSize=(mEffectEqualization->mM-1)&(~15); // 4000 !!! Filter MUST BE QUAD WORD ALIGNED !!!! - mWindowSize=mEffectEqualization->windowSize; - wxASSERT(mFilterSize < mWindowSize); - mBlockSize=mWindowSize-mFilterSize; // 12,384 - auto threadCount = wxThread::GetCPUCount(); - mThreaded = (nThreads > 0 && threadCount > 0); - if(mThreaded) - { - mThreadCount = threadCount; - mWorkerDataCount=mThreadCount+2; // 2 extra slots (maybe double later) - } else { - mWorkerDataCount=1; - mThreadCount=0; - } -#ifdef __AVX_ENABLED - mBufferCount=sMathPath&MATH_FUNCTION_AVX?8:4; -#else - mBufferCount=4; -#endif - // we're skewing the data by one block to allow for 1/4 block intersections. - // this will remove the disparity in data at the intersections of the runs - - // The nice magic allocation - // megabyte - 3 windows - 4 overlapping buffers - filter - // 2^20 = 1,048,576 - 3 * 2^14 (16,384) - ((4 * 20) - 3) * 12,384 - 4000 - // 1,048,576 - 49,152 - 953,568 - 4000 = 41,856 (leftover) - - mScratchBufferSize=mWindowSize*3*sizeof(float)*mBufferCount; // 3 window size blocks of instruction size - mSubBufferSize=mBlockSize*(mBufferCount*(mBlocksPerBuffer-1)); // we are going to do a full block overlap - mBigBuffer.reset( (float *)malloc_simd(sizeof(float) * (mSubBufferSize + mFilterSize + mScratchBufferSize) * mWorkerDataCount) ); // we run over by filtersize - // fill the bufferInfo - mBufferInfo.reinit(mWorkerDataCount); - for(int i=0;iProcessOne(count, track, start, len); - break; - } - return false; -} -#pragma warning(pop) - -bool EffectEqualization48x::Process(EffectEqualization* effectEqualization) -{ - mEffectEqualization=effectEqualization; -// return TrackCompare(); // used for debugging data - mEffectEqualization->CopyInputTracks(); // Set up mOutputTracks. - bool bBreakLoop = false; - - TableUsage(sMathPath); - if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!! - mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1; - AllocateBuffersWorkers(sMathPath&MATH_FUNCTION_THREADED); - auto cleanup = finally( [&] { FreeBuffersWorkers(); } ); - int count = 0; - for( auto track : - mEffectEqualization->mOutputTracks->Selected< WaveTrack >() ) { - double trackStart = track->GetStartTime(); - double trackEnd = track->GetEndTime(); - double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0; - double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1; - - if (t1 > t0) { - auto start = track->TimeToLongSamples(t0); - auto end = track->TimeToLongSamples(t1); - auto len = end - start; - bBreakLoop=RunFunctionSelect(sMathPath, count, track, start, len); - if( bBreakLoop ) - break; - } - count++; - } - - mEffectEqualization->ReplaceProcessedTracks(!bBreakLoop); - return !bBreakLoop; -} - -bool EffectEqualization48x::TrackCompare() -{ - mEffectEqualization->CopyInputTracks(); // Set up mOutputTracks. - bool bBreakLoop = false; - - TableUsage(sMathPath); - if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!! - mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1; - AllocateBuffersWorkers(sMathPath&MATH_FUNCTION_THREADED); - auto cleanup = finally( [&] { FreeBuffersWorkers(); } ); - // Reset map - // PRL: These two maps aren't really used - std::vector SecondIMap; - std::vector SecondOMap; - SecondIMap.clear(); - SecondOMap.clear(); - - auto pSecondOutputTracks = TrackList::Create( nullptr ); - auto &SecondOutputTracks = *pSecondOutputTracks; - - for (auto aTrack : - mEffectEqualization->inputTracks()->Any< const WaveTrack >()) { - - // Include selected tracks, plus sync-lock selected tracks - if (aTrack->GetSelected() || aTrack->IsSyncLockSelected()) - { - auto o = mEffectEqualization->mFactory->DuplicateWaveTrack( *aTrack ); - SecondIMap.push_back(aTrack); - SecondIMap.push_back(o.get()); - SecondOutputTracks.Add( o ); - } - } - - for(int i = 0; i < 2; i++) { - i?sMathPath=sMathPath:sMathPath=0; - int count = 0; - for( auto track : - ( i ? mEffectEqualization->mOutputTracks.get() - : &SecondOutputTracks ) -> Selected< WaveTrack >() ) { - double trackStart = track->GetStartTime(); - double trackEnd = track->GetEndTime(); - double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0; - double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1; - - if (t1 > t0) { - auto start = track->TimeToLongSamples(t0); - auto end = track->TimeToLongSamples(t1); - auto len = end - start; - bBreakLoop=RunFunctionSelect(sMathPath, count, track, start, len); - if( bBreakLoop ) - break; - } - count++; - } - } - - auto iter2 = (SecondOutputTracks.Selected< const WaveTrack >()).first; - auto track2 = *iter2; - for ( auto track : - mEffectEqualization->mOutputTracks->Selected< WaveTrack >() ) { - double trackStart = track->GetStartTime(); - double trackEnd = track->GetEndTime(); - double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0; - double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1; - - if (t1 > t0) { - auto start = track->TimeToLongSamples(t0); - auto end = track->TimeToLongSamples(t1); - auto len = end - start; - DeltaTrack(track, track2, start, len); - } - track2 = * ++iter2; - } - mEffectEqualization->ReplaceProcessedTracks(!bBreakLoop); - return bBreakLoop; // return !bBreakLoop ? -} - -bool EffectEqualization48x::DeltaTrack( - WaveTrack * t, const WaveTrack * t2, sampleCount start, sampleCount len) -{ - - auto trackBlockSize = t->GetMaxBlockSize(); - - Floats buffer1{ trackBlockSize }; - Floats buffer2{ trackBlockSize }; - - auto output = t->EmptyCopy(); - t->ConvertToSampleFormat( floatSample ); - auto originalLen = len; - auto currentSample = start; - - while(len > 0) { - auto curretLength = limitSampleBufferSize(trackBlockSize, len); - t->Get((samplePtr)buffer1.get(), floatSample, currentSample, curretLength); - t2->Get((samplePtr)buffer2.get(), floatSample, currentSample, curretLength); - for(decltype(curretLength) i=0;iAppend((samplePtr)buffer1.get(), floatSample, curretLength); - currentSample+=curretLength; - len-=curretLength; - } - output->Flush(); - len=originalLen; - ProcessTail(t, output.get(), start, len); - return true; -} - -#include - -bool EffectEqualization48x::Benchmark(EffectEqualization* effectEqualization) -{ - mEffectEqualization=effectEqualization; - mEffectEqualization->CopyInputTracks(); // Set up mOutputTracks. - bool bBreakLoop = false; - - TableUsage(sMathPath); - if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!! - mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1; - AllocateBuffersWorkers(MATH_FUNCTION_THREADED); - auto cleanup = finally( [&] { FreeBuffersWorkers(); } ); - long times[] = { 0,0,0,0,0 }; - wxStopWatch timer; - mBenching = true; - for(int i = 0; i < 5 && !bBreakLoop; i++) { - int localMathPath; - switch(i) { - case 0: localMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED; - if(!sMathCaps.SSE) - localMathPath=-1; - break; - case 1: localMathPath=MATH_FUNCTION_SSE; - if(!sMathCaps.SSE) - localMathPath=-1; - break; - case 2: localMathPath=MATH_FUNCTION_SEGMENTED_CODE; - break; - case 3: localMathPath=MATH_FUNCTION_THREADED|MATH_FUNCTION_SEGMENTED_CODE; - break; - case 4: localMathPath=0; - break; - default: localMathPath=-1; - } - if(localMathPath >= 0) { - timer.Start(); - int count = 0; - for (auto track : - mEffectEqualization->mOutputTracks->Selected< WaveTrack >() ) { - double trackStart = track->GetStartTime(); - double trackEnd = track->GetEndTime(); - double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0; - double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1; - - if (t1 > t0) { - auto start = track->TimeToLongSamples(t0); - auto end = track->TimeToLongSamples(t1); - auto len = end - start; - bBreakLoop=RunFunctionSelect( localMathPath, count, track, start, len); - if( bBreakLoop ) - break; - } - count++; - } - times[i]=timer.Time(); - } - } - mBenching=false; - bBreakLoop=false; - mEffectEqualization->ReplaceProcessedTracks(bBreakLoop); - - wxTimeSpan tsSSEThreaded(0, 0, 0, times[0]); - wxTimeSpan tsSSE(0, 0, 0, times[1]); - wxTimeSpan tsDefaultEnhanced(0, 0, 0, times[2]); - wxTimeSpan tsDefaultThreaded(0, 0, 0, times[3]); - wxTimeSpan tsDefault(0, 0, 0, times[4]); - - mEffectEqualization->MessageBox( - XO( -"Benchmark times:\nOriginal: %s\nDefault Segmented: %s\nDefault Threaded: %s\nSSE: %s\nSSE Threaded: %s\n") - .Format( - tsDefault.Format(wxT("%M:%S.%l")), - tsDefaultEnhanced.Format(wxT("%M:%S.%l")), - tsDefaultThreaded.Format(wxT("%M:%S.%l")), - tsSSE.Format(wxT("%M:%S.%l")), - tsSSEThreaded.Format(wxT("%M:%S.%l")) ) ); - return bBreakLoop; // return !bBreakLoop ? -} - -bool EffectEqualization48x::ProcessTail(WaveTrack * t, WaveTrack * output, sampleCount start, sampleCount len) -{ - // double offsetT0 = t->LongSamplesToTime(offset); - double lenT = t->LongSamplesToTime(len); - // 'start' is the sample offset in 't', the passed in track - // 'startT' is the equivalent time value - // 'output' starts at zero - double startT = t->LongSamplesToTime(start); - - //output has one waveclip for the total length, even though - //t might have whitespace separating multiple clips - //we want to maintain the original clip structure, so - //only paste the intersections of the NEW clip. - - //Find the bits of clips that need replacing - std::vector > clipStartEndTimes; - std::vector > clipRealStartEndTimes; //the above may be truncated due to a clip being partially selected - for (const auto &clip: t->GetClips()) - { - double clipStartT; - double clipEndT; - - clipStartT = clip->GetStartTime(); - clipEndT = clip->GetEndTime(); - if( clipEndT <= startT ) - continue; // clip is not within selection - if( clipStartT >= startT + lenT ) - continue; // clip is not within selection - - //save the actual clip start/end so that we can rejoin them after we paste. - clipRealStartEndTimes.push_back(std::pair(clipStartT,clipEndT)); - - if( clipStartT < startT ) // does selection cover the whole clip? - clipStartT = startT; // don't copy all the NEW clip - if( clipEndT > startT + lenT ) // does selection cover the whole clip? - clipEndT = startT + lenT; // don't copy all the NEW clip - - //save them - clipStartEndTimes.push_back(std::pair(clipStartT,clipEndT)); - } - //now go thru and replace the old clips with NEW - for(unsigned int i=0;iClear(clipStartEndTimes[i].first,clipStartEndTimes[i].second); - // output->Copy(clipStartEndTimes[i].first-startT+offsetT0,clipStartEndTimes[i].second-startT+offsetT0, &toClipOutput); - auto toClipOutput = output->Copy(clipStartEndTimes[i].first-startT, clipStartEndTimes[i].second-startT); - //put the processed audio in - t->Paste(clipStartEndTimes[i].first, toClipOutput.get()); - //if the clip was only partially selected, the Paste will have created a split line. Join is needed to take care of this - //This is not true when the selection is fully contained within one clip (second half of conditional) - if( (clipRealStartEndTimes[i].first != clipStartEndTimes[i].first || - clipRealStartEndTimes[i].second != clipStartEndTimes[i].second) && - !(clipRealStartEndTimes[i].first <= startT && - clipRealStartEndTimes[i].second >= startT+lenT) ) - t->Join(clipRealStartEndTimes[i].first,clipRealStartEndTimes[i].second); - } - return true; -} - -bool EffectEqualization48x::ProcessBuffer(fft_type *sourceBuffer, fft_type *destBuffer, size_t bufferLength) -{ - BufferInfo bufferInfo; - bufferInfo.mContiguousBufferSize=bufferLength; - bufferInfo.mBufferSouce[0]=sourceBuffer; - bufferInfo.mBufferDest[0]=destBuffer; - bufferInfo.mScratchBuffer=&sourceBuffer[mSubBufferSize]; - return ProcessBuffer1x(&bufferInfo); -} - -bool EffectEqualization48x::ProcessBuffer1x(BufferInfo *bufferInfo) -{ - int bufferCount=bufferInfo->mContiguousBufferSize?1:4; - for(int bufferIndex=0;bufferIndexmBufferLength; - if(bufferInfo->mContiguousBufferSize) - bufferLength=bufferInfo->mContiguousBufferSize; - - auto blockCount=bufferLength/mBlockSize; - auto lastBlockSize=bufferLength%mBlockSize; - if(lastBlockSize) - blockCount++; - - float *workBuffer=bufferInfo->mScratchBuffer; // all scratch buffers are at the end - float *scratchBuffer=&workBuffer[mWindowSize*2]; // all scratch buffers are at the end - float *sourceBuffer=bufferInfo->mBufferSouce[bufferIndex]; - float *destBuffer=bufferInfo->mBufferDest[bufferIndex]; - for(size_t runx=0;runxFilter(mWindowSize, currentBuffer); - Filter1x(mWindowSize, currentBuffer, scratchBuffer); - float *writeEnd=currentBuffer+mBlockSize; - if(runx==blockCount) - writeEnd=currentBuffer+(lastBlockSize+mFilterSize); - if(runx) { - float *lastOverrun=&workBuffer[mWindowSize*((runx+1)&1)+mBlockSize]; - for(int j=0;j>1; // this will skip the first filterSize on the first run - while(currentBufferGetMaxBlockSize(); - - auto output = t->EmptyCopy(); - t->ConvertToSampleFormat( floatSample ); - - mEffectEqualization->TrackProgress(count, 0.0); - int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize; // half the buffers if avx is active - auto bigRuns=len/(subBufferSize-mBlockSize); - int trackBlocksPerBig=subBufferSize/trackBlockSize; - int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize; - size_t singleProcessLength; - if(bigRuns == 0) - singleProcessLength = len.as_size_t(); - else - singleProcessLength = - ((mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize))) - .as_size_t(); - auto currentSample=start; - bool bBreakLoop = false; - for(int bigRun=0;bigRunGet((samplePtr)&mBigBuffer[i*trackBlockSize], floatSample, currentSample, trackBlockSize); - currentSample+=trackBlockSize; - } - if(trackLeftovers) { - t->Get((samplePtr)&mBigBuffer[trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers); - currentSample+=trackLeftovers; - } - currentSample-=mBlockSize+(mFilterSize>>1); - - ProcessBuffer1x(mBufferInfo.get()); - bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/bigRuns.as_double()); - if( bBreakLoop ) - break; - output->Append((samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)], floatSample, subBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1))); - } - if(singleProcessLength && !bBreakLoop) { - t->Get((samplePtr)mBigBuffer.get(), floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); - ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1)); - output->Append((samplePtr)&mBigBuffer[bigRuns > 0 ? mBlockSize : 0], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); - } - output->Flush(); - if(!bBreakLoop) - ProcessTail(t, output.get(), start, len); - return bBreakLoop; -} - -void EffectEqualization48x::Filter1x(size_t len, - float *buffer, float *scratchBuffer) -{ - int i; - float real, imag; - // Apply FFT - RealFFTf1x(buffer, mEffectEqualization->hFFT.get()); - - // Apply filter - // DC component is purely real - - float filterFuncR, filterFuncI; - filterFuncR = mEffectEqualization->mFilterFuncR[0]; - scratchBuffer[0] = buffer[0] * filterFuncR; - auto halfLength = (len / 2); - - bool useBitReverseTable=sMathPath&1; - - for(i = 1; i < halfLength; i++) - { - if(useBitReverseTable) { - real=buffer[mEffectEqualization->hFFT->BitReversed[i] ]; - imag=buffer[mEffectEqualization->hFFT->BitReversed[i]+1]; - } else { - int bitReversed=SmallRB(i,mEffectEqualization->hFFT->pow2Bits); - real=buffer[bitReversed]; - imag=buffer[bitReversed+1]; - } - filterFuncR=mEffectEqualization->mFilterFuncR[i]; - filterFuncI=mEffectEqualization->mFilterFuncI[i]; - - scratchBuffer[2*i ] = real*filterFuncR - imag*filterFuncI; - scratchBuffer[2*i+1] = real*filterFuncI + imag*filterFuncR; - } - // Fs/2 component is purely real - filterFuncR=mEffectEqualization->mFilterFuncR[halfLength]; - scratchBuffer[1] = buffer[1] * filterFuncR; - - // Inverse FFT and normalization - InverseRealFFTf1x(scratchBuffer, mEffectEqualization->hFFT.get()); - ReorderToTime1x(mEffectEqualization->hFFT.get(), scratchBuffer, buffer); -} - -bool EffectEqualization48x::ProcessBuffer4x(BufferInfo *bufferInfo) -{ - // length must be a factor of window size for 4x processing. - if(bufferInfo->mBufferLength%mBlockSize) - return false; - - auto blockCount=bufferInfo->mBufferLength/mBlockSize; - - __m128 *readBlocks[4]; // some temps so we don't destroy the vars in the struct - __m128 *writeBlocks[4]; - for(int i=0;i<4;i++) { - readBlocks[i]=(__m128 *)bufferInfo->mBufferSouce[i]; - writeBlocks[i]=(__m128 *)bufferInfo->mBufferDest[i]; - } - - __m128 *swizzledBuffer128=(__m128 *)bufferInfo->mScratchBuffer; - __m128 *scratchBuffer=&swizzledBuffer128[mWindowSize*2]; - - for(size_t run4x=0;run4x>2; - // swizzle it back. - for(int i=writeToStart,j=writeStart;j>2; // these are 128b pointers, each window is 1/4 blockSize for those - writeBlocks[i]+=mBlockSize>>2; - } - } - return true; -} - -bool EffectEqualization48x::ProcessOne4x(int count, WaveTrack * t, - sampleCount start, sampleCount len) -{ - int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize; // half the buffers if avx is active - - if(lenGetMaxBlockSize(); - - auto output = t->EmptyCopy(); - t->ConvertToSampleFormat( floatSample ); - - mEffectEqualization->TrackProgress(count, 0.0); - auto bigRuns = len/(subBufferSize-mBlockSize); - int trackBlocksPerBig=subBufferSize/trackBlockSize; - int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize; - size_t singleProcessLength = - ((mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize))) - .as_size_t(); - auto currentSample=start; - - bool bBreakLoop = false; - for(int bigRun=0;bigRunGet((samplePtr)&mBigBuffer[i*trackBlockSize], floatSample, currentSample, trackBlockSize); - currentSample+=trackBlockSize; - } - if(trackLeftovers) { - t->Get((samplePtr)&mBigBuffer[trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers); - currentSample+=trackLeftovers; - } - currentSample-=mBlockSize+(mFilterSize>>1); - - ProcessBuffer4x(mBufferInfo.get()); - bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/bigRuns.as_double()); - if( bBreakLoop ) - break; - output->Append((samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)], floatSample, subBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1))); - } - if(singleProcessLength && !bBreakLoop) { - t->Get((samplePtr)mBigBuffer.get(), floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); - ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1)); - output->Append((samplePtr)&mBigBuffer[bigRuns > 0 ? mBlockSize : 0], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); -// output->Append((samplePtr)&mBigBuffer[bigRuns?mBlockSize:0], floatSample, singleProcessLength); - } - output->Flush(); - if(!bBreakLoop) - ProcessTail(t, output.get(), start, len); - return bBreakLoop; -} - -#include - -void *EQWorker::Entry() -{ - while(!mExitLoop) { - int i = 0; - { - wxMutexLocker locker( *mMutex ); - for(; i < mBufferInfoCount; i++) { - if(mBufferInfoList[i].mBufferStatus==BufferReady) { // we found an unlocked ready buffer - mBufferInfoList[i].mBufferStatus=BufferBusy; // we own it now - break; - } - } - } - if ( i < mBufferInfoCount ) { - switch (mProcessingType) - { - case 1: - mEffectEqualization48x->ProcessBuffer1x(&mBufferInfoList[i]); - break; - case 4: - mEffectEqualization48x->ProcessBuffer4x(&mBufferInfoList[i]); - break; - } - mBufferInfoList[i].mBufferStatus=BufferDone; // we're done - } - } - return NULL; -} - -bool EffectEqualization48x::ProcessOne1x4xThreaded(int count, WaveTrack * t, - sampleCount start, sampleCount len, int processingType) -{ - int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize; // half the buffers if avx is active - - sampleCount blockCount=len/mBlockSize; - - if(blockCount<16) // it's not worth 4x processing do a regular process - return ProcessOne4x(count, t, start, len); - if(mThreadCount<=0 || blockCount<256) // don't do it without cores or big data - return ProcessOne4x(count, t, start, len); - - for(int i=0;iEmptyCopy(); - t->ConvertToSampleFormat( floatSample ); - - auto trackBlockSize = t->GetMaxBlockSize(); - mEffectEqualization->TrackProgress(count, 0.0); - auto bigRuns = len/(subBufferSize-mBlockSize); - int trackBlocksPerBig=subBufferSize/trackBlockSize; - int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize; - size_t singleProcessLength = - ((mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize))) - .as_size_t(); - auto currentSample=start; - - int bigBlocksRead=mWorkerDataCount, bigBlocksWritten=0; - - // fill the first workerDataCount buffers we checked above and there is at least this data - auto maxPreFill = bigRuns < mWorkerDataCount ? bigRuns : mWorkerDataCount; - for(int i=0;iGet((samplePtr)&mBufferInfo[i].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize); - currentSample+=trackBlockSize; - } - if(trackLeftovers) { - t->Get((samplePtr)&mBufferInfo[i].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers); - currentSample+=trackLeftovers; - } - currentSample-=mBlockSize+(mFilterSize>>1); - mBufferInfo[i].mBufferStatus=BufferReady; // free for grabbin - } - int currentIndex=0; - bool bBreakLoop = false; - while(bigBlocksWrittenTrackProgress(count, (double)(bigBlocksWritten)/bigRuns.as_double()); - if( bBreakLoop ) - break; - wxMutexLocker locker( mDataMutex ); // Get in line for data - // process as many blocks as we can - while((mBufferInfo[currentIndex].mBufferStatus==BufferDone) && (bigBlocksWrittenAppend((samplePtr)&mBufferInfo[currentIndex].mBufferDest[0][(bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)], floatSample, subBufferSize-((bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1))); - bigBlocksWritten++; - if(bigBlocksReadGet((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize); - currentSample+=trackBlockSize; - } - if(trackLeftovers) { - t->Get((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers); - currentSample+=trackLeftovers; - } - currentSample-=mBlockSize+(mFilterSize>>1); - mBufferInfo[currentIndex].mBufferStatus=BufferReady; // free for grabbin - bigBlocksRead++; - } else mBufferInfo[currentIndex].mBufferStatus=BufferEmpty; // this is completely unnecessary - currentIndex=(currentIndex+1)%mWorkerDataCount; - } - } - if(singleProcessLength && !bBreakLoop) { - t->Get((samplePtr)mBigBuffer.get(), floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); - ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1)); - output->Append((samplePtr)&mBigBuffer[mBlockSize], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); - } - output->Flush(); - if(!bBreakLoop) - ProcessTail(t, output.get(), start, len); - return bBreakLoop; -} - -void EffectEqualization48x::Filter4x(size_t len, - float *buffer, float *scratchBuffer) -{ - int i; - __m128 real128, imag128; - // Apply FFT - RealFFTf4x(buffer, mEffectEqualization->hFFT.get()); - - // Apply filter - // DC component is purely real - __m128 *localFFTBuffer=(__m128 *)scratchBuffer; - __m128 *localBuffer=(__m128 *)buffer; - - __m128 filterFuncR, filterFuncI; - filterFuncR = _mm_set1_ps(mEffectEqualization->mFilterFuncR[0]); - localFFTBuffer[0] = _mm_mul_ps(localBuffer[0], filterFuncR); - auto halfLength = (len / 2); - - bool useBitReverseTable = sMathPath & 1; - - for(i = 1; i < halfLength; i++) - { - if(useBitReverseTable) { - real128=localBuffer[mEffectEqualization->hFFT->BitReversed[i] ]; - imag128=localBuffer[mEffectEqualization->hFFT->BitReversed[i]+1]; - } else { - int bitReversed=SmallRB(i,mEffectEqualization->hFFT->pow2Bits); - real128=localBuffer[bitReversed]; - imag128=localBuffer[bitReversed+1]; - } - filterFuncR=_mm_set1_ps(mEffectEqualization->mFilterFuncR[i]); - filterFuncI=_mm_set1_ps(mEffectEqualization->mFilterFuncI[i]); - localFFTBuffer[2*i ] = _mm_sub_ps( _mm_mul_ps(real128, filterFuncR), _mm_mul_ps(imag128, filterFuncI)); - localFFTBuffer[2*i+1] = _mm_add_ps( _mm_mul_ps(real128, filterFuncI), _mm_mul_ps(imag128, filterFuncR)); - } - // Fs/2 component is purely real - filterFuncR=_mm_set1_ps(mEffectEqualization->mFilterFuncR[halfLength]); - localFFTBuffer[1] = _mm_mul_ps(localBuffer[1], filterFuncR); - - // Inverse FFT and normalization - InverseRealFFTf4x(scratchBuffer, mEffectEqualization->hFFT.get()); - ReorderToTime4x(mEffectEqualization->hFFT.get(), scratchBuffer, buffer); -} - -#ifdef __AVX_ENABLED - -// note although written it has not been tested - -bool EffectEqualization48x::ProcessBuffer8x(BufferInfo *bufferInfo) -{ - // length must be a factor of window size for 4x processing. - if(bufferInfo->mBufferLength%mBlockSize || mBufferCount!=8) - return false; - - auto blockCount=bufferInfo->mBufferLength/mBlockSize; - - __m128 *readBlocks[8]; // some temps so we don't destroy the vars in the struct - __m128 *writeBlocks[8]; - for(int i=0;i<8;i++) { - readBlocks[i]=(__m128 *)bufferInfo->mBufferSouce[i]; - writeBlocks[i]=(__m128 *)bufferInfo->mBufferDest[i]; - } - - __m128 *swizzledBuffer128=(__m128 *)bufferInfo->mScratchBuffer; - __m128 *scratchBuffer=&swizzledBuffer128[mWindowSize*4]; - - int doubleFilter=mFilterSize<<1; - int doubleWindow=mWindowSize<<1; - int doubleBlock=mBlockSize<<1; - for(int run4x=0;run4x>2; - // swizzle it back. - for(int i=writeToStart,j=writeStart;j>2; // these are 128b pointers, each window is 1/4 blockSize for those - writeBlocks[i]+=mBlockSize>>2; - } - } - return true; -} - -bool EffectEqualization48x::ProcessOne8x(int count, WaveTrack * t, - sampleCount start, sampleCount len) -{ - sampleCount blockCount=len/mBlockSize; - - if(blockCount<32) // it's not worth 8x processing do a regular process - return ProcessOne4x(count, t, start, len); - - auto trackBlockSize = t->GetMaxBlockSize(); - - auto output = t->EmptyCopy(); - t->ConvertToSampleFormat( floatSample ); - - mEffectEqualization->TrackProgress(count, 0.0); - int bigRuns=len/(mSubBufferSize-mBlockSize); - int trackBlocksPerBig=mSubBufferSize/trackBlockSize; - int trackLeftovers=mSubBufferSize-trackBlocksPerBig*trackBlockSize; - int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(mSubBufferSize-mBlockSize)); - auto currentSample=start; - - bool bBreakLoop = false; - for(int bigRun=0;bigRunGet((samplePtr)&mBigBuffer[i*trackBlockSize], floatSample, currentSample, trackBlockSize); - currentSample+=trackBlockSize; - } - if(trackLeftovers) { - t->Get((samplePtr)&mBigBuffer[trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers); - currentSample+=trackLeftovers; - } - currentSample-=mBlockSize+(mFilterSize>>1); - - ProcessBuffer4x(mBufferInfo); - if (bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/(double)bigRuns)) - { - break; - } - output->Append((samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)], floatSample, mSubBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1))); - } - if(singleProcessLength && !bBreakLoop) { - t->Get((samplePtr)mBigBuffer.get(), floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); - ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1)); - output->Append((samplePtr)&mBigBuffer[mBlockSize], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); - } - output->Flush(); - if(!bBreakLoop) - ProcessTail(t, output.get(), start, len); - return bBreakLoop; -} - -bool EffectEqualization48x::ProcessOne8xThreaded(int count, WaveTrack * t, - sampleCount start, sampleCount len) -{ - sampleCount blockCount=len/mBlockSize; - - if(blockCount<16) // it's not worth 4x processing do a regular process - return ProcessOne4x(count, t, start, len); - if(mThreadCount<=0 || blockCount<256) // don't do it without cores or big data - return ProcessOne4x(count, t, start, len); - - auto output = t->EmptyCopy(); - t->ConvertToSampleFormat( floatSample ); - - auto trackBlockSize = t->GetMaxBlockSize(); - mEffectEqualization->TrackProgress(count, 0.0); - int bigRuns=len/(mSubBufferSize-mBlockSize); - int trackBlocksPerBig=mSubBufferSize/trackBlockSize; - int trackLeftovers=mSubBufferSize-trackBlocksPerBig*trackBlockSize; - int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(mSubBufferSize-mBlockSize)); - auto currentSample=start; - - int bigBlocksRead=mWorkerDataCount, bigBlocksWritten=0; - - // fill the first workerDataCount buffers we checked above and there is at least this data - for(int i=0;iGet((samplePtr)&mBufferInfo[i].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize); - currentSample+=trackBlockSize; - } - if(trackLeftovers) { - t->Get((samplePtr)&mBufferInfo[i].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers); - currentSample+=trackLeftovers; - } - currentSample-=mBlockSize+(mFilterSize>>1); - mBufferInfo[i].mBufferStatus=BufferReady; // free for grabbin - } - int currentIndex=0; - bool bBreakLoop = false; - while(bigBlocksWrittenTrackProgress(count, (double)(bigBlocksWritten)/(double)bigRuns)) - { - break; - } - wxMutexLocker locker( mDataMutex ); // Get in line for data - // process as many blocks as we can - while((mBufferInfo[currentIndex].mBufferStatus==BufferDone) && (bigBlocksWrittenAppend((samplePtr)&mBufferInfo[currentIndex].mBufferDest[0][(bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)], floatSample, mSubBufferSize-((bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1))); - bigBlocksWritten++; - if(bigBlocksReadGet((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize); - currentSample+=trackBlockSize; - } - if(trackLeftovers) { - t->Get((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers); - currentSample+=trackLeftovers; - } - currentSample-=mBlockSize+(mFilterSize>>1); - mBufferInfo[currentIndex].mBufferStatus=BufferReady; // free for grabbin - bigBlocksRead++; - } else mBufferInfo[currentIndex].mBufferStatus=BufferEmpty; // this is completely unnecessary - currentIndex=(currentIndex+1)%mWorkerDataCount; - } - } - if(singleProcessLength && !bBreakLoop) { - t->Get((samplePtr)mBigBuffer.get(), floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); - ProcessBuffer(mBigBuffer.get(), mBigBuffer.get(), singleProcessLength+mBlockSize+(mFilterSize>>1)); - output->Append((samplePtr)&mBigBuffer[mBlockSize], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1)); - } - output->Flush(); - if(!bBreakLoop) - ProcessTail(t, output.get(), start, len); - return bBreakLoop; -} - - - - -void EffectEqualization48x::Filter8x(size_t len, - float *buffer, float *scratchBuffer) -{ - int i; - __m256 real256, imag256; - // Apply FFT - RealFFTf8x(buffer, mEffectEqualization->hFFT); - - // Apply filter - // DC component is purely real - __m256 *localFFTBuffer=(__m256 *)scratchBuffer; - __m256 *localBuffer=(__m256 *)buffer; - - __m256 filterFuncR, filterFuncI; - filterFuncR = _mm256_set1_ps(mEffectEqualization->mFilterFuncR[0]); - localFFTBuffer[0] = _mm256_mul_ps(localBuffer[0], filterFuncR); - auto halfLength = (len / 2); - - bool useBitReverseTable = sMathPath & 1; - - for(i = 1; i < halfLength; i++) - { - if(useBitReverseTable) { - real256=localBuffer[mEffectEqualization->hFFT->BitReversed[i] ]; - imag256=localBuffer[mEffectEqualization->hFFT->BitReversed[i]+1]; - } else { - int bitReversed=SmallRB(i,mEffectEqualization->hFFT->pow2Bits); - real256=localBuffer[bitReversed]; - imag256=localBuffer[bitReversed+1]; - } - filterFuncR=_mm256_set1_ps(mEffectEqualization->mFilterFuncR[i]); - filterFuncI=_mm256_set1_ps(mEffectEqualization->mFilterFuncI[i]); - localFFTBuffer[2*i ] = _mm256_sub_ps( _mm256_mul_ps(real256, filterFuncR), _mm256_mul_ps(imag256, filterFuncI)); - localFFTBuffer[2*i+1] = _mm256_add_ps( _mm256_mul_ps(real256, filterFuncI), _mm256_mul_ps(imag256, filterFuncR)); - } - // Fs/2 component is purely real - filterFuncR=_mm256_set1_ps(mEffectEqualization->mFilterFuncR[halfLength]); - localFFTBuffer[1] = _mm256_mul_ps(localBuffer[1], filterFuncR); - - // Inverse FFT and normalization - InverseRealFFTf8x(scratchBuffer, mEffectEqualization->hFFT); - ReorderToTime8x(mEffectEqualization->hFFT, scratchBuffer, buffer); -} - -#endif - -#endif diff --git a/src/effects/Equalization48x.h b/src/effects/Equalization48x.h deleted file mode 100644 index 1335ceea0a4e..000000000000 --- a/src/effects/Equalization48x.h +++ /dev/null @@ -1,180 +0,0 @@ -/********************************************************************** - -Audacity: A Digital Audio Editor - -Equalization48x.h - -Intrinsics (SSE/AVX) and Threaded Equalization - -***********************************************************************/ - -#ifndef __AUDACITY_EFFECT_EQUALIZATION48X__ -#define __AUDACITY_EFFECT_EQUALIZATION48X__ - -#ifdef EXPERIMENTAL_EQ_SSE_THREADED - -#include - -#include // to inherit -#include -class WaveTrack; -using fft_type = float; - -#ifdef __AVX_ENABLED -#define __MAXBUFFERCOUNT 8 -#else -#define __MAXBUFFERCOUNT 4 -#endif - -// bitwise function selection -// options are -#define MATH_FUNCTION_ORIGINAL 0 // 0 original path -#define MATH_FUNCTION_BITREVERSE_TABLE 1 // 1 SSE BitReverse Table -#define MATH_FUNCTION_SIN_COS_TABLE 2 // 2 SSE SinCos Table -#define MATH_FUNCTION_THREADED 4 // 4 SSE threaded no SinCos and no BitReverse buffer -#define MATH_FUNCTION_SSE 8 // 8 SSE no SinCos and no BitReverse buffer -#define MATH_FUNCTION_AVX 16 -#define MATH_FUNCTION_SEGMENTED_CODE 32 - -struct free_simd { - void operator () (void*) const; -}; -using simd_floats = std::unique_ptr< float[], free_simd >; - -// added by Andrew Hallendorff intrinsics processing -enum EQBufferStatus -{ - BufferEmpty=0, - BufferReady, - BufferBusy, - BufferDone -}; - -class BufferInfo { -public: - BufferInfo() { mBufferLength=0; mBufferStatus=BufferEmpty; mContiguousBufferSize=0; }; - float* mBufferSouce[__MAXBUFFERCOUNT]; - float* mBufferDest[__MAXBUFFERCOUNT]; - size_t mBufferLength; - size_t mFftWindowSize; - size_t mFftFilterSize; - float* mScratchBuffer; - size_t mContiguousBufferSize; - EQBufferStatus mBufferStatus; -}; - -typedef struct { - int x64; - int MMX; - int SSE; - int SSE2; - int SSE3; - int SSSE3; - int SSE41; - int SSE42; - int SSE4a; - int AVX; - int XOP; - int FMA3; - int FMA4; -} MathCaps; - -class EffectEqualization; - -class EffectEqualization48x; - -static int EQWorkerCounter=0; - -class EQWorker : public wxThread { -public: - EQWorker():wxThread(wxTHREAD_JOINABLE) { - mBufferInfoList=NULL; - mBufferInfoCount=0; - mMutex=NULL; - mEffectEqualization48x=NULL; - mExitLoop=false; - mThreadID=EQWorkerCounter++; - mProcessingType=4; - } - void SetData( BufferInfo* bufferInfoList, int bufferInfoCount, wxMutex *mutex, EffectEqualization48x *effectEqualization48x) { - mBufferInfoList=bufferInfoList; - mBufferInfoCount=bufferInfoCount; - mMutex=mutex; - mEffectEqualization48x=effectEqualization48x; - } - void ExitLoop() { // this will cause the thread to drop from the loops - mExitLoop=true; - } - void* Entry() override; - BufferInfo* mBufferInfoList; - int mBufferInfoCount, mThreadID; - wxMutex *mMutex; - EffectEqualization48x *mEffectEqualization48x; - bool mExitLoop; - int mProcessingType; -}; - -class EffectEqualization48x { - -public: - - EffectEqualization48x(); - virtual ~EffectEqualization48x(); - - static MathCaps *GetMathCaps(); - static void SetMathPath(int mathPath); - static int GetMathPath(); - static void AddMathPathOption(int mathPath); - static void RemoveMathPathOption(int mathPath); - - bool Process(EffectEqualization* effectEqualization); - bool Benchmark(EffectEqualization* effectEqualization); -private: - bool RunFunctionSelect(int flags, int count, WaveTrack * t, sampleCount start, sampleCount len); - bool TrackCompare(); - bool DeltaTrack(WaveTrack * t, const WaveTrack * t2, sampleCount start, sampleCount len); - bool AllocateBuffersWorkers(int nThreads); - bool FreeBuffersWorkers(); - - bool ProcessTail(WaveTrack * t, WaveTrack * output, sampleCount start, sampleCount len); - - bool ProcessBuffer(fft_type *sourceBuffer, fft_type *destBuffer, size_t bufferLength); - bool ProcessBuffer1x(BufferInfo *bufferInfo); - bool ProcessOne1x(int count, WaveTrack * t, sampleCount start, sampleCount len); - void Filter1x(size_t len, float *buffer, float *scratchBuffer); - - bool ProcessBuffer4x(BufferInfo *bufferInfo); - bool ProcessOne4x(int count, WaveTrack * t, sampleCount start, sampleCount len); - bool ProcessOne1x4xThreaded(int count, WaveTrack * t, sampleCount start, sampleCount len, int processingType=4); - void Filter4x(size_t len, float *buffer, float *scratchBuffer); - -#ifdef __AVX_ENABLED - bool ProcessBuffer8x(BufferInfo *bufferInfo); - bool ProcessOne8x(int count, WaveTrack * t, sampleCount start, sampleCount len); - bool ProcessOne8xThreaded(int count, WaveTrack * t, sampleCount start, sampleCount len); - void Filter8x(size_t len, float *buffer, float *scratchBuffer); -#endif - - EffectEqualization* mEffectEqualization; - size_t mThreadCount; - size_t mFilterSize; - size_t mBlockSize; - size_t mWindowSize; - int mBufferCount; - size_t mWorkerDataCount; - size_t mBlocksPerBuffer; - size_t mScratchBufferSize; - size_t mSubBufferSize; - simd_floats mBigBuffer; - ArrayOf mBufferInfo; - wxMutex mDataMutex; - ArrayOf mEQWorkers; - bool mThreaded; - bool mBenching; - friend EQWorker; - friend EffectEqualization; -}; - -#endif - -#endif diff --git a/src/prefs/EffectsPrefs.cpp b/src/prefs/EffectsPrefs.cpp index 3cf672e1addb..335a8be38281 100644 --- a/src/prefs/EffectsPrefs.cpp +++ b/src/prefs/EffectsPrefs.cpp @@ -124,16 +124,6 @@ void EffectsPrefs::PopulateOrExchange(ShuttleGui & S) } S.EndStatic(); -#ifdef EXPERIMENTAL_EQ_SSE_THREADED - S.StartStatic(XO("Instruction Set")); - { - S.TieCheckBox(XXO("&Use SSE/SSE2/.../AVX"), - {wxT("/SSE/GUI"), - true}); - } - S.EndStatic(); -#endif - if (auto pButton = S.AddButton(XXO("Open Plugin &Manager"), wxALIGN_LEFT)) pButton->Bind(wxEVT_BUTTON, [this](auto) { //Adding dependency on PluginRegistrationDialog, not good. Alternatively