From 47ad99719a8b28d11d982cc59cf58b01f55e537b Mon Sep 17 00:00:00 2001
From: WuChang <3142324836@qq.com>
Date: Thu, 11 Jan 2024 17:51:12 +0800
Subject: [PATCH] Add SIMD speed up

---
 app/data/config/function.json                 |   3 +-
 app/translates/zh-CN/config.txt               |   1 +
 src/audioCore/graph/MainGraph.cpp             |   7 +-
 src/audioCore/misc/VMath.cpp                  | 212 +++++++++++++++++-
 src/audioCore/misc/VMath.h                    |  21 ++
 src/audioCore/quickAPI/QuickGet.cpp           |  13 ++
 src/audioCore/quickAPI/QuickGet.h             |   4 +
 src/audioCore/quickAPI/QuickSet.cpp           |  17 ++
 src/audioCore/quickAPI/QuickSet.h             |   2 +
 src/audioCore/source/CloneableAudioSource.cpp |  24 ++-
 src/main.cpp                                  |   1 +
 src/ui/component/ConfigComponent.cpp          |  10 +
 12 files changed, 290 insertions(+), 25 deletions(-)

diff --git a/app/data/config/function.json b/app/data/config/function.json
index ef1d9153..3b180c48 100644
--- a/app/data/config/function.json
+++ b/app/data/config/function.json
@@ -1,5 +1,6 @@
-{
+{
 	"return-on-stop": true,
 	"anonymous-mode": false,
+	"simd-speed-up": 3,
 	"cpu-painting": false
 }
\ No newline at end of file
diff --git a/app/translates/zh-CN/config.txt b/app/translates/zh-CN/config.txt
index 9ec0fd97..b2bb1d76 100644
--- a/app/translates/zh-CN/config.txt
+++ b/app/translates/zh-CN/config.txt
@@ -18,6 +18,7 @@ countries: cn
 "Audio Core" = "音频内核"
 "return-on-stop" = "在停止时返回"
 "anonymous-mode" = "匿名模式"
+"simd-speed-up" = "SIMD加速"
 "Performance" = "性能"
 "cpu-painting" = "CPU绘图"
 "System" = "系统"
diff --git a/src/audioCore/graph/MainGraph.cpp b/src/audioCore/graph/MainGraph.cpp
index 86029fa7..f7e2ccb4 100644
--- a/src/audioCore/graph/MainGraph.cpp
+++ b/src/audioCore/graph/MainGraph.cpp
@@ -3,6 +3,7 @@
 #include "../misc/PlayPosition.h"
 #include "../misc/Renderer.h"
 #include "../misc/AudioLock.h"
+#include "../misc/VMath.h"
 #include "../source/CloneableSourceManager.h"
 #include "../AudioCore.h"
 #include "../Utils.h"
@@ -500,7 +501,7 @@ void MainGraph::processBlock(juce::AudioBuffer<float>& audio, juce::MidiBuffer&
 	juce::ScopedTryReadLock mackieLocker(audioLock::getMackieLock());
 
 	if (!(audioLocker.isLocked() && pluginLocker.isLocked() && sourceLocker.isLocked() && positionLocker.isLocked() && mackieLocker.isLocked())) {
-		audio.clear();
+		vMath::zeroAllAudioData(audio);
 		midi.clear();
 		return;
 	}
@@ -552,7 +553,7 @@ void MainGraph::processBlock(juce::AudioBuffer<float>& audio, juce::MidiBuffer&
 
 	/** Truncate Input */
 	if (isRendering) {
-		audio.clear();
+		vMath::zeroAllAudioData(audio);
 		midi.clear();
 	}
 
@@ -565,7 +566,7 @@ void MainGraph::processBlock(juce::AudioBuffer<float>& audio, juce::MidiBuffer&
 
 	/** Truncate Output */
 	if (isRendering) {
-		audio.clear();
+		vMath::zeroAllAudioData(audio);
 		midi.clear();
 	}
 
diff --git a/src/audioCore/misc/VMath.cpp b/src/audioCore/misc/VMath.cpp
index 93d36064..ed6dc7d0 100644
--- a/src/audioCore/misc/VMath.cpp
+++ b/src/audioCore/misc/VMath.cpp
@@ -24,51 +24,184 @@ namespace vMath {
 	}
 
+	/**
+	 * SIMD kernels: every routine below handles as many whole vectors as fit
+	 * into `length` using unaligned loads/stores, then passes the remaining
+	 * tail (less than one vector) to the corresponding scalar routine.
+	 * Vector constants come from set intrinsics, since brace-initialising
+	 * __m128/__m256/__m512 is compiler specific.
+	 */
 	static void copyDataSSE3(float* dst, const float* src, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m128) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		for (int i = 0; i < clipMax; i += clipSize) {
+			__m128 data = _mm_loadu_ps(&(src[i]));
+			_mm_storeu_ps(&(dst[i]), data);
+		}
+
+		copyDataNormal(&(dst[clipMax]), &(src[clipMax]), length - clipMax);
 	}
 
 	static void addDataSSE3(float* dst, const float* src, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m128) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		for (int i = 0; i < clipMax; i += clipSize) {
+			__m128 data0 = _mm_loadu_ps(&(dst[i]));
+			__m128 data1 = _mm_loadu_ps(&(src[i]));
+			__m128 result = _mm_add_ps(data0, data1);
+			_mm_storeu_ps(&(dst[i]), result);
+		}
+
+		addDataNormal(&(dst[clipMax]), &(src[clipMax]), length - clipMax);
 	}
 
 	static void zeroDataSSE3(float* dst, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m128) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		const __m128 zeroV = _mm_setzero_ps();
+		for (int i = 0; i < clipMax; i += clipSize) {
+			_mm_storeu_ps(&(dst[i]), zeroV);
+		}
+
+		zeroDataNormal(&(dst[clipMax]), length - clipMax);
 	}
 
 	static void averageDataSSE3(float* dst, const float* src, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m128) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		const __m128 averV = _mm_set1_ps(.5f);
+		for (int i = 0; i < clipMax; i += clipSize) {
+			__m128 data0 = _mm_loadu_ps(&(dst[i]));
+			__m128 data1 = _mm_loadu_ps(&(src[i]));
+			__m128 sum = _mm_add_ps(data0, data1);
+			__m128 result = _mm_mul_ps(sum, averV);
+			_mm_storeu_ps(&(dst[i]), result);
+		}
+
+		averageDataNormal(&(dst[clipMax]), &(src[clipMax]), length - clipMax);
 	}
 
 	static void copyDataAVX2(float* dst, const float* src, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m256) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		for (int i = 0; i < clipMax; i += clipSize) {
+			__m256 data = _mm256_loadu_ps(&(src[i]));
+			_mm256_storeu_ps(&(dst[i]), data);
+		}
+
+		copyDataNormal(&(dst[clipMax]), &(src[clipMax]), length - clipMax);
 	}
 
 	static void addDataAVX2(float* dst, const float* src, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m256) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		for (int i = 0; i < clipMax; i += clipSize) {
+			__m256 data0 = _mm256_loadu_ps(&(dst[i]));
+			__m256 data1 = _mm256_loadu_ps(&(src[i]));
+			__m256 result = _mm256_add_ps(data0, data1);
+			_mm256_storeu_ps(&(dst[i]), result);
+		}
+
+		addDataNormal(&(dst[clipMax]), &(src[clipMax]), length - clipMax);
 	}
 
 	static void zeroDataAVX2(float* dst, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m256) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		const __m256 zeroV = _mm256_setzero_ps();
+		for (int i = 0; i < clipMax; i += clipSize) {
+			_mm256_storeu_ps(&(dst[i]), zeroV);
+		}
+
+		zeroDataNormal(&(dst[clipMax]), length - clipMax);
 	}
 
 	static void averageDataAVX2(float* dst, const float* src, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m256) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		const __m256 averV = _mm256_set1_ps(.5f);
+		for (int i = 0; i < clipMax; i += clipSize) {
+			__m256 data0 = _mm256_loadu_ps(&(dst[i]));
+			__m256 data1 = _mm256_loadu_ps(&(src[i]));
+			__m256 sum = _mm256_add_ps(data0, data1);
+			__m256 result = _mm256_mul_ps(sum, averV);
+			_mm256_storeu_ps(&(dst[i]), result);
+		}
+
+		averageDataNormal(&(dst[clipMax]), &(src[clipMax]), length - clipMax);
 	}
 
 	static void copyDataAVX512(float* dst, const float* src, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m512) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		for (int i = 0; i < clipMax; i += clipSize) {
+			__m512 data = _mm512_loadu_ps(&(src[i]));
+			_mm512_storeu_ps(&(dst[i]), data);
+		}
+
+		copyDataNormal(&(dst[clipMax]), &(src[clipMax]), length - clipMax);
 	}
 
 	static void addDataAVX512(float* dst, const float* src, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m512) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		for (int i = 0; i < clipMax; i += clipSize) {
+			__m512 data0 = _mm512_loadu_ps(&(dst[i]));
+			__m512 data1 = _mm512_loadu_ps(&(src[i]));
+			__m512 result = _mm512_add_ps(data0, data1);
+			_mm512_storeu_ps(&(dst[i]), result);
+		}
+
+		addDataNormal(&(dst[clipMax]), &(src[clipMax]), length - clipMax);
 	}
 
 	static void zeroDataAVX512(float* dst, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m512) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		const __m512 zeroV = _mm512_setzero_ps();
+		for (int i = 0; i < clipMax; i += clipSize) {
+			_mm512_storeu_ps(&(dst[i]), zeroV);
+		}
+
+		zeroDataNormal(&(dst[clipMax]), length - clipMax);
 	}
 
 	static void averageDataAVX512(float* dst, const float* src, int length) {
-		/** TODO */
+		int clipSize = sizeof(__m512) / sizeof(float);
+		int clipNum = length / clipSize;
+		int clipMax = clipNum * clipSize;
+
+		const __m512 averV = _mm512_set1_ps(.5f);
+		for (int i = 0; i < clipMax; i += clipSize) {
+			__m512 data0 = _mm512_loadu_ps(&(dst[i]));
+			__m512 data1 = _mm512_loadu_ps(&(src[i]));
+			__m512 sum = _mm512_add_ps(data0, data1);
+			__m512 result = _mm512_mul_ps(sum, averV);
+			_mm512_storeu_ps(&(dst[i]), result);
+		}
+
+		averageDataNormal(&(dst[clipMax]), &(src[clipMax]), length - clipMax);
 	}
 
 	static InsType type = InsType::Normal;
@@ -109,4 +242,59 @@ namespace vMath {
 	InsType getInsType() {
 		return vMath::type;
 	}
+
+	constexpr std::array<const char*, 4> insTypeNameList{
+		"Normal", "SSE3", "AVX2", "AVX512" };
+
+	const juce::String getInsTypeName() {
+		return vMath::insTypeNameList[(int)(vMath::getInsType())];
+	}
+
+	const juce::StringArray getAllInsTypeName() {
+		juce::StringArray result;
+		for (auto s : insTypeNameList) {
+			result.add(juce::String{ s });
+		}
+		return result;
+	}
+
+	void copyAudioData(juce::AudioSampleBuffer& dst, const juce::AudioSampleBuffer& src,
+		int dstStartSample, int srcStartSample, int dstChannel, int srcChannel, int length) {
+		auto wPtr = dst.getWritePointer(dstChannel);
+		auto rPtr = src.getReadPointer(srcChannel);
+		if (!wPtr || !rPtr) { return; }
+
+		copyData(&(wPtr[dstStartSample]), &(rPtr[srcStartSample]), length);
+	}
+
+	void addAudioData(juce::AudioSampleBuffer& dst, const juce::AudioSampleBuffer& src,
+		int dstStartSample, int srcStartSample, int dstChannel, int srcChannel, int length) {
+		auto wPtr = dst.getWritePointer(dstChannel);
+		auto rPtr = src.getReadPointer(srcChannel);
+		if (!wPtr || !rPtr) { return; }
+
+		addData(&(wPtr[dstStartSample]), &(rPtr[srcStartSample]), length);
+	}
+
+	void zeroAudioData(juce::AudioSampleBuffer& dst,
+		int dstStartSample, int dstChannel, int length) {
+		auto wPtr = dst.getWritePointer(dstChannel);
+		if (!wPtr) { return; }
+
+		zeroData(&(wPtr[dstStartSample]), length);
+	}
+	void zeroAllAudioChannels(juce::AudioSampleBuffer& dst,
+		int dstStartSample, int length) {
+		for (int i = 0; i < dst.getNumChannels(); i++) {
+			zeroAudioData(dst, dstStartSample, i, length);
+		}
+	}
+	void zeroAllAudioDataOnChannel(juce::AudioSampleBuffer& dst, int dstChannel) {
+		zeroAudioData(dst, 0, dstChannel, dst.getNumSamples());
+	}
+	void zeroAllAudioData(juce::AudioSampleBuffer& dst) {
+		for (int i = 0; i < dst.getNumChannels(); i++) {
+			zeroAllAudioDataOnChannel(dst, i);
+		}
+	}
 }
diff --git a/src/audioCore/misc/VMath.h b/src/audioCore/misc/VMath.h
index 16c2e31e..8343a3a9 100644
--- a/src/audioCore/misc/VMath.h
+++ b/src/audioCore/misc/VMath.h
@@ -10,4 +10,25 @@ namespace vMath {
 
 	void setInsType(InsType type);
 	InsType getInsType();
+	/** Name of the active instruction set */
+	const juce::String getInsTypeName();
+	/** Names of all instruction sets, indexed by InsType value */
+	const juce::StringArray getAllInsTypeName();
+
+	/** Copy `length` samples from one channel of `src` into one channel of `dst` */
+	void copyAudioData(juce::AudioSampleBuffer& dst, const juce::AudioSampleBuffer& src,
+		int dstStartSample, int srcStartSample, int dstChannel, int srcChannel, int length);
+	/** Add `length` samples from one channel of `src` onto one channel of `dst` */
+	void addAudioData(juce::AudioSampleBuffer& dst, const juce::AudioSampleBuffer& src,
+		int dstStartSample, int srcStartSample, int dstChannel, int srcChannel, int length);
+	/** Zero `length` samples of one channel, starting at `dstStartSample` */
+	void zeroAudioData(juce::AudioSampleBuffer& dst,
+		int dstStartSample, int dstChannel, int length);
+	/** Zero the same sample range on every channel */
+	void zeroAllAudioChannels(juce::AudioSampleBuffer& dst,
+		int dstStartSample, int length);
+	/** Zero every sample of one channel */
+	void zeroAllAudioDataOnChannel(juce::AudioSampleBuffer& dst, int dstChannel);
+	/** Zero every sample of every channel */
+	void zeroAllAudioData(juce::AudioSampleBuffer& dst);
 }
diff --git a/src/audioCore/quickAPI/QuickGet.cpp b/src/audioCore/quickAPI/QuickGet.cpp
index f8b73d60..7f187e08 100644
--- a/src/audioCore/quickAPI/QuickGet.cpp
+++ b/src/audioCore/quickAPI/QuickGet.cpp
@@ -4,6 +4,7 @@
 #include "../plugin/Plugin.h"
 #include "../misc/Device.h"
 #include "../misc/PlayPosition.h"
+#include "../misc/VMath.h"
 
 namespace quickAPI {
 	juce::Component* getAudioDebugger() {
@@ -154,4 +155,16 @@ namespace quickAPI {
 	const juce::StringArray getPluginSearchPath() {
 		return Plugin::getInstance()->getPluginSearchPath();
 	}
+
+	int getSIMDLevel() {
+		return (int)(vMath::getInsType());
+	}
+
+	const juce::String getSIMDInsName() {
+		return vMath::getInsTypeName();
+	}
+
+	const juce::StringArray getAllSIMDInsName() {
+		return vMath::getAllInsTypeName();
+	}
 }
\ No newline at end of file
diff --git a/src/audioCore/quickAPI/QuickGet.h b/src/audioCore/quickAPI/QuickGet.h
index 053bf2f2..647e62cd 100644
--- a/src/audioCore/quickAPI/QuickGet.h
+++ b/src/audioCore/quickAPI/QuickGet.h
@@ -39,4 +39,8 @@ namespace quickAPI {
 
 	const juce::StringArray getPluginBlackList();
 	const juce::StringArray getPluginSearchPath();
+
+	int getSIMDLevel();
+	const juce::String getSIMDInsName();
+	const juce::StringArray getAllSIMDInsName();
 }
\ No newline at end of file
diff --git a/src/audioCore/quickAPI/QuickSet.cpp b/src/audioCore/quickAPI/QuickSet.cpp
index 86343751..dba2b2d5 100644
--- a/src/audioCore/quickAPI/QuickSet.cpp
+++ b/src/audioCore/quickAPI/QuickSet.cpp
@@ -2,6 +2,8 @@
 #include "../AudioConfig.h"
 #include "../AudioCore.h"
 #include "../plugin/Plugin.h"
+#include "../misc/AudioLock.h"
+#include "../misc/VMath.h"
 
 namespace quickAPI {
 	void setPluginSearchPathListFilePath(const juce::String& path) {
@@ -64,4 +66,19 @@ namespace quickAPI {
 		Plugin::getInstance()->removeFromPluginSearchPath(path);
 		return true;
 	}
+
+	void setSIMDLevel(int level) {
+		/** Clamp To Valid Range */
+		if (level >= (int)(vMath::InsType::MaxNum)) { level = (int)(vMath::InsType::MaxNum) - 1; }
+		if (level < 0) { level = 0; }
+
+		/** Fall Back If CPU Lacks The Instruction Set (0 Normal, 1 SSE3, 2 AVX2, 3 AVX512) */
+		if (level >= 3 && !juce::SystemStats::hasAVX512F()) { level = 2; }
+		if (level >= 2 && !juce::SystemStats::hasAVX2()) { level = 1; }
+		if (level >= 1 && !juce::SystemStats::hasSSE3()) { level = 0; }
+
+		/** Change Instruction Set While Holding The Audio Write Lock */
+		juce::ScopedWriteLock locker(audioLock::getAudioLock());
+		vMath::setInsType((vMath::InsType)(level));
+	}
 }
\ No newline at end of file
diff --git a/src/audioCore/quickAPI/QuickSet.h b/src/audioCore/quickAPI/QuickSet.h
index 6eb10016..536181a3 100644
--- a/src/audioCore/quickAPI/QuickSet.h
+++ b/src/audioCore/quickAPI/QuickSet.h
@@ -20,4 +20,6 @@ namespace quickAPI {
 	bool removeFromPluginBlackList(const juce::String& plugin);
 	bool addToPluginSearchPath(const juce::String& path);
 	bool removeFromPluginSearchPath(const juce::String& path);
+
+	void setSIMDLevel(int level);
 }
\ No newline at end of file
diff --git a/src/audioCore/source/CloneableAudioSource.cpp b/src/audioCore/source/CloneableAudioSource.cpp
index 029c1aff..d33f2ee5 100644
--- a/src/audioCore/source/CloneableAudioSource.cpp
+++ b/src/audioCore/source/CloneableAudioSource.cpp
@@ -1,4 +1,5 @@
 #include "CloneableAudioSource.h"
 
 #include "../misc/AudioLock.h"
+#include "../misc/VMath.h"
 #include "../Utils.h"
@@ -142,7 +143,7 @@ void CloneableAudioSource::init(double sampleRate, int channelNum, int sampleNum
 
 	/** Create Buffer */
 	this->buffer = juce::AudioSampleBuffer{ channelNum, sampleNum };
-	this->buffer.clear();
+	vMath::zeroAllAudioData(this->buffer);
 	this->sourceSampleRate = sampleRate;
 
 	/** Create Audio Source */
@@ -164,7 +165,8 @@ void CloneableAudioSource::prepareToRecord(
 	/** Clear Buffer If Sample Rate Mismatch */
 	if (this->getSourceSampleRate() != sampleRate) {
 		this->sourceSampleRate = sampleRate;
-		this->buffer.setSize(this->buffer.getNumChannels(), 0, false, true, true);
+		this->buffer.setSize(this->buffer.getNumChannels(), 0, true, false, true);
+		vMath::zeroAllAudioData(this->buffer);
 
 		/** Set Flag */
 		this->changed();
@@ -175,9 +177,16 @@ void CloneableAudioSource::prepareToRecord(
 	this->memorySource = nullptr;
 
 	/** Init Buffer */
-	this->buffer.setSize(
-		std::max(inputChannels, this->buffer.getNumChannels()),
-		this->buffer.getNumSamples(), true, true, true);
+	{
+		int currentChannels = this->buffer.getNumChannels();
+		if (inputChannels > currentChannels) {
+			this->buffer.setSize(inputChannels,
+				this->buffer.getNumSamples(), true, false, true);
+			for (int i = currentChannels; i < inputChannels; i++) {
+				vMath::zeroAllAudioDataOnChannel(this->buffer, i);
+			}
+		}
+	}
 
 	/** Create Audio Source */
 	this->memorySource = std::make_unique<juce::MemoryAudioSource>(this->buffer, false, false);
@@ -215,15 +224,12 @@ void CloneableAudioSource::writeData(
 	if (startSample > this->buffer.getNumSamples() - length) {
 		int newLength = startSample + length;
 		this->buffer.setSize(
 			this->buffer.getNumChannels(), newLength, true, true, true);
 	}
 
 	/** CopyData */
 	for (int i = 0; i < buffer.getNumChannels() && i < this->buffer.getNumChannels(); i++) {
-		if (auto rptr = buffer.getReadPointer(i)) {
-			this->buffer.copyFrom(
-				i, startSample, &(rptr)[srcStartSample], length);
-		}
+		vMath::copyAudioData(this->buffer, buffer, startSample, srcStartSample, i, i, length);
 	}
 
 	/** Set Flag */
diff --git a/src/main.cpp b/src/main.cpp
index c9624818..202d0d29 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -56,6 +56,7 @@ class MainApplication : public juce::JUCEApplication {
 		auto& funcVar = ConfigManager::getInstance()->get("function");
 		quickAPI::setReturnToStartOnStop(funcVar["return-on-stop"]);
 		quickAPI::setAnonymousMode(funcVar["anonymous-mode"]);
+		quickAPI::setSIMDLevel(funcVar["simd-speed-up"]);
 
 		/** Output */
 		auto formats = quickAPI::getAudioFormatsSupported(true);
diff --git a/src/ui/component/ConfigComponent.cpp b/src/ui/component/ConfigComponent.cpp
index 4f781a6f..5504c5b1 100644
--- a/src/ui/component/ConfigComponent.cpp
+++ b/src/ui/component/ConfigComponent.cpp
@@ -280,12 +280,22 @@ void ConfigComponent::createFunctionPage() {
 	auto anonymousValueCallback = []()->const juce::var {
 		return quickAPI::getAnonymousMode();
 	};
+	auto simdUpdateCallback = [](const juce::var& data) {
+		quickAPI::setSIMDLevel(data);
+		return true;
+	};
+	auto simdValueCallback = []()->const juce::var {
+		return quickAPI::getSIMDLevel();
+	};
 
 	juce::Array<juce::PropertyComponent*> audioProps;
 	audioProps.add(new ConfigBooleanProp{ "function", "return-on-stop",
 		"Disabled", "Enabled", returnToStartUpdateCallback , returnToStartValueCallback});
 	audioProps.add(new ConfigBooleanProp{ "function", "anonymous-mode",
 		"Disabled", "Enabled", anonymousUpdateCallback , anonymousValueCallback });
+	audioProps.add(new ConfigChoiceProp{ "function", "simd-speed-up",
+		quickAPI::getAllSIMDInsName(), ConfigChoiceProp::ValueType::IndexVal,
+		simdUpdateCallback , simdValueCallback });
 	audioProps.add(new ConfigWhiteSpaceProp{});
 
 	panel->addSection(TRANS("Audio Core"), audioProps);