Skip to content

Commit

Permalink
Add SIMD speed up
Browse files Browse the repository at this point in the history
  • Loading branch information
FangCunWuChang committed Jan 11, 2024
1 parent 58931c9 commit 47ad997
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 26 deletions.
3 changes: 2 additions & 1 deletion app/data/config/function.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
{
"return-on-stop": true,
"anonymous-mode": false,
"simd-speed-up": 3,
"cpu-painting": false
}
1 change: 1 addition & 0 deletions app/translates/zh-CN/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ countries: cn
"Audio Core" = "音频内核"
"return-on-stop" = "在停止时返回"
"anonymous-mode" = "匿名模式"
"simd-speed-up" = "SIMD加速"
"Performance" = "性能"
"cpu-painting" = "CPU绘图"
"System" = "系统"
Expand Down
7 changes: 4 additions & 3 deletions src/audioCore/graph/MainGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "../misc/PlayPosition.h"
#include "../misc/Renderer.h"
#include "../misc/AudioLock.h"
#include "../misc/VMath.h"
#include "../source/CloneableSourceManager.h"
#include "../AudioCore.h"
#include "../Utils.h"
Expand Down Expand Up @@ -500,7 +501,7 @@ void MainGraph::processBlock(juce::AudioBuffer<float>& audio, juce::MidiBuffer&
juce::ScopedTryReadLock mackieLocker(audioLock::getMackieLock());
if (!(audioLocker.isLocked() && pluginLocker.isLocked()
&& sourceLocker.isLocked() && positionLocker.isLocked() && mackieLocker.isLocked())) {
audio.clear();
vMath::zeroAllAudioData(audio);
midi.clear();
return;
}
Expand Down Expand Up @@ -552,7 +553,7 @@ void MainGraph::processBlock(juce::AudioBuffer<float>& audio, juce::MidiBuffer&

/** Truncate Input */
if (isRendering) {
audio.clear();
vMath::zeroAllAudioData(audio);
midi.clear();
}

Expand All @@ -565,7 +566,7 @@ void MainGraph::processBlock(juce::AudioBuffer<float>& audio, juce::MidiBuffer&

/** Truncate Output */
if (isRendering) {
audio.clear();
vMath::zeroAllAudioData(audio);
midi.clear();
}

Expand Down
207 changes: 195 additions & 12 deletions src/audioCore/misc/VMath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,51 +24,179 @@ namespace vMath {
}

/**
 * Copies `length` floats from `src` to `dst`, one 128-bit vector
 * (4 floats) at a time with unaligned loads/stores. Whatever does not
 * fill a whole vector is handed to copyDataNormal().
 */
static void copyDataSSE3(float* dst, const float* src, int length) {
    constexpr int lanes = sizeof(__m128) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    int pos = 0;
    while (pos < vectorEnd) {
        _mm_storeu_ps(dst + pos, _mm_loadu_ps(src + pos));
        pos += lanes;
    }

    /** Tail that does not fill a whole vector */
    copyDataNormal(dst + vectorEnd, src + vectorEnd, length - vectorEnd);
}

/**
 * Adds `src` into `dst` element-wise (dst[i] += src[i]) for `length`
 * floats, one 128-bit vector (4 floats) at a time with unaligned
 * loads/stores. The remaining tail is handed to addDataNormal().
 */
static void addDataSSE3(float* dst, const float* src, int length) {
    constexpr int lanes = sizeof(__m128) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    for (int pos = 0; pos < vectorEnd; pos += lanes) {
        float* const out = dst + pos;
        const __m128 sum = _mm_add_ps(_mm_loadu_ps(out), _mm_loadu_ps(src + pos));
        _mm_storeu_ps(out, sum);
    }

    /** Tail that does not fill a whole vector */
    addDataNormal(dst + vectorEnd, src + vectorEnd, length - vectorEnd);
}

/**
 * Writes 0.f to `length` floats starting at `dst`, one 128-bit vector
 * (4 floats) at a time with unaligned stores. The remaining tail is
 * handed to zeroDataNormal().
 */
static void zeroDataSSE3(float* dst, int length) {
    constexpr int lanes = sizeof(__m128) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    /**
     * Built with the intrinsic rather than brace-initialisation: the
     * internal layout of __m128 differs between compilers, so an
     * aggregate initialiser is not portable.
     */
    const __m128 zeroV = _mm_setzero_ps();
    for (int pos = 0; pos < vectorEnd; pos += lanes) {
        _mm_storeu_ps(dst + pos, zeroV);
    }

    /** Tail that does not fill a whole vector */
    zeroDataNormal(dst + vectorEnd, length - vectorEnd);
}

/**
 * Replaces each dst[i] with (dst[i] + src[i]) * 0.5f for `length`
 * floats, one 128-bit vector (4 floats) at a time with unaligned
 * loads/stores. The remaining tail is handed to averageDataNormal().
 */
static void averageDataSSE3(float* dst, const float* src, int length) {
    constexpr int lanes = sizeof(__m128) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    /**
     * Broadcast with the intrinsic rather than brace-initialisation: the
     * internal layout of __m128 differs between compilers, so an
     * aggregate initialiser is not portable.
     */
    const __m128 halfV = _mm_set1_ps(.5f);
    for (int pos = 0; pos < vectorEnd; pos += lanes) {
        const __m128 lhs = _mm_loadu_ps(dst + pos);
        const __m128 rhs = _mm_loadu_ps(src + pos);
        _mm_storeu_ps(dst + pos, _mm_mul_ps(_mm_add_ps(lhs, rhs), halfV));
    }

    /** Tail that does not fill a whole vector */
    averageDataNormal(dst + vectorEnd, src + vectorEnd, length - vectorEnd);
}

/**
 * Copies `length` floats from `src` to `dst`, one 256-bit vector
 * (8 floats) at a time with unaligned loads/stores. Whatever does not
 * fill a whole vector is handed to copyDataNormal().
 */
static void copyDataAVX2(float* dst, const float* src, int length) {
    constexpr int lanes = sizeof(__m256) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    int pos = 0;
    while (pos < vectorEnd) {
        _mm256_storeu_ps(dst + pos, _mm256_loadu_ps(src + pos));
        pos += lanes;
    }

    /** Tail that does not fill a whole vector */
    copyDataNormal(dst + vectorEnd, src + vectorEnd, length - vectorEnd);
}

/**
 * Adds `src` into `dst` element-wise (dst[i] += src[i]) for `length`
 * floats, one 256-bit vector (8 floats) at a time with unaligned
 * loads/stores. The remaining tail is handed to addDataNormal().
 */
static void addDataAVX2(float* dst, const float* src, int length) {
    constexpr int lanes = sizeof(__m256) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    for (int pos = 0; pos < vectorEnd; pos += lanes) {
        float* const out = dst + pos;
        const __m256 sum = _mm256_add_ps(_mm256_loadu_ps(out), _mm256_loadu_ps(src + pos));
        _mm256_storeu_ps(out, sum);
    }

    /** Tail that does not fill a whole vector */
    addDataNormal(dst + vectorEnd, src + vectorEnd, length - vectorEnd);
}

/**
 * Writes 0.f to `length` floats starting at `dst`, one 256-bit vector
 * (8 floats) at a time with unaligned stores. The remaining tail is
 * handed to zeroDataNormal().
 */
static void zeroDataAVX2(float* dst, int length) {
    constexpr int lanes = sizeof(__m256) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    /**
     * Built with the intrinsic rather than brace-initialisation: the
     * internal layout of __m256 differs between compilers, so an
     * aggregate initialiser is not portable.
     */
    const __m256 zeroV = _mm256_setzero_ps();
    for (int pos = 0; pos < vectorEnd; pos += lanes) {
        _mm256_storeu_ps(dst + pos, zeroV);
    }

    /** Tail that does not fill a whole vector */
    zeroDataNormal(dst + vectorEnd, length - vectorEnd);
}

/**
 * Replaces each dst[i] with (dst[i] + src[i]) * 0.5f for `length`
 * floats, one 256-bit vector (8 floats) at a time with unaligned
 * loads/stores. The remaining tail is handed to averageDataNormal().
 */
static void averageDataAVX2(float* dst, const float* src, int length) {
    constexpr int lanes = sizeof(__m256) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    /**
     * Broadcast with the intrinsic rather than brace-initialisation: the
     * internal layout of __m256 differs between compilers, so an
     * aggregate initialiser is not portable.
     */
    const __m256 halfV = _mm256_set1_ps(.5f);
    for (int pos = 0; pos < vectorEnd; pos += lanes) {
        const __m256 lhs = _mm256_loadu_ps(dst + pos);
        const __m256 rhs = _mm256_loadu_ps(src + pos);
        _mm256_storeu_ps(dst + pos, _mm256_mul_ps(_mm256_add_ps(lhs, rhs), halfV));
    }

    /** Tail that does not fill a whole vector */
    averageDataNormal(dst + vectorEnd, src + vectorEnd, length - vectorEnd);
}

/**
 * Copies `length` floats from `src` to `dst`, one 512-bit vector
 * (16 floats) at a time with unaligned loads/stores. Whatever does not
 * fill a whole vector is handed to copyDataNormal().
 */
static void copyDataAVX512(float* dst, const float* src, int length) {
    constexpr int lanes = sizeof(__m512) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    int pos = 0;
    while (pos < vectorEnd) {
        _mm512_storeu_ps(dst + pos, _mm512_loadu_ps(src + pos));
        pos += lanes;
    }

    /** Tail that does not fill a whole vector */
    copyDataNormal(dst + vectorEnd, src + vectorEnd, length - vectorEnd);
}

/**
 * Adds `src` into `dst` element-wise (dst[i] += src[i]) for `length`
 * floats, one 512-bit vector (16 floats) at a time with unaligned
 * loads/stores. The remaining tail is handed to addDataNormal().
 */
static void addDataAVX512(float* dst, const float* src, int length) {
    constexpr int lanes = sizeof(__m512) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    for (int pos = 0; pos < vectorEnd; pos += lanes) {
        float* const out = dst + pos;
        const __m512 sum = _mm512_add_ps(_mm512_loadu_ps(out), _mm512_loadu_ps(src + pos));
        _mm512_storeu_ps(out, sum);
    }

    /** Tail that does not fill a whole vector */
    addDataNormal(dst + vectorEnd, src + vectorEnd, length - vectorEnd);
}

/**
 * Writes 0.f to `length` floats starting at `dst`, one 512-bit vector
 * (16 floats) at a time with unaligned stores. The remaining tail is
 * handed to zeroDataNormal().
 */
static void zeroDataAVX512(float* dst, int length) {
    constexpr int lanes = sizeof(__m512) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    /**
     * Built with the intrinsic rather than brace-initialisation: the
     * internal layout of __m512 differs between compilers, so an
     * aggregate initialiser is not portable.
     */
    const __m512 zeroV = _mm512_setzero_ps();
    for (int pos = 0; pos < vectorEnd; pos += lanes) {
        _mm512_storeu_ps(dst + pos, zeroV);
    }

    /** Tail that does not fill a whole vector */
    zeroDataNormal(dst + vectorEnd, length - vectorEnd);
}

/**
 * Replaces each dst[i] with (dst[i] + src[i]) * 0.5f for `length`
 * floats, one 512-bit vector (16 floats) at a time with unaligned
 * loads/stores. The remaining tail is handed to averageDataNormal().
 */
static void averageDataAVX512(float* dst, const float* src, int length) {
    constexpr int lanes = sizeof(__m512) / sizeof(float);
    const int vectorEnd = (length / lanes) * lanes;

    /**
     * Broadcast with the intrinsic rather than brace-initialisation: the
     * internal layout of __m512 differs between compilers, so an
     * aggregate initialiser is not portable.
     */
    const __m512 halfV = _mm512_set1_ps(.5f);
    for (int pos = 0; pos < vectorEnd; pos += lanes) {
        const __m512 lhs = _mm512_loadu_ps(dst + pos);
        const __m512 rhs = _mm512_loadu_ps(src + pos);
        _mm512_storeu_ps(dst + pos, _mm512_mul_ps(_mm512_add_ps(lhs, rhs), halfV));
    }

    /** Tail that does not fill a whole vector */
    averageDataNormal(dst + vectorEnd, src + vectorEnd, length - vectorEnd);
}

/** Currently selected instruction-set level; starts as InsType::Normal and is the value getInsType() returns. */
static InsType type = InsType::Normal;
Expand Down Expand Up @@ -109,4 +237,59 @@ namespace vMath {
/** Returns the currently selected instruction-set level. */
InsType getInsType() {
    const InsType current = vMath::type;
    return current;
}

/** Display names of the instruction-set levels, indexed by the integer value of InsType. */
constexpr std::array<const char*, InsType::MaxNum> insTypeNameList = {
    "Normal",
    "SSE3",
    "AVX2",
    "AVX512"
};

/** Returns the display name of the currently selected instruction-set level. */
const juce::String getInsTypeName() {
    const auto index = static_cast<int>(vMath::getInsType());
    return vMath::insTypeNameList[index];
}

/** Returns the display names of all instruction-set levels, in insTypeNameList order. */
const juce::StringArray getAllInsTypeName() {
    juce::StringArray names;
    for (std::size_t i = 0; i < insTypeNameList.size(); ++i) {
        names.add(juce::String{ insTypeNameList[i] });
    }
    return names;
}

/**
 * Copies `length` samples from channel `srcChannel` of `src`, starting at
 * `srcStartSample`, into channel `dstChannel` of `dst`, starting at
 * `dstStartSample`, by forwarding to copyData().
 *
 * Does nothing when `length` is not positive, when a channel index or
 * sample range lies outside its buffer (asserts in debug builds), or
 * when either channel pointer is null.
 */
void copyAudioData(juce::AudioSampleBuffer& dst, const juce::AudioSampleBuffer& src,
    int dstStartSample, int srcStartSample, int dstChannel, int srcChannel, int length) {
    /** Nothing to copy */
    if (length <= 0) { return; }

    /** Written as `start <= size - length` so the check itself cannot overflow */
    const bool dstValid = dstChannel >= 0 && dstChannel < dst.getNumChannels()
        && dstStartSample >= 0 && dstStartSample <= dst.getNumSamples() - length;
    const bool srcValid = srcChannel >= 0 && srcChannel < src.getNumChannels()
        && srcStartSample >= 0 && srcStartSample <= src.getNumSamples() - length;
    jassert(dstValid && srcValid);
    if (!(dstValid && srcValid)) { return; }

    auto wPtr = dst.getWritePointer(dstChannel);
    auto rPtr = src.getReadPointer(srcChannel);
    if (!wPtr || !rPtr) { return; }

    copyData(&(wPtr[dstStartSample]), &(rPtr[srcStartSample]), length);
}

/**
 * Adds `length` samples from channel `srcChannel` of `src`, starting at
 * `srcStartSample`, onto channel `dstChannel` of `dst`, starting at
 * `dstStartSample`, by forwarding to addData().
 *
 * Does nothing when `length` is not positive, when a channel index or
 * sample range lies outside its buffer (asserts in debug builds), or
 * when either channel pointer is null.
 */
void addAudioData(juce::AudioSampleBuffer& dst, const juce::AudioSampleBuffer& src,
    int dstStartSample, int srcStartSample, int dstChannel, int srcChannel, int length) {
    /** Nothing to add */
    if (length <= 0) { return; }

    /** Written as `start <= size - length` so the check itself cannot overflow */
    const bool dstValid = dstChannel >= 0 && dstChannel < dst.getNumChannels()
        && dstStartSample >= 0 && dstStartSample <= dst.getNumSamples() - length;
    const bool srcValid = srcChannel >= 0 && srcChannel < src.getNumChannels()
        && srcStartSample >= 0 && srcStartSample <= src.getNumSamples() - length;
    jassert(dstValid && srcValid);
    if (!(dstValid && srcValid)) { return; }

    auto wPtr = dst.getWritePointer(dstChannel);
    auto rPtr = src.getReadPointer(srcChannel);
    if (!wPtr || !rPtr) { return; }

    addData(&(wPtr[dstStartSample]), &(rPtr[srcStartSample]), length);
}

/**
 * Zeroes `length` samples of channel `dstChannel` of `dst`, starting at
 * `dstStartSample`, by forwarding to zeroData().
 *
 * Does nothing when `length` is not positive, when the channel index or
 * sample range lies outside the buffer (asserts in debug builds), or
 * when the channel pointer is null.
 */
void zeroAudioData(juce::AudioSampleBuffer& dst,
    int dstStartSample, int dstChannel, int length) {
    /** Nothing to clear */
    if (length <= 0) { return; }

    /** Written as `start <= size - length` so the check itself cannot overflow */
    const bool dstValid = dstChannel >= 0 && dstChannel < dst.getNumChannels()
        && dstStartSample >= 0 && dstStartSample <= dst.getNumSamples() - length;
    jassert(dstValid);
    if (!dstValid) { return; }

    auto wPtr = dst.getWritePointer(dstChannel);
    if (!wPtr) { return; }

    zeroData(&(wPtr[dstStartSample]), length);
}
/**
 * Zeroes `length` samples starting at `dstStartSample` on every channel
 * of `dst`, one zeroAudioData() call per channel.
 */
void zeroAllAudioChannels(juce::AudioSampleBuffer& dst,
    int dstStartSample, int length) {
    const int channelCount = dst.getNumChannels();
    int channel = 0;
    while (channel < channelCount) {
        zeroAudioData(dst, dstStartSample, channel, length);
        ++channel;
    }
}
/** Zeroes every sample (0 .. getNumSamples()) of channel `dstChannel` of `dst`. */
void zeroAllAudioDataOnChannel(juce::AudioSampleBuffer& dst, int dstChannel) {
    const int totalSamples = dst.getNumSamples();
    zeroAudioData(dst, 0, dstChannel, totalSamples);
}
/** Zeroes every sample of every channel of `dst`, channel by channel. */
void zeroAllAudioData(juce::AudioSampleBuffer& dst) {
    const int channelCount = dst.getNumChannels();
    for (int channel = 0; channel < channelCount; ++channel) {
        zeroAllAudioDataOnChannel(dst, channel);
    }
}
}
13 changes: 13 additions & 0 deletions src/audioCore/misc/VMath.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,17 @@ namespace vMath {

/** Setter for the instruction-set level. NOTE(review): presumably stores the value getInsType() returns - confirm against the definition. */
void setInsType(InsType type);
/** Returns the currently selected instruction-set level. */
InsType getInsType();
/** Returns the display name of the currently selected instruction-set level. */
const juce::String getInsTypeName();
/** Returns the display names of all instruction-set levels. */
const juce::StringArray getAllInsTypeName();

/**
 * Copies `length` samples from `src` (channel `srcChannel`, from `srcStartSample`)
 * into `dst` (channel `dstChannel`, from `dstStartSample`).
 * Does nothing if either channel pointer is null.
 */
void copyAudioData(juce::AudioSampleBuffer& dst, const juce::AudioSampleBuffer& src,
int dstStartSample, int srcStartSample, int dstChannel, int srcChannel, int length);
/**
 * Adds `length` samples from `src` (channel `srcChannel`, from `srcStartSample`)
 * onto `dst` (channel `dstChannel`, from `dstStartSample`).
 * Does nothing if either channel pointer is null.
 */
void addAudioData(juce::AudioSampleBuffer& dst, const juce::AudioSampleBuffer& src,
int dstStartSample, int srcStartSample, int dstChannel, int srcChannel, int length);
/** Zeroes `length` samples of channel `dstChannel` of `dst`, starting at `dstStartSample`. */
void zeroAudioData(juce::AudioSampleBuffer& dst,
int dstStartSample, int dstChannel, int length);
/** Zeroes `length` samples starting at `dstStartSample` on every channel of `dst`. */
void zeroAllAudioChannels(juce::AudioSampleBuffer& dst,
int dstStartSample, int length);
/** Zeroes every sample of channel `dstChannel` of `dst`. */
void zeroAllAudioDataOnChannel(juce::AudioSampleBuffer& dst, int dstChannel);
/** Zeroes every sample of every channel of `dst`. */
void zeroAllAudioData(juce::AudioSampleBuffer& dst);
}
13 changes: 13 additions & 0 deletions src/audioCore/quickAPI/QuickGet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "../plugin/Plugin.h"
#include "../misc/Device.h"
#include "../misc/PlayPosition.h"
#include "../misc/VMath.h"

namespace quickAPI {
juce::Component* getAudioDebugger() {
Expand Down Expand Up @@ -154,4 +155,16 @@ namespace quickAPI {
/** Returns the plugin search paths reported by the Plugin singleton. */
const juce::StringArray getPluginSearchPath() {
    auto* plugin = Plugin::getInstance();
    return plugin->getPluginSearchPath();
}

/** Returns the current vMath instruction-set level as its integer value. */
int getSIMDLevel() {
    const auto level = vMath::getInsType();
    return static_cast<int>(level);
}

/** Returns the display name of the current vMath instruction-set level. */
const juce::String getSIMDInsName() {
    auto name = vMath::getInsTypeName();
    return name;
}

/** Returns the display names of all vMath instruction-set levels. */
const juce::StringArray getAllSIMDInsName() {
    auto names = vMath::getAllInsTypeName();
    return names;
}
}
4 changes: 4 additions & 0 deletions src/audioCore/quickAPI/QuickGet.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,8 @@ namespace quickAPI {

const juce::StringArray getPluginBlackList();
/** Returns the plugin search paths reported by the Plugin singleton. */
const juce::StringArray getPluginSearchPath();

/** Returns the current vMath instruction-set level as an integer. */
int getSIMDLevel();
/** Returns the display name of the current vMath instruction-set level. */
const juce::String getSIMDInsName();
/** Returns the display names of all vMath instruction-set levels. */
const juce::StringArray getAllSIMDInsName();
}
10 changes: 10 additions & 0 deletions src/audioCore/quickAPI/QuickSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include "../AudioConfig.h"
#include "../AudioCore.h"
#include "../plugin/Plugin.h"
#include "../misc/AudioLock.h"
#include "../misc/VMath.h"

namespace quickAPI {
void setPluginSearchPathListFilePath(const juce::String& path) {
Expand Down Expand Up @@ -64,4 +66,12 @@ namespace quickAPI {
Plugin::getInstance()->removeFromPluginSearchPath(path);
return true;
}

void setSIMDLevel(int level) {
if (level >= (int)(vMath::InsType::MaxNum)) { level = (int)(vMath::InsType::MaxNum) - 1; }
if (level < 0) { level = 0; }

juce::ScopedWriteLock locker(audioLock::getAudioLock());
vMath::setInsType((vMath::InsType)(level));
}
}
2 changes: 2 additions & 0 deletions src/audioCore/quickAPI/QuickSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ namespace quickAPI {
bool removeFromPluginBlackList(const juce::String& plugin);
bool addToPluginSearchPath(const juce::String& path);
bool removeFromPluginSearchPath(const juce::String& path);

/** Selects the vMath instruction-set level; out-of-range values are clamped to the valid range. */
void setSIMDLevel(int level);
}
Loading

0 comments on commit 47ad997

Please sign in to comment.