Skip to content

Commit

Permalink
Enabling all available SIMD extensions when testing.
Browse files Browse the repository at this point in the history
  • Loading branch information
Dawoodoz committed Feb 1, 2025
1 parent 8ba2e9c commit f8ea6b1
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 91 deletions.
8 changes: 2 additions & 6 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,11 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
architecture: [x86_32, x86_64]
architecture: [x86_32, x86_64, arm, arm64]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Run tests
run: |
cd ./Source
if [[ "${{ matrix.architecture }}" == "x86_32" ]]; then
./test.sh
elif [[ "${{ matrix.architecture }}" == "x86_64" ]]; then
./test.sh
fi
./test.sh
8 changes: 2 additions & 6 deletions .github/workflows/ci.yml.tabs
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,11 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
architecture: [x86_32, x86_64]
architecture: [x86_32, x86_64, arm, arm64]
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Run tests
run: |
cd ./Source
if [[ "${{ matrix.architecture }}" == "x86_32" ]]; then
./test.sh
elif [[ "${{ matrix.architecture }}" == "x86_64" ]]; then
./test.sh
fi
./test.sh
146 changes: 70 additions & 76 deletions Source/DFPSR/base/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -3341,87 +3341,81 @@
// Shifts every unsigned 8-bit lane of left towards the most significant bit by bitOffset bits.
//   Bits shifted out of a lane are discarded, they never leak into a neighboring lane.
//   The static assertion below only accepts offsets from 0 to 7.
template <uint32_t bitOffset>
inline U8x32 bitShiftLeftImmediate(const U8x32& left) {
	static_assert(bitOffset < 8u, "Immediate left shift of 8-bit values may not shift more than 7 bits!");
	#if defined USE_AVX2
		// AVX2 has no shift instruction for 8-bit lanes (_mm256_slli_epi8 does not exist),
		//   so shift 16-bit lanes instead and clear the low bitOffset bits of every byte,
		//   which removes the bits that crossed over from the lower byte of each 16-bit pair.
		return U8x32(_mm256_and_si256(
		  _mm256_slli_epi16(left.v, bitOffset),
		  _mm256_set1_epi8(static_cast<char>(static_cast<uint8_t>(0xFFu << bitOffset)))
		));
	#else
		// Scalar fallback, where each result is truncated back to 8 bits by the constructor's arguments.
		return U8x32(
		  left.scalars[ 0] << bitOffset,
		  left.scalars[ 1] << bitOffset,
		  left.scalars[ 2] << bitOffset,
		  left.scalars[ 3] << bitOffset,
		  left.scalars[ 4] << bitOffset,
		  left.scalars[ 5] << bitOffset,
		  left.scalars[ 6] << bitOffset,
		  left.scalars[ 7] << bitOffset,
		  left.scalars[ 8] << bitOffset,
		  left.scalars[ 9] << bitOffset,
		  left.scalars[10] << bitOffset,
		  left.scalars[11] << bitOffset,
		  left.scalars[12] << bitOffset,
		  left.scalars[13] << bitOffset,
		  left.scalars[14] << bitOffset,
		  left.scalars[15] << bitOffset,
		  left.scalars[16] << bitOffset,
		  left.scalars[17] << bitOffset,
		  left.scalars[18] << bitOffset,
		  left.scalars[19] << bitOffset,
		  left.scalars[20] << bitOffset,
		  left.scalars[21] << bitOffset,
		  left.scalars[22] << bitOffset,
		  left.scalars[23] << bitOffset,
		  left.scalars[24] << bitOffset,
		  left.scalars[25] << bitOffset,
		  left.scalars[26] << bitOffset,
		  left.scalars[27] << bitOffset,
		  left.scalars[28] << bitOffset,
		  left.scalars[29] << bitOffset,
		  left.scalars[30] << bitOffset,
		  left.scalars[31] << bitOffset
		);
	#endif
}
// Shifts every unsigned 8-bit lane of left towards the least significant bit by bitOffset bits.
//   Zeroes are shifted in from the top of each lane, bits never leak in from a neighboring lane.
// bitOffset must be an immediate constant from 0 to 7, so a template argument is used.
template <uint32_t bitOffset>
inline U8x32 bitShiftRightImmediate(const U8x32& left) {
	static_assert(bitOffset < 8u, "Immediate right shift of 8-bit values may not shift more than 7 bits!");
	#if defined USE_AVX2
		// AVX2 has no shift instruction for 8-bit lanes (_mm256_srli_epi8 does not exist),
		//   so shift 16-bit lanes instead and clear the high bitOffset bits of every byte,
		//   which removes the bits that crossed over from the upper byte of each 16-bit pair.
		return U8x32(_mm256_and_si256(
		  _mm256_srli_epi16(left.v, bitOffset),
		  _mm256_set1_epi8(static_cast<char>(static_cast<uint8_t>(0xFFu >> bitOffset)))
		));
	#else
		// Scalar fallback, shifting one unsigned 8-bit element at a time.
		return U8x32(
		  left.scalars[ 0] >> bitOffset,
		  left.scalars[ 1] >> bitOffset,
		  left.scalars[ 2] >> bitOffset,
		  left.scalars[ 3] >> bitOffset,
		  left.scalars[ 4] >> bitOffset,
		  left.scalars[ 5] >> bitOffset,
		  left.scalars[ 6] >> bitOffset,
		  left.scalars[ 7] >> bitOffset,
		  left.scalars[ 8] >> bitOffset,
		  left.scalars[ 9] >> bitOffset,
		  left.scalars[10] >> bitOffset,
		  left.scalars[11] >> bitOffset,
		  left.scalars[12] >> bitOffset,
		  left.scalars[13] >> bitOffset,
		  left.scalars[14] >> bitOffset,
		  left.scalars[15] >> bitOffset,
		  left.scalars[16] >> bitOffset,
		  left.scalars[17] >> bitOffset,
		  left.scalars[18] >> bitOffset,
		  left.scalars[19] >> bitOffset,
		  left.scalars[20] >> bitOffset,
		  left.scalars[21] >> bitOffset,
		  left.scalars[22] >> bitOffset,
		  left.scalars[23] >> bitOffset,
		  left.scalars[24] >> bitOffset,
		  left.scalars[25] >> bitOffset,
		  left.scalars[26] >> bitOffset,
		  left.scalars[27] >> bitOffset,
		  left.scalars[28] >> bitOffset,
		  left.scalars[29] >> bitOffset,
		  left.scalars[30] >> bitOffset,
		  left.scalars[31] >> bitOffset
		);
	#endif
}

inline U16x16 operator+(const U16x16& left, const U16x16& right) {
Expand Down
7 changes: 4 additions & 3 deletions Source/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,18 @@ TEMP_ROOT=${ROOT_PATH}/../../temporary
CPP_VERSION=-std=c++14
MODE="-DDEBUG"
DEBUGGER="-g"
SIMD="-march=native"
O_LEVEL=-O2

# Make sure that the build script invoked below is executable.
chmod +x "${ROOT_PATH}/tools/buildScripts/build.sh"
# Build once with ${SIMD} included in the compiler flags, and abort the test run if the build fails.
if ! "${ROOT_PATH}/tools/buildScripts/build.sh" "NONE" "NONE" "${ROOT_PATH}" "${TEMP_ROOT}" "NONE" "${MODE} ${DEBUGGER} ${SIMD} ${CPP_VERSION} ${O_LEVEL}"
then
	exit 1
fi

# Get the specific temporary sub-folder for the compilation settings
#   Every '+' is replaced with 'p', then spaces, '=' and '-' are removed, so that
#   "-DDEBUG_-g_-march=native_-std=c++14_-O2" becomes "DDEBUG_g_marchnative_stdcpp14_O2".
TEMP_SUB="${MODE}_${DEBUGGER}_${SIMD}_${CPP_VERSION}_${O_LEVEL}"
TEMP_SUB=$(echo "${TEMP_SUB}" | tr "+" "p" | tr -d " =-")
TEMP_DIR=${TEMP_ROOT}/${TEMP_SUB}
Expand All @@ -31,7 +32,7 @@ for file in ./test/tests/*.cpp; do
rm -f ${TEMP_DIR}/application;
# Compile test case that defines main
echo "Compiling ${name}";
g++ ${CPP_VERSION} ${MODE} ${DEBUGGER} -c ${file} -o ${TEMP_DIR}/${base}_test.o;
g++ ${CPP_VERSION} ${MODE} ${DEBUGGER} ${SIMD} -c ${file} -o ${TEMP_DIR}/${base}_test.o;
if [ $? -ne 0 ]
then
exit 1
Expand Down

0 comments on commit f8ea6b1

Please sign in to comment.