From 7fc4c0feef1cd29cc2aebdb82935e7154ddf204e Mon Sep 17 00:00:00 2001 From: Hansem Ro Date: Thu, 17 Aug 2023 01:08:06 -0700 Subject: [PATCH 1/3] Add V2/V4 Siglent BIN import filter This allows importing of analog/math/digital waveforms from V2/V4 Siglent BIN waveform formats. Siglent scopes that export V2 BIN format: - SDS2000X Plus with FW 1.2.6 to 1.3.9RX - SDS5000X with FW 0.8.6 to 0.9.6 - SDS6000 with FW older than 1.4.1.0 Siglent scopes that export V4 BIN format: - SDS2000X Plus with FW newer than 1.4.0 - SDS5000X with FW newer than 0.9.6 - SDS6000 with FW newer than 1.4.1.0 - SDS2000X HD V2 and V4 spec details can be found on page 29 and 37 of E02A document, respectively. E02A: https://web.archive.org/web/20230730072643/https://www.siglenteu.com/wp-content/uploads/2021/08/How-to-Extract-Data-from-the-Binary-File.pdf --- scopeprotocols/CMakeLists.txt | 1 + scopeprotocols/SiglentBINImportFilter.cpp | 281 ++++++++++++++++++++++ scopeprotocols/SiglentBINImportFilter.h | 103 ++++++++ scopeprotocols/scopeprotocols.cpp | 1 + scopeprotocols/scopeprotocols.h | 1 + 5 files changed, 387 insertions(+) create mode 100644 scopeprotocols/SiglentBINImportFilter.cpp create mode 100644 scopeprotocols/SiglentBINImportFilter.h diff --git a/scopeprotocols/CMakeLists.txt b/scopeprotocols/CMakeLists.txt index 8e6db270..5283b94f 100644 --- a/scopeprotocols/CMakeLists.txt +++ b/scopeprotocols/CMakeLists.txt @@ -131,6 +131,7 @@ set(SCOPEPROTOCOLS_SOURCES SDCmdDecoder.cpp SDDataDecoder.cpp SDRAMDecoderBase.cpp + SiglentBINImportFilter.cpp SNRFilter.cpp SParameterCascadeFilter.cpp SParameterDeEmbedFilter.cpp diff --git a/scopeprotocols/SiglentBINImportFilter.cpp b/scopeprotocols/SiglentBINImportFilter.cpp new file mode 100644 index 00000000..5a8ef536 --- /dev/null +++ b/scopeprotocols/SiglentBINImportFilter.cpp @@ -0,0 +1,281 @@ +/*********************************************************************************************************************** +* * +* libscopeprotocols * +* * +* Copyright (c) 2012-2023 Andrew D. Zonenberg and contributors * +* All rights reserved. * +* * +* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the * +* following conditions are met: * +* * +* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the * +* following disclaimer. * +* * +* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the * +* following disclaimer in the documentation and/or other materials provided with the distribution. * +* * +* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products * +* derived from this software without specific prior written permission. * +* * +* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * +* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * +* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * +* POSSIBILITY OF SUCH DAMAGE. * +* * +***********************************************************************************************************************/ + +#include "../scopehal/scopehal.h" +#include "SiglentBINImportFilter.h" + +using namespace std; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Construction / destruction + +SiglentBINImportFilter::SiglentBINImportFilter(const string& color) + : ImportFilter(color) +{ + m_fpname = "Siglent (V2/V4) BIN File"; + m_parameters[m_fpname] = FilterParameter(FilterParameter::TYPE_FILENAME, Unit(Unit::UNIT_COUNTS)); + m_parameters[m_fpname].m_fileFilterMask = "*.bin"; + m_parameters[m_fpname].m_fileFilterName = "V2/V4 Siglent binary waveform files (*.bin)"; + m_parameters[m_fpname].signal_changed().connect(sigc::mem_fun(*this, &SiglentBINImportFilter::OnFileNameChanged)); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Accessors + +string SiglentBINImportFilter::GetProtocolName() +{ + return "Siglent (V2/V4) BIN Import"; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Actual decoder logic + +void SiglentBINImportFilter::OnFileNameChanged() +{ + //Wipe anything we may have had in the past + ClearStreams(); + + auto fname = m_parameters[m_fpname].ToString(); + if(fname.empty()) + return; + + //Set waveform timestamp to file timestamp + time_t timestamp = 0; + int64_t fs = 0; + GetTimestampOfFile(fname, timestamp, fs); + + string f = ReadFile(fname); + uint32_t fpos = 0; + + FileHeader fh; + f.copy((char*)&fh, sizeof(FileHeader), fpos); + fpos += sizeof(FileHeader); + + switch(fh.version) + { + case 2: + break; + case 4: + fpos += 4; + break; + default: + LogError("Unsupported version (%d) in file header\n", fh.version); + return; + } + + LogDebug("Version: %d\n", fh.version); + + //Parse waveform header + WaveHeader wh; + f.copy((char*)&wh, sizeof(WaveHeader), fpos); + fpos += sizeof(WaveHeader); + + for(int i = 0; i < 4; i++) + { + LogDebug("ch%d_en: %d\n", i+1, wh.ch_en[i]); + LogDebug("ch%d_v_gain: %f\n", i+1, wh.ch_v_gain[i].value); + LogDebug("ch%d_v_offset: %f\n", i+1, wh.ch_v_offset[i].value); + LogDebug("ch%d_probe: %f\n", i+1, wh.ch_probe[i]); + LogDebug("ch%d_codes_per_div: %d\n", i+1, wh.ch_codes_per_div[i]); + } + + LogDebug("digital_en: %d\n", wh.digital_en); + for(int i = 0; i < 16; i++) + { + LogDebug("d%d_ch_en: %d\n", i, wh.d_ch_en[i]); + } + + LogDebug("time_div: %f\n", wh.time_div); + LogDebug("time_delay: %f\n", wh.time_delay); + LogDebug("wave_length: %d\n", wh.wave_length); + LogDebug("s_rate: %f\n", wh.s_rate); + LogDebug("d_wave_length: %d\n", wh.d_wave_length); + LogDebug("d_s_rate: %f\n", wh.d_s_rate); + + LogDebug("data_width: %d\n", wh.data_width); + LogDebug("byte_order: %d\n", wh.byte_order); + LogDebug("num_hori_div: %d\n", wh.num_hori_div); + + for(int i = 0; i < 4; i++) + { + LogDebug("math%d_en: %d\n", i+1, wh.math_en[i]); + LogDebug("math%d_v_gain: %f\n", i+1, wh.math_v_gain[i].value); + LogDebug("math%d_v_offset: %f\n", i+1, wh.math_v_offset[i].value); + LogDebug("math%d_wave_length: %d\n", i+1, wh.math_wave_length[i]); + LogDebug("math%d_s_interval: %f\n", i+1, wh.math_s_interval[i]); + } + LogDebug("math_codes_per_div: %d\n", wh.math_codes_per_div); + + switch(fh.version) + { + case 2: + fpos = 0x800; + break; + case 4: + fpos = 0x1000; + break; + default: + LogError("Unsupported version (%d) in file header\n", fh.version); + return; + } + + //Process analog data + uint32_t data_width = wh.data_width + 1; // number of bytes + int32_t center_code = (1 << (8*data_width - 1)) - 1; + + uint32_t wave_idx = 0; + for(int i = 0; i < 4; i++) + { + if(wh.ch_en[i] == 1) + { + string name = string("C") + to_string(i+1); + AddStream(Unit(Unit::UNIT_VOLTS), name, Stream::STREAM_TYPE_ANALOG); + auto wfm = new UniformAnalogWaveform; + wfm->m_timescale = round(FS_PER_SECOND / wh.s_rate); + wfm->m_startTimestamp = timestamp * FS_PER_SECOND; + wfm->m_startFemtoseconds = fs; + wfm->m_triggerPhase = 0; + wfm->PrepareForCpuAccess(); + SetData(wfm, m_streams.size() - 1); + + LogDebug("Waveform[%d]: %s\n", wave_idx, name.c_str()); + double v_gain = wh.ch_v_gain[i].value * wh.ch_probe[i] / wh.ch_codes_per_div[i]; + LogDebug("\tv_gain: %f\n", v_gain); + LogDebug("\tcenter: %d\n", center_code); + + if(data_width == 2) + { + for(size_t j = 0; j < wh.wave_length; j++) + { + const uint16_t* sample = reinterpret_cast(f.c_str() + fpos); + float value = ((static_cast(*sample) - center_code)) * v_gain - wh.ch_v_offset[i].value; + wfm->m_samples.push_back(value); + fpos += 2; + } + } + else + { + for(size_t j = 0; j < wh.wave_length; j++) + { + const uint8_t* sample = reinterpret_cast(f.c_str() + fpos); + float value = (static_cast(*sample) - center_code) * v_gain - wh.ch_v_offset[i].value; + wfm->m_samples.push_back(value); + fpos += 1; + } + } + + wfm->MarkModifiedFromCpu(); + wave_idx += 1; + } + } + + //Process math data + for(int i = 0; i < 4; i++) + { + if(wh.math_en[i] == 1) + { + string name = string("F") + to_string(i+1); + AddStream(Unit(Unit::UNIT_VOLTS), name, Stream::STREAM_TYPE_ANALOG); + auto wfm = new UniformAnalogWaveform; + wfm->m_timescale = round(wh.math_s_interval[i] * FS_PER_SECOND); + wfm->m_startTimestamp = timestamp * FS_PER_SECOND; + wfm->m_startFemtoseconds = fs; + wfm->m_triggerPhase = 0; + wfm->PrepareForCpuAccess(); + SetData(wfm, m_streams.size() - 1); + + LogDebug("Waveform[%d]: %s\n", wave_idx, name.c_str()); + double v_gain = wh.math_v_gain[i].value / wh.math_codes_per_div; + LogDebug("\tv_gain: %f\n", v_gain); + LogDebug("\tcenter: %d\n", center_code); + + if(data_width == 2) + { + for(size_t j = 0; j < wh.math_wave_length[i]; j++) + { + const uint16_t* sample = reinterpret_cast(f.c_str() + fpos); + float value = ((static_cast(*sample) - center_code)) * v_gain - wh.math_v_offset[i].value; + wfm->m_samples.push_back(value); + fpos += 2; + } + } + else + { + for(size_t j = 0; j < wh.math_wave_length[i]; j++) + { + const uint8_t* sample = reinterpret_cast(f.c_str() + fpos); + float value = (static_cast(*sample) - center_code) * v_gain - wh.math_v_offset[i].value; + wfm->m_samples.push_back(value); + fpos += 1; + } + } + + wfm->MarkModifiedFromCpu(); + wave_idx += 1; + } + } + + //Process digital data + if(wh.digital_en) + { + for(int i = 0; i < 16; i++) + { + if(wh.d_ch_en[i] == 1) + { + string name = string("D") + to_string(i); + AddStream(Unit(Unit::UNIT_VOLTS), name, Stream::STREAM_TYPE_DIGITAL); + auto wfm = new UniformDigitalWaveform; + wfm->m_timescale = round(FS_PER_SECOND / wh.d_s_rate); + wfm->m_startTimestamp = timestamp * FS_PER_SECOND; + wfm->m_startFemtoseconds = fs; + wfm->m_triggerPhase = 0; + wfm->PrepareForCpuAccess(); + SetData(wfm, m_streams.size() - 1); + + LogDebug("Waveform[%d]: %s\n", wave_idx, name.c_str()); + for(size_t j = 0; j < (wh.d_wave_length / 8); j++) + { + uint8_t samples = *reinterpret_cast(f.c_str() + fpos); + for(int k = 0; k < 8; k++) + { + bool value = samples & 0x1; + samples >>= 1; + wfm->m_samples.push_back(value); + } + fpos += 1; + } + + wfm->MarkModifiedFromCpu(); + wave_idx += 1; + } + } + } + + m_outputsChangedSignal.emit(); +} diff --git a/scopeprotocols/SiglentBINImportFilter.h b/scopeprotocols/SiglentBINImportFilter.h new file mode 100644 index 00000000..03a8a9bf --- /dev/null +++ b/scopeprotocols/SiglentBINImportFilter.h @@ -0,0 +1,103 @@ +/*********************************************************************************************************************** +* * +* libscopeprotocols * +* * +* Copyright (c) 2012-2023 Andrew D. Zonenberg and contributors * +* All rights reserved. * +* * +* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the * +* following conditions are met: * +* * +* * Redistributions of source code must retain the above copyright notice, this list of conditions, and the * +* following disclaimer. * +* * +* * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the * +* following disclaimer in the documentation and/or other materials provided with the distribution. * +* * +* * Neither the name of the author nor the names of any contributors may be used to endorse or promote products * +* derived from this software without specific prior written permission. * +* * +* THIS SOFTWARE IS PROVIDED BY THE AUTHORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * +* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * +* THE AUTHORS BE HELD LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * +* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * +* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * +* POSSIBILITY OF SUCH DAMAGE. * +* * +***********************************************************************************************************************/ + +/** + @file + @author Andrew D. Zonenberg + @brief Declaration of SiglentBINImportFilter + */ +#ifndef SiglentBINImportFilter_h +#define SiglentBINImportFilter_h + +class SiglentBINImportFilter : public ImportFilter +{ +public: + SiglentBINImportFilter(const std::string& color); + + static std::string GetProtocolName(); + + PROTOCOL_DECODER_INITPROC(SiglentBINImportFilter) + + //Siglent binary capture structs + #pragma pack(push, 1) + struct FileHeader + { + uint32_t version; //File format version + }; + + // V2/V4 wave header + struct WaveHeader + { + int32_t ch_en[4]; //C1-C4 channel enable + struct { //C1-C4 vertical gain + double value; + char reserved[32]; + } ch_v_gain[4]; + struct { //C1-C4 vertical offset + double value; + char reserved[32]; + } ch_v_offset[4]; + int32_t digital_en; //Digital enable + int32_t d_ch_en[16]; //D0-D15 channel enable + double time_div; //Time base + char reserved9[32]; + double time_delay; //Trigger delay + char reserved10[32]; + uint32_t wave_length; //Number of samples in each analog waveform + double s_rate; //C1-C4 sampling rate + char reserved11[32]; + uint32_t d_wave_length; //Number of samples in each digital waveform + double d_s_rate; //D0-D15 sampling rate + char reserved12[32]; + double ch_probe[4]; //C1-C4 probe factor + int8_t data_width; //0:1 Byte, 1:2 Bytes + int8_t byte_order; //0:LSB, 1:MSB + char reserved13[6]; + int32_t num_hori_div; //Number of horizontal divisions + int32_t ch_codes_per_div[4];//C1-C4 codes per division + int32_t math_en[4]; //F1-F4 channel enable + struct { //F1-F4 vertical gain + double value; + char reserved[32]; + } math_v_gain[4]; + struct { //F1-F2 vertical offset + double value; + char reserved[32]; + } math_v_offset[4]; + uint32_t math_wave_length[4];//F1-F4 number of samples + double math_s_interval[4]; //F1-F4 sampling interval + int32_t math_codes_per_div; //F1-F4 codes per division + }; + #pragma pack(pop) + +protected: + void OnFileNameChanged(); +}; + +#endif diff --git a/scopeprotocols/scopeprotocols.cpp b/scopeprotocols/scopeprotocols.cpp index daed916b..642d94d5 100644 --- a/scopeprotocols/scopeprotocols.cpp +++ b/scopeprotocols/scopeprotocols.cpp @@ -165,6 +165,7 @@ void ScopeProtocolStaticInit() AddDecoderClass(ScaleFilter); AddDecoderClass(SDCmdDecoder); AddDecoderClass(SDDataDecoder); + AddDecoderClass(SiglentBINImportFilter); AddDecoderClass(SNRFilter); AddDecoderClass(SParameterCascadeFilter); AddDecoderClass(SParameterDeEmbedFilter); diff --git a/scopeprotocols/scopeprotocols.h b/scopeprotocols/scopeprotocols.h index 228bacb5..76efccc2 100644 --- a/scopeprotocols/scopeprotocols.h +++ b/scopeprotocols/scopeprotocols.h @@ -165,6 +165,7 @@ #include "ScaleFilter.h" #include "SDCmdDecoder.h" #include "SDDataDecoder.h" +#include "SiglentBINImportFilter.h" #include "SNRFilter.h" #include "SParameterCascadeFilter.h" #include "SParameterDeEmbedFilter.h" From 4cbd793bac82234743cb55a43007be0dbc95677a Mon Sep 17 00:00:00 2001 From: Hansem Ro Date: Tue, 22 Aug 2023 16:51:50 -0700 Subject: [PATCH 2/3] SiglentBINImportFilter: AVX2 + FMA implementation for analog/math channels This also defines Oscilloscope::ConvertUnsigned16BitSamples with generic, AVX2, and FMA implementations. --- scopehal/Oscilloscope.cpp | 218 ++++++++++++++++++++++ scopehal/Oscilloscope.h | 7 + scopeprotocols/SiglentBINImportFilter.cpp | 58 +++--- 3 files changed, 255 insertions(+), 28 deletions(-) diff --git a/scopehal/Oscilloscope.cpp b/scopehal/Oscilloscope.cpp index 877825a0..4c94faf0 100644 --- a/scopehal/Oscilloscope.cpp +++ b/scopehal/Oscilloscope.cpp @@ -1277,3 +1277,221 @@ void Oscilloscope::Convert16BitSamplesAVX512F(float* pout, int16_t* pin, float g } #endif /* __x86_64__ */ +/** + @brief Converts Unsigned 16-bit ADC samples to floating point + */ +void Oscilloscope::ConvertUnsigned16BitSamples(float* pout, uint16_t* pin, float gain, float offset, size_t count) +{ + //Divide large waveforms (>1M points) into blocks and multithread them + //TODO: tune split + if(count > 1000000) + { + //Round blocks to multiples of 64 samples for clean vectorization + size_t numblocks = omp_get_max_threads(); + size_t lastblock = numblocks - 1; + size_t blocksize = count / numblocks; + blocksize = blocksize - (blocksize % 64); + + #pragma omp parallel for + for(size_t i=0; i(pin + k)); + __m256i raw_samples2 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pin + k + 16)); + + //Extract the low and high halves (8 samples each) from the input blocks + __m128i block0_u16 = _mm256_extracti128_si256(raw_samples1, 0); + __m128i block1_u16 = _mm256_extracti128_si256(raw_samples1, 1); + __m128i block2_u16 = _mm256_extracti128_si256(raw_samples2, 0); + __m128i block3_u16 = _mm256_extracti128_si256(raw_samples2, 1); + + //Convert both blocks from unsigned 16-bit to signed 32-bit, giving us a pair of 8x int32 vectors + __m256i block0_i32 = _mm256_cvtepu16_epi32(block0_u16); + __m256i block1_i32 = _mm256_cvtepu16_epi32(block1_u16); + __m256i block2_i32 = _mm256_cvtepu16_epi32(block2_u16); + __m256i block3_i32 = _mm256_cvtepu16_epi32(block3_u16); + + //Convert the 32-bit int blocks to fp32 + //Sadly there's no direct epi16 to ps conversion instruction. + __m256 block0_float = _mm256_cvtepi32_ps(block0_i32); + __m256 block1_float = _mm256_cvtepi32_ps(block1_i32); + __m256 block2_float = _mm256_cvtepi32_ps(block2_i32); + __m256 block3_float = _mm256_cvtepi32_ps(block3_i32); + + //Woo! We've finally got floating point data. Now we can do the fun part. + block0_float = _mm256_mul_ps(block0_float, gains); + block1_float = _mm256_mul_ps(block1_float, gains); + block2_float = _mm256_mul_ps(block2_float, gains); + block3_float = _mm256_mul_ps(block3_float, gains); + + block0_float = _mm256_sub_ps(block0_float, offsets); + block1_float = _mm256_sub_ps(block1_float, offsets); + block2_float = _mm256_sub_ps(block2_float, offsets); + block3_float = _mm256_sub_ps(block3_float, offsets); + + //All done, store back to the output buffer + _mm256_store_ps(pout + k, block0_float); + _mm256_store_ps(pout + k + 8, block1_float); + _mm256_store_ps(pout + k + 16, block2_float); + _mm256_store_ps(pout + k + 24, block3_float); + } + + //Get any extras we didn't get in the SIMD loop + for(size_t k=end; k(pin + k)); + __m256i raw_samples2 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pin + k + 16)); + __m256i raw_samples3 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pin + k + 32)); + __m256i raw_samples4 = _mm256_loadu_si256(reinterpret_cast<__m256i*>(pin + k + 48)); + + //Extract the low and high halves (8 samples each) from the input blocks + __m128i block0_u16 = _mm256_extracti128_si256(raw_samples1, 0); + __m128i block1_u16 = _mm256_extracti128_si256(raw_samples1, 1); + __m128i block2_u16 = _mm256_extracti128_si256(raw_samples2, 0); + __m128i block3_u16 = _mm256_extracti128_si256(raw_samples2, 1); + __m128i block4_u16 = _mm256_extracti128_si256(raw_samples3, 0); + __m128i block5_u16 = _mm256_extracti128_si256(raw_samples3, 1); + __m128i block6_u16 = _mm256_extracti128_si256(raw_samples4, 0); + __m128i block7_u16 = _mm256_extracti128_si256(raw_samples4, 1); + + //Convert the blocks from unsigned 16-bit to signed 32-bit, giving us a pair of 8x int32 vectors + __m256i block0_i32 = _mm256_cvtepu16_epi32(block0_u16); + __m256i block1_i32 = _mm256_cvtepu16_epi32(block1_u16); + __m256i block2_i32 = _mm256_cvtepu16_epi32(block2_u16); + __m256i block3_i32 = _mm256_cvtepu16_epi32(block3_u16); + __m256i block4_i32 = _mm256_cvtepu16_epi32(block4_u16); + __m256i block5_i32 = _mm256_cvtepu16_epi32(block5_u16); + __m256i block6_i32 = _mm256_cvtepu16_epi32(block6_u16); + __m256i block7_i32 = _mm256_cvtepu16_epi32(block7_u16); + + //Convert the 32-bit int blocks to fp32 + //Sadly there's no direct epi16 to ps conversion instruction. + __m256 block0_float = _mm256_cvtepi32_ps(block0_i32); + __m256 block1_float = _mm256_cvtepi32_ps(block1_i32); + __m256 block2_float = _mm256_cvtepi32_ps(block2_i32); + __m256 block3_float = _mm256_cvtepi32_ps(block3_i32); + __m256 block4_float = _mm256_cvtepi32_ps(block4_i32); + __m256 block5_float = _mm256_cvtepi32_ps(block5_i32); + __m256 block6_float = _mm256_cvtepi32_ps(block6_i32); + __m256 block7_float = _mm256_cvtepi32_ps(block7_i32); + + //Woo! We've finally got floating point data. Now we can do the fun part. + block0_float = _mm256_fmsub_ps(block0_float, gains, offsets); + block1_float = _mm256_fmsub_ps(block1_float, gains, offsets); + block2_float = _mm256_fmsub_ps(block2_float, gains, offsets); + block3_float = _mm256_fmsub_ps(block3_float, gains, offsets); + block4_float = _mm256_fmsub_ps(block4_float, gains, offsets); + block5_float = _mm256_fmsub_ps(block5_float, gains, offsets); + block6_float = _mm256_fmsub_ps(block6_float, gains, offsets); + block7_float = _mm256_fmsub_ps(block7_float, gains, offsets); + + //All done, store back to the output buffer + _mm256_store_ps(pout + k, block0_float); + _mm256_store_ps(pout + k + 8, block1_float); + _mm256_store_ps(pout + k + 16, block2_float); + _mm256_store_ps(pout + k + 24, block3_float); + + _mm256_store_ps(pout + k + 32, block4_float); + _mm256_store_ps(pout + k + 40, block5_float); + _mm256_store_ps(pout + k + 48, block6_float); + _mm256_store_ps(pout + k + 56, block7_float); + } + + //Get any extras we didn't get in the SIMD loop + for(size_t k=end; km_triggerPhase = 0; wfm->PrepareForCpuAccess(); SetData(wfm, m_streams.size() - 1); + wfm->Resize(wh.wave_length); LogDebug("Waveform[%d]: %s\n", wave_idx, name.c_str()); double v_gain = wh.ch_v_gain[i].value * wh.ch_probe[i] / wh.ch_codes_per_div[i]; @@ -171,23 +172,23 @@ void SiglentBINImportFilter::OnFileNameChanged() if(data_width == 2) { - for(size_t j = 0; j < wh.wave_length; j++) - { - const uint16_t* sample = reinterpret_cast(f.c_str() + fpos); - float value = ((static_cast(*sample) - center_code)) * v_gain - wh.ch_v_offset[i].value; - wfm->m_samples.push_back(value); - fpos += 2; - } + Oscilloscope::ConvertUnsigned16BitSamples( + wfm->m_samples.GetCpuPointer(), + (uint16_t*)(f.c_str() + fpos), + v_gain, + v_gain * center_code + wh.ch_v_offset[i].value, + wh.wave_length); + fpos += 2 * wh.wave_length; } else { - for(size_t j = 0; j < wh.wave_length; j++) - { - const uint8_t* sample = reinterpret_cast(f.c_str() + fpos); - float value = (static_cast(*sample) - center_code) * v_gain - wh.ch_v_offset[i].value; - wfm->m_samples.push_back(value); - fpos += 1; - } + Oscilloscope::ConvertUnsigned8BitSamples( + wfm->m_samples.GetCpuPointer(), + (uint8_t*)(f.c_str() + fpos), + v_gain, + v_gain * center_code + wh.ch_v_offset[i].value, + wh.wave_length); + fpos += wh.wave_length; } wfm->MarkModifiedFromCpu(); @@ -209,6 +210,7 @@ void SiglentBINImportFilter::OnFileNameChanged() wfm->m_triggerPhase = 0; wfm->PrepareForCpuAccess(); SetData(wfm, m_streams.size() - 1); + wfm->Resize(wh.math_wave_length[i]); LogDebug("Waveform[%d]: %s\n", wave_idx, name.c_str()); double v_gain = wh.math_v_gain[i].value / wh.math_codes_per_div; @@ -217,23 +219,23 @@ void SiglentBINImportFilter::OnFileNameChanged() if(data_width == 2) { - for(size_t j = 0; j < wh.math_wave_length[i]; j++) - { - const uint16_t* sample = reinterpret_cast(f.c_str() + fpos); - float value = ((static_cast(*sample) - center_code)) * v_gain - wh.math_v_offset[i].value; - wfm->m_samples.push_back(value); - fpos += 2; - } + Oscilloscope::ConvertUnsigned16BitSamples( + wfm->m_samples.GetCpuPointer(), + (uint16_t*)(f.c_str() + fpos), + v_gain, + v_gain * center_code + wh.math_v_offset[i].value, + wh.math_wave_length[i]); + fpos += 2 * wh.math_wave_length[i]; } else { - for(size_t j = 0; j < wh.math_wave_length[i]; j++) - { - const uint8_t* sample = reinterpret_cast(f.c_str() + fpos); - float value = (static_cast(*sample) - center_code) * v_gain - wh.math_v_offset[i].value; - wfm->m_samples.push_back(value); - fpos += 1; - } + Oscilloscope::ConvertUnsigned8BitSamples( + wfm->m_samples.GetCpuPointer(), + (uint8_t*)(f.c_str() + fpos), + v_gain, + v_gain * center_code + wh.math_v_offset[i].value, + wh.math_wave_length[i]); + fpos += wh.math_wave_length[i]; } wfm->MarkModifiedFromCpu(); From 4bfaf83c696304d6a9166045060fe731df11e636 Mon Sep 17 00:00:00 2001 From: Hansem Ro Date: Tue, 22 Aug 2023 19:15:45 -0700 Subject: [PATCH 3/3] Add AVX512 implementation of ConvertUnsigned16BitSamples --- scopehal/Oscilloscope.cpp | 65 +++++++++++++++++++++++++++++++++++++-- scopehal/Oscilloscope.h | 1 + 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/scopehal/Oscilloscope.cpp b/scopehal/Oscilloscope.cpp index 4c94faf0..0268df0b 100644 --- a/scopehal/Oscilloscope.cpp +++ b/scopehal/Oscilloscope.cpp @@ -1302,7 +1302,16 @@ void Oscilloscope::ConvertUnsigned16BitSamples(float* pout, uint16_t* pin, float size_t off = i*blocksize; #ifdef __x86_64__ - if(g_hasAvx2) + if(g_hasAvx512F) + { + ConvertUnsigned16BitSamplesAVX512F( + pout + off, + pin + off, + gain, + offset, + nsamp); + } + else if(g_hasAvx2) { if(g_hasFMA) { @@ -1384,7 +1393,7 @@ void Oscilloscope::ConvertUnsigned16BitSamplesAVX2(float* pout, uint16_t* pin, f __m128i block2_u16 = _mm256_extracti128_si256(raw_samples2, 0); __m128i block3_u16 = _mm256_extracti128_si256(raw_samples2, 1); - //Convert both blocks from unsigned 16-bit to signed 32-bit, giving us a pair of 8x int32 vectors + //Convert the blocks from unsigned 16-bit to signed 32-bit, giving us a pair of 8x int32 vectors __m256i block0_i32 = _mm256_cvtepu16_epi32(block0_u16); __m256i block1_i32 = _mm256_cvtepu16_epi32(block1_u16); __m256i block2_i32 = _mm256_cvtepu16_epi32(block2_u16); @@ -1494,4 +1503,56 @@ void Oscilloscope::ConvertUnsigned16BitSamplesFMA(float* pout, uint16_t* pin, fl for(size_t k=end; k(pin + k)); + __m512i raw_samples2 = _mm512_loadu_si512(reinterpret_cast<__m512i*>(pin + k + 32)); + + //Extract the high and low halves (16 samples each) from the input blocks + __m256i block0_u16 = _mm512_extracti64x4_epi64(raw_samples1, 0); + __m256i block1_u16 = _mm512_extracti64x4_epi64(raw_samples1, 1); + __m256i block2_u16 = _mm512_extracti64x4_epi64(raw_samples2, 0); + __m256i block3_u16 = _mm512_extracti64x4_epi64(raw_samples2, 1); + + //Convert the blocks from unsigned 16-bit to signed 32-bit, giving us a pair of 16x int32 vectors + __m512i block0_i32 = _mm512_cvtepu16_epi32(block0_u16); + __m512i block1_i32 = _mm512_cvtepu16_epi32(block1_u16); + __m512i block2_i32 = _mm512_cvtepu16_epi32(block2_u16); + __m512i block3_i32 = _mm512_cvtepu16_epi32(block3_u16); + + //Convert the 32-bit int blocks to fp32 + //Sadly there's no direct epi16 to ps conversion instruction. + __m512 block0_float = _mm512_cvtepi32_ps(block0_i32); + __m512 block1_float = _mm512_cvtepi32_ps(block1_i32); + __m512 block2_float = _mm512_cvtepi32_ps(block2_i32); + __m512 block3_float = _mm512_cvtepi32_ps(block3_i32); + + //Woo! We've finally got floating point data. Now we can do the fun part. + block0_float = _mm512_fmsub_ps(block0_float, gains, offsets); + block1_float = _mm512_fmsub_ps(block1_float, gains, offsets); + block2_float = _mm512_fmsub_ps(block2_float, gains, offsets); + block3_float = _mm512_fmsub_ps(block3_float, gains, offsets); + + //All done, store back to the output buffer + _mm512_store_ps(pout + k, block0_float); + _mm512_store_ps(pout + k + 16, block1_float); + _mm512_store_ps(pout + k + 32, block2_float); + _mm512_store_ps(pout + k + 48, block3_float); + } + + //Get any extras we didn't get in the SIMD loop + for(size_t k=end; k