From 2fc8e05f79861d224ec0c3b79ed28f127a823f1b Mon Sep 17 00:00:00 2001 From: ChristianFeldmann Date: Fri, 6 Dec 2024 22:51:23 +0100 Subject: [PATCH] Remove old unused SSE code --- YUViewLib/src/common/Typedef.h | 84 ------- YUViewLib/src/decoder/decoderDav1d.cpp | 10 +- YUViewLib/src/decoder/decoderDav1d.h | 21 +- YUViewLib/src/decoder/decoderHM.cpp | 4 - YUViewLib/src/decoder/decoderHM.h | 5 - YUViewLib/src/decoder/decoderLibde265.cpp | 10 +- YUViewLib/src/decoder/decoderLibde265.h | 11 +- YUViewLib/src/decoder/decoderVTM.cpp | 4 - YUViewLib/src/decoder/decoderVTM.h | 15 +- YUViewLib/src/filesource/FileSource.cpp | 15 -- YUViewLib/src/filesource/FileSource.h | 3 - YUViewLib/src/video/yuv/videoHandlerYUV.cpp | 252 -------------------- YUViewLib/src/video/yuv/videoHandlerYUV.h | 12 - 13 files changed, 22 insertions(+), 424 deletions(-) diff --git a/YUViewLib/src/common/Typedef.h b/YUViewLib/src/common/Typedef.h index 211b8ab78..a1fd7e3ba 100644 --- a/YUViewLib/src/common/Typedef.h +++ b/YUViewLib/src/common/Typedef.h @@ -84,90 +84,6 @@ const bool is_Q_OS_LINUX = false; // However, it is not yet clear what to do if the user wants/needs a second instance. #define WIN_LINUX_SINGLE_INSTANCE 0 -// Activate SSE YUV conversion -// Do not activate. This is not supported right now. -#define SSE_CONVERSION 0 -#if SSE_CONVERSION - -#define HAVE_SSE4_1 1 -#define SSE_CONVERSION_420_ALT 1 // Alternate method for SSE Conversion, Testing only - -#ifdef HAVE_MALLOC_H -#include -#endif - -#ifdef HAVE_SSE4_1 -#define MEMORY_PADDING 8 -#else -#define MEMORY_PADDING 0 -#endif - -#define STANDARD_ALIGNMENT 16 - -#ifdef HAVE___MINGW_ALIGNED_MALLOC -#define ALLOC_ALIGNED(alignment, size) __mingw_aligned_malloc((size), (alignment)) -#define FREE_ALIGNED(mem) __mingw_aligned_free((mem)) -#elif _WIN32 -#define ALLOC_ALIGNED(alignment, size) _aligned_malloc((size), (alignment)) -#define FREE_ALIGNED(mem) _aligned_free((mem)) -#elif defined(HAVE_POSIX_MEMALIGN) -static inline void *ALLOC_ALIGNED(size_t alignment, size_t size) -{ - void *mem = NULL; - if (posix_memalign(&mem, alignment, size) != 0) - { - return NULL; - } - return mem; -}; -#define FREE_ALIGNED(mem) free((mem)) -#else -#define ALLOC_ALIGNED(alignment, size) memalign((alignment), (size)) -#define FREE_ALIGNED(mem) free((mem)) -#endif - -#define ALLOC_ALIGNED_16(size) ALLOC_ALIGNED(16, size) - -// A small class comparable to QByteArray but aligned to 16 byte addresses -class byteArrayAligned -{ -public: - byteArrayAligned() : _data(NULL), _size(-1) {} - ~byteArrayAligned() - { - if (_size != -1) - { - assert(_data != NULL); - FREE_ALIGNED(_data); - } - } - int size() { return _size; } - int capacity() { return _size; } - char *data() { return _data; } - bool isEmpty() { return _size <= 0 ? true : false; } - void resize(int size) - { - if (_size != -1) - { - // The array has been allocated before. Free it. - assert(_data != NULL); - FREE_ALIGNED(_data); - _data = NULL; - _size = -1; - } - // Allocate a new array of sufficient size - assert(_size == -1); - assert(_data == NULL); - _data = (char *)ALLOC_ALIGNED_16(size + MEMORY_PADDING); - _size = size; - } - -private: - char *_data; - int _size; -}; -#endif // SSE_CONVERSION - // The default frame rate that will be used when we could not guess it. #define DEFAULT_FRAMERATE 24.0 diff --git a/YUViewLib/src/decoder/decoderDav1d.cpp b/YUViewLib/src/decoder/decoderDav1d.cpp index 8f7915671..7792a9625 100644 --- a/YUViewLib/src/decoder/decoderDav1d.cpp +++ b/YUViewLib/src/decoder/decoderDav1d.cpp @@ -449,7 +449,7 @@ bool decoderDav1d::pushData(QByteArray &data) // Since dav1d consumes the data (takes ownership), we need to copy it to a new buffer from // dav1d Dav1dData *dav1dData = new Dav1dData; - uint8_t * rawDataPointer = this->lib.dav1d_data_create(dav1dData, data.size()); + uint8_t *rawDataPointer = this->lib.dav1d_data_create(dav1dData, data.size()); memcpy(rawDataPointer, data.data(), data.size()); int err = this->lib.dav1d_send_data(decoder, dav1dData); @@ -476,11 +476,7 @@ bool decoderDav1d::pushData(QByteArray &data) return true; } -#if SSE_CONVERSION -void decoderDav1d::copyImgToByteArray(const Dav1dPictureWrapper &src, byteArrayAligned &dst) -#else void decoderDav1d::copyImgToByteArray(const Dav1dPictureWrapper &src, QByteArray &dst) -#endif { // How many image planes are there? int nrPlanes = (src.getSubsampling() == Subsampling::YUV_400) ? 1 : 3; @@ -789,7 +785,7 @@ void decoderDav1d::cacheStatistics(const Dav1dPictureWrapper &img) DEBUG_DAV1D("decoderDav1d::cacheStatistics"); - Av1Block * blockData = img.getBlockData(); + Av1Block *blockData = img.getBlockData(); Dav1dFrameHeader *frameHeader = img.getFrameHeader(); if (frameHeader == nullptr) return; @@ -904,7 +900,7 @@ void decoderDav1d::parseBlockRecursive( } } -void decoderDav1d::parseBlockPartition(Av1Block * blockData, +void decoderDav1d::parseBlockPartition(Av1Block *blockData, unsigned x, unsigned y, unsigned blockWidth4, diff --git a/YUViewLib/src/decoder/decoderDav1d.h b/YUViewLib/src/decoder/decoderDav1d.h index 44333e3f2..6fc4c06a7 100644 --- a/YUViewLib/src/decoder/decoderDav1d.h +++ b/YUViewLib/src/decoder/decoderDav1d.h @@ -70,12 +70,12 @@ class Dav1dPictureWrapper void clear() { memset(&curPicture, 0, sizeof(Dav1dPicture)); } Size getFrameSize() const; - Dav1dPicture * getPicture() const { return (Dav1dPicture *)(&curPicture); } + Dav1dPicture *getPicture() const { return (Dav1dPicture *)(&curPicture); } video::yuv::Subsampling getSubsampling() const; int getBitDepth() const { return curPicture.p.bpc; } - uint8_t * getData(int component) const { return (uint8_t *)curPicture.data[component]; } + uint8_t *getData(int component) const { return (uint8_t *)curPicture.data[component]; } ptrdiff_t getStride(int component) const { return curPicture.stride[component]; } - uint8_t * getDataPrediction(int component) const + uint8_t *getDataPrediction(int component) const { return internalsSupported ? (uint8_t *)curPicture.pred[component] : nullptr; } @@ -89,7 +89,7 @@ class Dav1dPictureWrapper } Dav1dSequenceHeader *getSequenceHeader() const { return curPicture.seq_hdr; } - Dav1dFrameHeader * getFrameHeader() const { return curPicture.frame_hdr; } + Dav1dFrameHeader *getFrameHeader() const { return curPicture.frame_hdr; } private: Dav1dPicture curPicture; @@ -135,7 +135,7 @@ class decoderDav1d : public decoderBaseSingleLib private: // A private constructor that creates an uninitialized decoder library. // Used by checkLibraryFile to check if a file can be used as a hevcDecoderLibde265. - decoderDav1d() : decoderBaseSingleLib(){}; + decoderDav1d() : decoderBaseSingleLib() {}; // Try to resolve all the required function pointers from the library void resolveLibraryFunctionPointers() override; @@ -150,7 +150,7 @@ class decoderDav1d : public decoderBaseSingleLib void allocateNewDecoder(); - Dav1dContext * decoder{}; + Dav1dContext *decoder{}; Dav1dSettings settings; Dav1dAnalyzerFlags analyzerSettings; @@ -170,22 +170,17 @@ class decoderDav1d : public decoderBaseSingleLib // We buffer the current image as a QByteArray so you can call getYUVFrameData as often as // necessary without invoking the copy operation from the libde265 buffer to the QByteArray again. -#if SSE_CONVERSION - byteArrayAligned currentOutputBuffer; - void copyImgToByteArray(const Dav1dPictureWrapper &src, byteArrayAligned &dst); -#else QByteArray currentOutputBuffer; void copyImgToByteArray( const Dav1dPictureWrapper &src, - QByteArray & dst); // Copy the raw data from the Dav1dPicture source *src to the byte array -#endif + QByteArray &dst); // Copy the raw data from the Dav1dPicture source *src to the byte array // Statistics void fillStatisticList(stats::StatisticsData &) const override; void cacheStatistics(const Dav1dPictureWrapper &img); void parseBlockRecursive( Av1Block *blockData, unsigned x, unsigned y, BlockLevel level, dav1dFrameInfo &frameInfo); - void parseBlockPartition(Av1Block * blockData, + void parseBlockPartition(Av1Block *blockData, unsigned x, unsigned y, unsigned blockWidth4, diff --git a/YUViewLib/src/decoder/decoderHM.cpp b/YUViewLib/src/decoder/decoderHM.cpp index 7b4c29535..4f9444e7f 100644 --- a/YUViewLib/src/decoder/decoderHM.cpp +++ b/YUViewLib/src/decoder/decoderHM.cpp @@ -372,11 +372,7 @@ QByteArray decoderHM::getRawFrameData() return currentOutputBuffer; } -#if SSE_CONVERSION -void decoderHM::copyImgToByteArray(libHMDec_picture *src, byteArrayAligned &dst) -#else void decoderHM::copyImgToByteArray(libHMDec_picture *src, QByteArray &dst) -#endif { // How many image planes are there? auto fmt = this->lib.libHMDEC_get_chroma_format(src); diff --git a/YUViewLib/src/decoder/decoderHM.h b/YUViewLib/src/decoder/decoderHM.h index 88022d7ac..54d71e124 100644 --- a/YUViewLib/src/decoder/decoderHM.h +++ b/YUViewLib/src/decoder/decoderHM.h @@ -143,15 +143,10 @@ class decoderHM : public decoderBaseSingleLib // We buffer the current image as a QByteArray so you can call getYUVFrameData as often as // necessary without invoking the copy operation from the hm image buffer to the QByteArray again. -#if SSE_CONVERSION - byteArrayAligned currentOutputBuffer; - void copyImgToByteArray(libHMDec_picture *src, byteArrayAligned &dst); -#else QByteArray currentOutputBuffer; void copyImgToByteArray( libHMDec_picture *src, QByteArray &dst); // Copy the raw data from the de265_image source *src to the byte array -#endif LibraryFunctionsHM lib; }; diff --git a/YUViewLib/src/decoder/decoderLibde265.cpp b/YUViewLib/src/decoder/decoderLibde265.cpp index 9287f7f25..e87a82ef1 100644 --- a/YUViewLib/src/decoder/decoderLibde265.cpp +++ b/YUViewLib/src/decoder/decoderLibde265.cpp @@ -464,11 +464,7 @@ bool decoderLibde265::pushData(QByteArray &data) return true; } -#if SSE_CONVERSION -void decoderLibde265::copyImgToByteArray(const de265_image *src, byteArrayAligned &dst) -#else void decoderLibde265::copyImgToByteArray(const de265_image *src, QByteArray &dst) -#endif { // How many image planes are there? auto cMode = this->lib.de265_get_chroma_format(src); @@ -967,9 +963,9 @@ void decoderLibde265::fillStatisticList(stats::StatisticsData &statisticsData) c stats::StatisticsType intraDirC( 10, "Intra Dir Chroma", ColorMapper({0, 34}, PredefinedType::Jet)); - intraDirC.description = "The intra mode for the chroma component per TU (intra prediction is " - "performed on a TU level)"; - intraDirC.hasVectorData = true; + intraDirC.description = "The intra mode for the chroma component per TU (intra prediction is " + "performed on a TU level)"; + intraDirC.hasVectorData = true; intraDirC.renderVectorData = true; intraDirC.renderVectorDataValues = false; intraDirC.vectorScale = 32; diff --git a/YUViewLib/src/decoder/decoderLibde265.h b/YUViewLib/src/decoder/decoderLibde265.h index fe0ae28c5..4b09c21b9 100644 --- a/YUViewLib/src/decoder/decoderLibde265.h +++ b/YUViewLib/src/decoder/decoderLibde265.h @@ -81,7 +81,7 @@ struct LibraryFunctionsDe265 const uint8_t *(*de265_internals_get_image_plane)(const struct de265_image *img, de265_internals_param signal, int channel, - int * out_stride){}; + int *out_stride){}; void (*de265_internals_set_parameter_bool)(de265_decoder_context *, enum de265_internals_param param, int value){}; @@ -124,7 +124,7 @@ class decoderLibde265 : public decoderBaseSingleLib private: // A private constructor that creates an uninitialized decoder library. // Used by checkLibraryFile to check if a file can be used as a hevcDecoderLibde265. - decoderLibde265() : decoderBaseSingleLib(){}; + decoderLibde265() : decoderBaseSingleLib() {}; // Try to resolve all the required function pointers from the library void resolveLibraryFunctionPointers() override; @@ -175,15 +175,10 @@ class decoderLibde265 : public decoderBaseSingleLib // We buffer the current image as a QByteArray so you can call getYUVFrameData as often as // necessary without invoking the copy operation from the libde265 buffer to the QByteArray again. -#if SSE_CONVERSION - byteArrayAligned currentOutputBuffer; - void copyImgToByteArray(const de265_image *src, byteArrayAligned &dst); -#else QByteArray currentOutputBuffer; void copyImgToByteArray( const de265_image *src, - QByteArray & dst); // Copy the raw data from the de265_image source *src to the byte array -#endif + QByteArray &dst); // Copy the raw data from the de265_image source *src to the byte array LibraryFunctionsDe265 lib; }; diff --git a/YUViewLib/src/decoder/decoderVTM.cpp b/YUViewLib/src/decoder/decoderVTM.cpp index 08f39a7de..8d4eb4285 100644 --- a/YUViewLib/src/decoder/decoderVTM.cpp +++ b/YUViewLib/src/decoder/decoderVTM.cpp @@ -366,11 +366,7 @@ QByteArray decoderVTM::getRawFrameData() return currentOutputBuffer; } -#if SSE_CONVERSION -void decoderVTM::copyImgToByteArray(libVTMDec_picture *src, byteArrayAligned &dst) -#else void decoderVTM::copyImgToByteArray(libVTMDec_picture *src, QByteArray &dst) -#endif { // How many image planes are there? auto fmt = this->lib.libVTMDec_get_chroma_format(src); diff --git a/YUViewLib/src/decoder/decoderVTM.h b/YUViewLib/src/decoder/decoderVTM.h index 4eba0c522..68a5b729e 100644 --- a/YUViewLib/src/decoder/decoderVTM.h +++ b/YUViewLib/src/decoder/decoderVTM.h @@ -51,11 +51,11 @@ struct LibraryFunctionsVTM void (*libVTMDec_set_SEI_Check)(libVTMDec_context *, bool check_hash){}; void (*libVTMDec_set_max_temporal_layer)(libVTMDec_context *, int max_layer){}; libVTMDec_error (*libVTMDec_push_nal_unit)(libVTMDec_context *decCtx, - const void * data8, + const void *data8, int length, bool eof, - bool & bNewPicture, - bool & checkOutputPictures){}; + bool &bNewPicture, + bool &checkOutputPictures){}; // Get a picture and retrive information on the picture libVTMDec_picture *(*libVTMDec_get_picture)(libVTMDec_context *){}; @@ -93,7 +93,7 @@ class decoderVTM : public decoderBaseSingleLib private: // A private constructor that creates an uninitialized decoder library. // Used by checkLibraryFile to check if a file can be used as this type of decoder. - decoderVTM(){}; + decoderVTM() {}; // Return the possible names of the HM library QStringList getLibraryNames() const override; @@ -130,15 +130,10 @@ class decoderVTM : public decoderBaseSingleLib // We buffer the current image as a QByteArray so you can call getYUVFrameData as often as // necessary without invoking the copy operation from the hm image buffer to the QByteArray again. -#if SSE_CONVERSION - byteArrayAligned currentOutputBuffer; - void copyImgToByteArray(libVTMDec_picture *src, byteArrayAligned &dst); -#else QByteArray currentOutputBuffer; void copyImgToByteArray( libVTMDec_picture *src, - QByteArray & dst); // Copy the raw data from the de265_image source *src to the byte array -#endif + QByteArray &dst); // Copy the raw data from the de265_image source *src to the byte array LibraryFunctionsVTM lib; }; diff --git a/YUViewLib/src/filesource/FileSource.cpp b/YUViewLib/src/filesource/FileSource.cpp index ecc5df6ae..e5df613a9 100644 --- a/YUViewLib/src/filesource/FileSource.cpp +++ b/YUViewLib/src/filesource/FileSource.cpp @@ -77,21 +77,6 @@ bool FileSource::openFile(const std::filesystem::path &filePath) return true; } -#if SSE_CONVERSION -// Resize the target array if necessary and read the given number of bytes to the data array -void FileSource::readBytes(byteArrayAligned &targetBuffer, int64_t startPos, int64_t nrBytes) -{ - if (!isOk()) - return; - - if (targetBuffer.size() < nrBytes) - targetBuffer.resize(nrBytes); - - srcFile.seek(startPos); - srcFile.read(targetBuffer.data(), nrBytes); -} -#endif - // Resize the target array if necessary and read the given number of bytes to the data array int64_t FileSource::readBytes(QByteArray &targetBuffer, int64_t startPos, int64_t nrBytes) { diff --git a/YUViewLib/src/filesource/FileSource.h b/YUViewLib/src/filesource/FileSource.h index 0a3e77d99..2ee9be62f 100644 --- a/YUViewLib/src/filesource/FileSource.h +++ b/YUViewLib/src/filesource/FileSource.h @@ -93,9 +93,6 @@ class FileSource : public QObject // Read the given number of bytes starting at startPos into the QByteArray out // Resize the QByteArray if necessary. Return how many bytes were read. int64_t readBytes(QByteArray &targetBuffer, int64_t startPos, int64_t nrBytes); -#if SSE_CONVERSION - void readBytes(byteArrayAligned &data, int64_t startPos, int64_t nrBytes); -#endif void updateFileWatchSetting(); void clearFileCache(); diff --git a/YUViewLib/src/video/yuv/videoHandlerYUV.cpp b/YUViewLib/src/video/yuv/videoHandlerYUV.cpp index 825ae1602..70ed0a9c2 100644 --- a/YUViewLib/src/video/yuv/videoHandlerYUV.cpp +++ b/YUViewLib/src/video/yuv/videoHandlerYUV.cpp @@ -40,9 +40,6 @@ #include #include -#if SSE_CONVERSION_420_ALT -#include -#endif #include #include @@ -454,103 +451,6 @@ bool convertYUV420ToRGB(const QByteArray &sourceBuffer, Q_ASSERT(sourceBuffer.size() >= componentLenghtY + componentLengthUV + componentLengthUV); // YUV 420 must be (at least) 1.5*Y-area -#if SSE_CONVERSION_420_ALT - quint8 *srcYRaw = (quint8 *)sourceBuffer.data(); - quint8 *srcURaw = srcYRaw + componentLenghtY; - quint8 *srcVRaw = srcURaw + componentLengthUV; - - quint8 *dstBuffer = (quint8 *)targetBuffer.data(); - quint32 dstBufferStride = frameWidth * 4; - - yuv420_to_argb8888(srcYRaw, - srcURaw, - srcVRaw, - frameWidth, - frameWidth >> 1, - frameWidth, - frameHeight, - dstBuffer, - dstBufferStride); - return false; -#endif - -#if SSE_CONVERSION - // Try to use SSE. If this fails use conventional algorithm - - if (frameWidth % 32 == 0 && frameHeight % 2 == 0) - { - // We can use 16byte aligned read/write operations - - quint8 *srcY = (quint8 *)sourceBuffer.data(); - quint8 *srcU = srcY + componentLenghtY; - quint8 *srcV = srcU + componentLengthUV; - - __m128i yMult = _mm_set_epi16(75, 75, 75, 75, 75, 75, 75, 75); - __m128i ySub = _mm_set_epi16(16, 16, 16, 16, 16, 16, 16, 16); - __m128i ugMult = _mm_set_epi16(25, 25, 25, 25, 25, 25, 25, 25); - //__m128i sub16 = _mm_set_epi8(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16); - __m128i sub128 = _mm_set_epi8( - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128); - - //__m128i test = _mm_set_epi8(128, 0, 1, 2, 3, 245, 254, 255, 128, 128, 128, 128, 128, 128, 128, - // 128); - - __m128i y, u, v, uMult, vMult; - __m128i RGBOut0, RGBOut1, RGBOut2; - __m128i tmp; - - for (int yh = 0; yh < frameHeight / 2; yh++) - { - for (int x = 0; x < frameWidth / 32; x += 32) - { - // Load 16 bytes U/V - u = _mm_load_si128((__m128i *)&srcU[x / 2]); - v = _mm_load_si128((__m128i *)&srcV[x / 2]); - // Subtract 128 from each U/V value (16 values) - u = _mm_sub_epi8(u, sub128); - v = _mm_sub_epi8(v, sub128); - - // Load 16 bytes Y from this line and the next one - y = _mm_load_si128((__m128i *)&srcY[x]); - - // Get the lower 8 (8bit signed) Y values and put them into a 16bit register - tmp = _mm_srai_epi16(_mm_unpacklo_epi8(y, y), 8); - // Subtract 16 and multiply by 75 - tmp = _mm_sub_epi16(tmp, ySub); - tmp = _mm_mullo_epi16(tmp, yMult); - - // Now to add them to the 16 bit RGB output values - RGBOut0 = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 1, 0)); - RGBOut0 = _mm_shufflelo_epi16(RGBOut0, _MM_SHUFFLE(1, 0, 0, 0)); - RGBOut0 = _mm_shufflehi_epi16(RGBOut0, _MM_SHUFFLE(2, 2, 1, 1)); - - RGBOut1 = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2, 1, 2, 1)); - RGBOut1 = _mm_shufflelo_epi16(RGBOut1, _MM_SHUFFLE(1, 1, 1, 0)); - RGBOut1 = _mm_shufflehi_epi16(RGBOut1, _MM_SHUFFLE(3, 2, 2, 2)); - - RGBOut2 = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(3, 2, 3, 2)); - RGBOut2 = _mm_shufflelo_epi16(RGBOut2, _MM_SHUFFLE(2, 2, 1, 1)); - RGBOut2 = _mm_shufflehi_epi16(RGBOut2, _MM_SHUFFLE(3, 3, 3, 2)); - - // y2 = _mm_load_si128((__m128i *) &srcY[x + 16]); - - // --- Start with the left 8 values from U/V - - // Get the lower 8 (8bit signed) U/V values and put them into a 16bit register - uMult = _mm_srai_epi16(_mm_unpacklo_epi8(u, u), 8); - vMult = _mm_srai_epi16(_mm_unpacklo_epi8(v, v), 8); - - // Multiply - - /*y3 = _mm_load_si128((__m128i *) &srcY[x + frameWidth]); - y4 = _mm_load_si128((__m128i *) &srcY[x + frameWidth + 16]);*/ - } - } - - return true; - } -#endif - static unsigned char *clip_buf = clp_buf + 384; if (!clp_buf_initialized) initClippingTable(); @@ -2546,158 +2446,6 @@ void videoHandlerYUV::drawFrame(QPainter *painter, videoHandler::drawFrame(painter, frameIdx, zoomFactor, drawRawData); } -/// --- Convert from the current YUV input format to YUV 444 - -#if SSE_CONVERSION_420_ALT -void videoHandlerYUV::yuv420_to_argb8888(quint8 *yp, - quint8 *up, - quint8 *vp, - quint32 sy, - quint32 suv, - int width, - int height, - quint8 *rgb, - quint32 srgb) -{ - __m128i y0r0, y0r1, u0, v0; - __m128i y00r0, y01r0, y00r1, y01r1; - __m128i u00, u01, v00, v01; - __m128i rv00, rv01, gu00, gu01, gv00, gv01, bu00, bu01; - __m128i r00, r01, g00, g01, b00, b01; - __m128i rgb0123, rgb4567, rgb89ab, rgbcdef; - __m128i gbgb; - __m128i ysub, uvsub; - __m128i zero, facy, facrv, facgu, facgv, facbu; - __m128i *srcy128r0, *srcy128r1; - __m128i *dstrgb128r0, *dstrgb128r1; - __m64 *srcu64, *srcv64; - - // Implement the following conversion: - // B = 1.164(Y - 16) + 2.018(U - 128) - // G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) - // R = 1.164(Y - 16) + 1.596(V - 128) - - int x, y; - // constants - ysub = _mm_set1_epi32(0x00100010); // value 16 for subtraction - uvsub = _mm_set1_epi32(0x00800080); // value 128 - - // multiplication factors bit shifted by 6 - facy = _mm_set1_epi32(0x004a004a); - facrv = _mm_set1_epi32(0x00660066); - facgu = _mm_set1_epi32(0x00190019); - facgv = _mm_set1_epi32(0x00340034); - facbu = _mm_set1_epi32(0x00810081); - - zero = _mm_set1_epi32(0x00000000); - - for (y = 0; y < height; y += 2) - { - srcy128r0 = (__m128i *)(yp + sy * y); - srcy128r1 = (__m128i *)(yp + sy * y + sy); - srcu64 = (__m64 *)(up + suv * (y / 2)); - srcv64 = (__m64 *)(vp + suv * (y / 2)); - - // dst row 0 and row 1 - dstrgb128r0 = (__m128i *)(rgb + srgb * y); - dstrgb128r1 = (__m128i *)(rgb + srgb * y + srgb); - - for (x = 0; x < width; x += 16) - { - u0 = _mm_loadl_epi64((__m128i *)srcu64); - srcu64++; - v0 = _mm_loadl_epi64((__m128i *)srcv64); - srcv64++; - - y0r0 = _mm_load_si128(srcy128r0++); - y0r1 = _mm_load_si128(srcy128r1++); - - // expand to 16 bit, subtract and multiply constant y factors - y00r0 = _mm_mullo_epi16(_mm_sub_epi16(_mm_unpacklo_epi8(y0r0, zero), ysub), facy); - y01r0 = _mm_mullo_epi16(_mm_sub_epi16(_mm_unpackhi_epi8(y0r0, zero), ysub), facy); - y00r1 = _mm_mullo_epi16(_mm_sub_epi16(_mm_unpacklo_epi8(y0r1, zero), ysub), facy); - y01r1 = _mm_mullo_epi16(_mm_sub_epi16(_mm_unpackhi_epi8(y0r1, zero), ysub), facy); - - // expand u and v so they're aligned with y values - u0 = _mm_unpacklo_epi8(u0, zero); - u00 = _mm_sub_epi16(_mm_unpacklo_epi16(u0, u0), uvsub); - u01 = _mm_sub_epi16(_mm_unpackhi_epi16(u0, u0), uvsub); - - v0 = _mm_unpacklo_epi8(v0, zero); - v00 = _mm_sub_epi16(_mm_unpacklo_epi16(v0, v0), uvsub); - v01 = _mm_sub_epi16(_mm_unpackhi_epi16(v0, v0), uvsub); - - // common factors on both rows. - rv00 = _mm_mullo_epi16(facrv, v00); - rv01 = _mm_mullo_epi16(facrv, v01); - gu00 = _mm_mullo_epi16(facgu, u00); - gu01 = _mm_mullo_epi16(facgu, u01); - gv00 = _mm_mullo_epi16(facgv, v00); - gv01 = _mm_mullo_epi16(facgv, v01); - bu00 = _mm_mullo_epi16(facbu, u00); - bu01 = _mm_mullo_epi16(facbu, u01); - - // add together and bit shift to the right - r00 = _mm_srai_epi16(_mm_add_epi16(y00r0, rv00), 6); - r01 = _mm_srai_epi16(_mm_add_epi16(y01r0, rv01), 6); - g00 = _mm_srai_epi16(_mm_sub_epi16(_mm_sub_epi16(y00r0, gu00), gv00), 6); - g01 = _mm_srai_epi16(_mm_sub_epi16(_mm_sub_epi16(y01r0, gu01), gv01), 6); - b00 = _mm_srai_epi16(_mm_add_epi16(y00r0, bu00), 6); - b01 = _mm_srai_epi16(_mm_add_epi16(y01r0, bu01), 6); - - r00 = _mm_packus_epi16(r00, r01); - g00 = _mm_packus_epi16(g00, g01); - b00 = _mm_packus_epi16(b00, b01); - - // shuffle back together to lower 0rgb0rgb... - r01 = _mm_unpacklo_epi8(r00, zero); // 0r0r... - gbgb = _mm_unpacklo_epi8(b00, g00); // gbgb... - rgb0123 = _mm_unpacklo_epi16(gbgb, r01); // lower 0rgb0rgb... - rgb4567 = _mm_unpackhi_epi16(gbgb, r01); // upper 0rgb0rgb... - - // shuffle back together to upper 0rgb0rgb... - r01 = _mm_unpackhi_epi8(r00, zero); - gbgb = _mm_unpackhi_epi8(b00, g00); - rgb89ab = _mm_unpacklo_epi16(gbgb, r01); - rgbcdef = _mm_unpackhi_epi16(gbgb, r01); - - // write to dst - _mm_store_si128(dstrgb128r0++, rgb0123); - _mm_store_si128(dstrgb128r0++, rgb4567); - _mm_store_si128(dstrgb128r0++, rgb89ab); - _mm_store_si128(dstrgb128r0++, rgbcdef); - - // row 1 - r00 = _mm_srai_epi16(_mm_add_epi16(y00r1, rv00), 6); - r01 = _mm_srai_epi16(_mm_add_epi16(y01r1, rv01), 6); - g00 = _mm_srai_epi16(_mm_sub_epi16(_mm_sub_epi16(y00r1, gu00), gv00), 6); - g01 = _mm_srai_epi16(_mm_sub_epi16(_mm_sub_epi16(y01r1, gu01), gv01), 6); - b00 = _mm_srai_epi16(_mm_add_epi16(y00r1, bu00), 6); - b01 = _mm_srai_epi16(_mm_add_epi16(y01r1, bu01), 6); - - r00 = _mm_packus_epi16(r00, r01); - g00 = _mm_packus_epi16(g00, g01); - b00 = _mm_packus_epi16(b00, b01); - - r01 = _mm_unpacklo_epi8(r00, zero); - gbgb = _mm_unpacklo_epi8(b00, g00); - rgb0123 = _mm_unpacklo_epi16(gbgb, r01); - rgb4567 = _mm_unpackhi_epi16(gbgb, r01); - - r01 = _mm_unpackhi_epi8(r00, zero); - gbgb = _mm_unpackhi_epi8(b00, g00); - rgb89ab = _mm_unpacklo_epi16(gbgb, r01); - rgbcdef = _mm_unpackhi_epi16(gbgb, r01); - - _mm_store_si128(dstrgb128r1++, rgb0123); - _mm_store_si128(dstrgb128r1++, rgb4567); - _mm_store_si128(dstrgb128r1++, rgb89ab); - _mm_store_si128(dstrgb128r1++, rgbcdef); - } - } -} -#endif - QLayout *videoHandlerYUV::createVideoHandlerControls(bool isSizeAndFormatFixed) { // Absolutely always only call this function once! diff --git a/YUViewLib/src/video/yuv/videoHandlerYUV.h b/YUViewLib/src/video/yuv/videoHandlerYUV.h index 0e79d9ce9..aade4af02 100644 --- a/YUViewLib/src/video/yuv/videoHandlerYUV.h +++ b/YUViewLib/src/video/yuv/videoHandlerYUV.h @@ -224,18 +224,6 @@ class videoHandlerYUV : public videoHandler const Size frameSize, const PixelFormatYUV &sourceBufferFormat) const; -#if SSE_CONVERSION_420_ALT - void yuv420_to_argb8888(quint8 *yp, - quint8 *up, - quint8 *vp, - quint32 sy, - quint32 suv, - int width, - int height, - quint8 *rgb, - quint32 srgb); -#endif - SafeUi ui; bool diffReady{};