From 2fc8e05f79861d224ec0c3b79ed28f127a823f1b Mon Sep 17 00:00:00 2001
From: ChristianFeldmann <christian.feldmann@gmx.de>
Date: Fri, 6 Dec 2024 22:51:23 +0100
Subject: [PATCH] Remove old unused SSE code

---
 YUViewLib/src/common/Typedef.h              |  84 -------
 YUViewLib/src/decoder/decoderDav1d.cpp      |  10 +-
 YUViewLib/src/decoder/decoderDav1d.h        |  21 +-
 YUViewLib/src/decoder/decoderHM.cpp         |   4 -
 YUViewLib/src/decoder/decoderHM.h           |   5 -
 YUViewLib/src/decoder/decoderLibde265.cpp   |  10 +-
 YUViewLib/src/decoder/decoderLibde265.h     |  11 +-
 YUViewLib/src/decoder/decoderVTM.cpp        |   4 -
 YUViewLib/src/decoder/decoderVTM.h          |  15 +-
 YUViewLib/src/filesource/FileSource.cpp     |  15 --
 YUViewLib/src/filesource/FileSource.h       |   3 -
 YUViewLib/src/video/yuv/videoHandlerYUV.cpp | 252 --------------------
 YUViewLib/src/video/yuv/videoHandlerYUV.h   |  12 -
 13 files changed, 22 insertions(+), 424 deletions(-)

diff --git a/YUViewLib/src/common/Typedef.h b/YUViewLib/src/common/Typedef.h
index 211b8ab78..a1fd7e3ba 100644
--- a/YUViewLib/src/common/Typedef.h
+++ b/YUViewLib/src/common/Typedef.h
@@ -84,90 +84,6 @@ const bool is_Q_OS_LINUX = false;
 // However, it is not yet clear what to do if the user wants/needs a second instance.
 #define WIN_LINUX_SINGLE_INSTANCE 0
 
-// Activate SSE YUV conversion
-// Do not activate. This is not supported right now.
-#define SSE_CONVERSION 0
-#if SSE_CONVERSION
-
-#define HAVE_SSE4_1 1
-#define SSE_CONVERSION_420_ALT 1 // Alternate method for SSE Conversion, Testing only
-
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-
-#ifdef HAVE_SSE4_1
-#define MEMORY_PADDING 8
-#else
-#define MEMORY_PADDING 0
-#endif
-
-#define STANDARD_ALIGNMENT 16
-
-#ifdef HAVE___MINGW_ALIGNED_MALLOC
-#define ALLOC_ALIGNED(alignment, size) __mingw_aligned_malloc((size), (alignment))
-#define FREE_ALIGNED(mem) __mingw_aligned_free((mem))
-#elif _WIN32
-#define ALLOC_ALIGNED(alignment, size) _aligned_malloc((size), (alignment))
-#define FREE_ALIGNED(mem) _aligned_free((mem))
-#elif defined(HAVE_POSIX_MEMALIGN)
-static inline void *ALLOC_ALIGNED(size_t alignment, size_t size)
-{
-  void *mem = NULL;
-  if (posix_memalign(&mem, alignment, size) != 0)
-  {
-    return NULL;
-  }
-  return mem;
-};
-#define FREE_ALIGNED(mem) free((mem))
-#else
-#define ALLOC_ALIGNED(alignment, size) memalign((alignment), (size))
-#define FREE_ALIGNED(mem) free((mem))
-#endif
-
-#define ALLOC_ALIGNED_16(size) ALLOC_ALIGNED(16, size)
-
-// A small class comparable to QByteArray but aligned to 16 byte addresses
-class byteArrayAligned
-{
-public:
-  byteArrayAligned() : _data(NULL), _size(-1) {}
-  ~byteArrayAligned()
-  {
-    if (_size != -1)
-    {
-      assert(_data != NULL);
-      FREE_ALIGNED(_data);
-    }
-  }
-  int   size() { return _size; }
-  int   capacity() { return _size; }
-  char *data() { return _data; }
-  bool  isEmpty() { return _size <= 0 ? true : false; }
-  void  resize(int size)
-  {
-    if (_size != -1)
-    {
-      // The array has been allocated before. Free it.
-      assert(_data != NULL);
-      FREE_ALIGNED(_data);
-      _data = NULL;
-      _size = -1;
-    }
-    // Allocate a new array of sufficient size
-    assert(_size == -1);
-    assert(_data == NULL);
-    _data = (char *)ALLOC_ALIGNED_16(size + MEMORY_PADDING);
-    _size = size;
-  }
-
-private:
-  char *_data;
-  int   _size;
-};
-#endif // SSE_CONVERSION
-
 // The default frame rate that will be used when we could not guess it.
 #define DEFAULT_FRAMERATE 24.0
 
diff --git a/YUViewLib/src/decoder/decoderDav1d.cpp b/YUViewLib/src/decoder/decoderDav1d.cpp
index 8f7915671..7792a9625 100644
--- a/YUViewLib/src/decoder/decoderDav1d.cpp
+++ b/YUViewLib/src/decoder/decoderDav1d.cpp
@@ -449,7 +449,7 @@ bool decoderDav1d::pushData(QByteArray &data)
     // Since dav1d consumes the data (takes ownership), we need to copy it to a new buffer from
     // dav1d
     Dav1dData *dav1dData      = new Dav1dData;
-    uint8_t *  rawDataPointer = this->lib.dav1d_data_create(dav1dData, data.size());
+    uint8_t   *rawDataPointer = this->lib.dav1d_data_create(dav1dData, data.size());
     memcpy(rawDataPointer, data.data(), data.size());
 
     int err = this->lib.dav1d_send_data(decoder, dav1dData);
@@ -476,11 +476,7 @@ bool decoderDav1d::pushData(QByteArray &data)
   return true;
 }
 
-#if SSE_CONVERSION
-void decoderDav1d::copyImgToByteArray(const Dav1dPictureWrapper &src, byteArrayAligned &dst)
-#else
 void decoderDav1d::copyImgToByteArray(const Dav1dPictureWrapper &src, QByteArray &dst)
-#endif
 {
   // How many image planes are there?
   int nrPlanes = (src.getSubsampling() == Subsampling::YUV_400) ? 1 : 3;
@@ -789,7 +785,7 @@ void decoderDav1d::cacheStatistics(const Dav1dPictureWrapper &img)
 
   DEBUG_DAV1D("decoderDav1d::cacheStatistics");
 
-  Av1Block *        blockData   = img.getBlockData();
+  Av1Block         *blockData   = img.getBlockData();
   Dav1dFrameHeader *frameHeader = img.getFrameHeader();
   if (frameHeader == nullptr)
     return;
@@ -904,7 +900,7 @@ void decoderDav1d::parseBlockRecursive(
   }
 }
 
-void decoderDav1d::parseBlockPartition(Av1Block *      blockData,
+void decoderDav1d::parseBlockPartition(Av1Block       *blockData,
                                        unsigned        x,
                                        unsigned        y,
                                        unsigned        blockWidth4,
diff --git a/YUViewLib/src/decoder/decoderDav1d.h b/YUViewLib/src/decoder/decoderDav1d.h
index 44333e3f2..6fc4c06a7 100644
--- a/YUViewLib/src/decoder/decoderDav1d.h
+++ b/YUViewLib/src/decoder/decoderDav1d.h
@@ -70,12 +70,12 @@ class Dav1dPictureWrapper
 
   void                    clear() { memset(&curPicture, 0, sizeof(Dav1dPicture)); }
   Size                    getFrameSize() const;
-  Dav1dPicture *          getPicture() const { return (Dav1dPicture *)(&curPicture); }
+  Dav1dPicture           *getPicture() const { return (Dav1dPicture *)(&curPicture); }
   video::yuv::Subsampling getSubsampling() const;
   int                     getBitDepth() const { return curPicture.p.bpc; }
-  uint8_t * getData(int component) const { return (uint8_t *)curPicture.data[component]; }
+  uint8_t  *getData(int component) const { return (uint8_t *)curPicture.data[component]; }
   ptrdiff_t getStride(int component) const { return curPicture.stride[component]; }
-  uint8_t * getDataPrediction(int component) const
+  uint8_t  *getDataPrediction(int component) const
   {
     return internalsSupported ? (uint8_t *)curPicture.pred[component] : nullptr;
   }
@@ -89,7 +89,7 @@ class Dav1dPictureWrapper
   }
 
   Dav1dSequenceHeader *getSequenceHeader() const { return curPicture.seq_hdr; }
-  Dav1dFrameHeader *   getFrameHeader() const { return curPicture.frame_hdr; }
+  Dav1dFrameHeader    *getFrameHeader() const { return curPicture.frame_hdr; }
 
 private:
   Dav1dPicture curPicture;
@@ -135,7 +135,7 @@ class decoderDav1d : public decoderBaseSingleLib
 private:
   // A private constructor that creates an uninitialized decoder library.
   // Used by checkLibraryFile to check if a file can be used as a hevcDecoderLibde265.
-  decoderDav1d() : decoderBaseSingleLib(){};
+  decoderDav1d() : decoderBaseSingleLib() {};
 
   // Try to resolve all the required function pointers from the library
   void resolveLibraryFunctionPointers() override;
@@ -150,7 +150,7 @@ class decoderDav1d : public decoderBaseSingleLib
 
   void allocateNewDecoder();
 
-  Dav1dContext *     decoder{};
+  Dav1dContext      *decoder{};
   Dav1dSettings      settings;
   Dav1dAnalyzerFlags analyzerSettings;
 
@@ -170,22 +170,17 @@ class decoderDav1d : public decoderBaseSingleLib
 
   // We buffer the current image as a QByteArray so you can call getYUVFrameData as often as
   // necessary without invoking the copy operation from the libde265 buffer to the QByteArray again.
-#if SSE_CONVERSION
-  byteArrayAligned currentOutputBuffer;
-  void             copyImgToByteArray(const Dav1dPictureWrapper &src, byteArrayAligned &dst);
-#else
   QByteArray currentOutputBuffer;
   void       copyImgToByteArray(
             const Dav1dPictureWrapper &src,
-            QByteArray &               dst); // Copy the raw data from the Dav1dPicture source *src to the byte array
-#endif
+            QByteArray &dst); // Copy the raw data from the Dav1dPicture source *src to the byte array
 
   // Statistics
   void fillStatisticList(stats::StatisticsData &) const override;
   void cacheStatistics(const Dav1dPictureWrapper &img);
   void parseBlockRecursive(
       Av1Block *blockData, unsigned x, unsigned y, BlockLevel level, dav1dFrameInfo &frameInfo);
-  void         parseBlockPartition(Av1Block *      blockData,
+  void         parseBlockPartition(Av1Block       *blockData,
                                    unsigned        x,
                                    unsigned        y,
                                    unsigned        blockWidth4,
diff --git a/YUViewLib/src/decoder/decoderHM.cpp b/YUViewLib/src/decoder/decoderHM.cpp
index 7b4c29535..4f9444e7f 100644
--- a/YUViewLib/src/decoder/decoderHM.cpp
+++ b/YUViewLib/src/decoder/decoderHM.cpp
@@ -372,11 +372,7 @@ QByteArray decoderHM::getRawFrameData()
   return currentOutputBuffer;
 }
 
-#if SSE_CONVERSION
-void decoderHM::copyImgToByteArray(libHMDec_picture *src, byteArrayAligned &dst)
-#else
 void decoderHM::copyImgToByteArray(libHMDec_picture *src, QByteArray &dst)
-#endif
 {
   // How many image planes are there?
   auto fmt      = this->lib.libHMDEC_get_chroma_format(src);
diff --git a/YUViewLib/src/decoder/decoderHM.h b/YUViewLib/src/decoder/decoderHM.h
index 88022d7ac..54d71e124 100644
--- a/YUViewLib/src/decoder/decoderHM.h
+++ b/YUViewLib/src/decoder/decoderHM.h
@@ -143,15 +143,10 @@ class decoderHM : public decoderBaseSingleLib
 
   // We buffer the current image as a QByteArray so you can call getYUVFrameData as often as
   // necessary without invoking the copy operation from the hm image buffer to the QByteArray again.
-#if SSE_CONVERSION
-  byteArrayAligned currentOutputBuffer;
-  void             copyImgToByteArray(libHMDec_picture *src, byteArrayAligned &dst);
-#else
   QByteArray currentOutputBuffer;
   void       copyImgToByteArray(
             libHMDec_picture *src,
             QByteArray &dst); // Copy the raw data from the de265_image source *src to the byte array
-#endif
 
   LibraryFunctionsHM lib;
 };
diff --git a/YUViewLib/src/decoder/decoderLibde265.cpp b/YUViewLib/src/decoder/decoderLibde265.cpp
index 9287f7f25..e87a82ef1 100644
--- a/YUViewLib/src/decoder/decoderLibde265.cpp
+++ b/YUViewLib/src/decoder/decoderLibde265.cpp
@@ -464,11 +464,7 @@ bool decoderLibde265::pushData(QByteArray &data)
   return true;
 }
 
-#if SSE_CONVERSION
-void decoderLibde265::copyImgToByteArray(const de265_image *src, byteArrayAligned &dst)
-#else
 void decoderLibde265::copyImgToByteArray(const de265_image *src, QByteArray &dst)
-#endif
 {
   // How many image planes are there?
   auto cMode    = this->lib.de265_get_chroma_format(src);
@@ -967,9 +963,9 @@ void decoderLibde265::fillStatisticList(stats::StatisticsData &statisticsData) c
 
   stats::StatisticsType intraDirC(
       10, "Intra Dir Chroma", ColorMapper({0, 34}, PredefinedType::Jet));
-  intraDirC.description = "The intra mode for the chroma component per TU (intra prediction is "
-                          "performed on a TU level)";
-  intraDirC.hasVectorData          = true;
+  intraDirC.description   = "The intra mode for the chroma component per TU (intra prediction is "
+                            "performed on a TU level)";
+  intraDirC.hasVectorData = true;
   intraDirC.renderVectorData       = true;
   intraDirC.renderVectorDataValues = false;
   intraDirC.vectorScale            = 32;
diff --git a/YUViewLib/src/decoder/decoderLibde265.h b/YUViewLib/src/decoder/decoderLibde265.h
index fe0ae28c5..4b09c21b9 100644
--- a/YUViewLib/src/decoder/decoderLibde265.h
+++ b/YUViewLib/src/decoder/decoderLibde265.h
@@ -81,7 +81,7 @@ struct LibraryFunctionsDe265
   const uint8_t *(*de265_internals_get_image_plane)(const struct de265_image *img,
                                                     de265_internals_param     signal,
                                                     int                       channel,
-                                                    int *                     out_stride){};
+                                                    int                      *out_stride){};
   void (*de265_internals_set_parameter_bool)(de265_decoder_context *,
                                              enum de265_internals_param param,
                                              int                        value){};
@@ -124,7 +124,7 @@ class decoderLibde265 : public decoderBaseSingleLib
 private:
   // A private constructor that creates an uninitialized decoder library.
   // Used by checkLibraryFile to check if a file can be used as a hevcDecoderLibde265.
-  decoderLibde265() : decoderBaseSingleLib(){};
+  decoderLibde265() : decoderBaseSingleLib() {};
 
   // Try to resolve all the required function pointers from the library
   void resolveLibraryFunctionPointers() override;
@@ -175,15 +175,10 @@ class decoderLibde265 : public decoderBaseSingleLib
 
   // We buffer the current image as a QByteArray so you can call getYUVFrameData as often as
   // necessary without invoking the copy operation from the libde265 buffer to the QByteArray again.
-#if SSE_CONVERSION
-  byteArrayAligned currentOutputBuffer;
-  void             copyImgToByteArray(const de265_image *src, byteArrayAligned &dst);
-#else
   QByteArray currentOutputBuffer;
   void       copyImgToByteArray(
             const de265_image *src,
-            QByteArray &       dst); // Copy the raw data from the de265_image source *src to the byte array
-#endif
+            QByteArray &dst); // Copy the raw data from the de265_image source *src to the byte array
 
   LibraryFunctionsDe265 lib;
 };
diff --git a/YUViewLib/src/decoder/decoderVTM.cpp b/YUViewLib/src/decoder/decoderVTM.cpp
index 08f39a7de..8d4eb4285 100644
--- a/YUViewLib/src/decoder/decoderVTM.cpp
+++ b/YUViewLib/src/decoder/decoderVTM.cpp
@@ -366,11 +366,7 @@ QByteArray decoderVTM::getRawFrameData()
   return currentOutputBuffer;
 }
 
-#if SSE_CONVERSION
-void decoderVTM::copyImgToByteArray(libVTMDec_picture *src, byteArrayAligned &dst)
-#else
 void decoderVTM::copyImgToByteArray(libVTMDec_picture *src, QByteArray &dst)
-#endif
 {
   // How many image planes are there?
   auto fmt      = this->lib.libVTMDec_get_chroma_format(src);
diff --git a/YUViewLib/src/decoder/decoderVTM.h b/YUViewLib/src/decoder/decoderVTM.h
index 4eba0c522..68a5b729e 100644
--- a/YUViewLib/src/decoder/decoderVTM.h
+++ b/YUViewLib/src/decoder/decoderVTM.h
@@ -51,11 +51,11 @@ struct LibraryFunctionsVTM
   void (*libVTMDec_set_SEI_Check)(libVTMDec_context *, bool check_hash){};
   void (*libVTMDec_set_max_temporal_layer)(libVTMDec_context *, int max_layer){};
   libVTMDec_error (*libVTMDec_push_nal_unit)(libVTMDec_context *decCtx,
-                                             const void *       data8,
+                                             const void        *data8,
                                              int                length,
                                              bool               eof,
-                                             bool &             bNewPicture,
-                                             bool &             checkOutputPictures){};
+                                             bool              &bNewPicture,
+                                             bool              &checkOutputPictures){};
 
   // Get a picture and retrive information on the picture
   libVTMDec_picture *(*libVTMDec_get_picture)(libVTMDec_context *){};
@@ -93,7 +93,7 @@ class decoderVTM : public decoderBaseSingleLib
 private:
   // A private constructor that creates an uninitialized decoder library.
   // Used by checkLibraryFile to check if a file can be used as this type of decoder.
-  decoderVTM(){};
+  decoderVTM() {};
 
   // Return the possible names of the HM library
   QStringList getLibraryNames() const override;
@@ -130,15 +130,10 @@ class decoderVTM : public decoderBaseSingleLib
 
   // We buffer the current image as a QByteArray so you can call getYUVFrameData as often as
   // necessary without invoking the copy operation from the hm image buffer to the QByteArray again.
-#if SSE_CONVERSION
-  byteArrayAligned currentOutputBuffer;
-  void             copyImgToByteArray(libVTMDec_picture *src, byteArrayAligned &dst);
-#else
   QByteArray currentOutputBuffer;
   void       copyImgToByteArray(
             libVTMDec_picture *src,
-            QByteArray &       dst); // Copy the raw data from the de265_image source *src to the byte array
-#endif
+            QByteArray &dst); // Copy the raw data from the libVTMDec_picture *src to the byte array
 
   LibraryFunctionsVTM lib;
 };
diff --git a/YUViewLib/src/filesource/FileSource.cpp b/YUViewLib/src/filesource/FileSource.cpp
index ecc5df6ae..e5df613a9 100644
--- a/YUViewLib/src/filesource/FileSource.cpp
+++ b/YUViewLib/src/filesource/FileSource.cpp
@@ -77,21 +77,6 @@ bool FileSource::openFile(const std::filesystem::path &filePath)
   return true;
 }
 
-#if SSE_CONVERSION
-// Resize the target array if necessary and read the given number of bytes to the data array
-void FileSource::readBytes(byteArrayAligned &targetBuffer, int64_t startPos, int64_t nrBytes)
-{
-  if (!isOk())
-    return;
-
-  if (targetBuffer.size() < nrBytes)
-    targetBuffer.resize(nrBytes);
-
-  srcFile.seek(startPos);
-  srcFile.read(targetBuffer.data(), nrBytes);
-}
-#endif
-
 // Resize the target array if necessary and read the given number of bytes to the data array
 int64_t FileSource::readBytes(QByteArray &targetBuffer, int64_t startPos, int64_t nrBytes)
 {
diff --git a/YUViewLib/src/filesource/FileSource.h b/YUViewLib/src/filesource/FileSource.h
index 0a3e77d99..2ee9be62f 100644
--- a/YUViewLib/src/filesource/FileSource.h
+++ b/YUViewLib/src/filesource/FileSource.h
@@ -93,9 +93,6 @@ class FileSource : public QObject
   // Read the given number of bytes starting at startPos into the QByteArray out
   // Resize the QByteArray if necessary. Return how many bytes were read.
   int64_t readBytes(QByteArray &targetBuffer, int64_t startPos, int64_t nrBytes);
-#if SSE_CONVERSION
-  void readBytes(byteArrayAligned &data, int64_t startPos, int64_t nrBytes);
-#endif
 
   void updateFileWatchSetting();
   void clearFileCache();
diff --git a/YUViewLib/src/video/yuv/videoHandlerYUV.cpp b/YUViewLib/src/video/yuv/videoHandlerYUV.cpp
index 825ae1602..70ed0a9c2 100644
--- a/YUViewLib/src/video/yuv/videoHandlerYUV.cpp
+++ b/YUViewLib/src/video/yuv/videoHandlerYUV.cpp
@@ -40,9 +40,6 @@
 #include <type_traits>
 #include <vector>
 
-#if SSE_CONVERSION_420_ALT
-#include <xmmintrin.h>
-#endif
 #include <QDir>
 #include <QPainter>
 
@@ -454,103 +451,6 @@ bool convertYUV420ToRGB(const QByteArray         &sourceBuffer,
   Q_ASSERT(sourceBuffer.size() >= componentLenghtY + componentLengthUV +
                                       componentLengthUV); // YUV 420 must be (at least) 1.5*Y-area
 
-#if SSE_CONVERSION_420_ALT
-  quint8 *srcYRaw = (quint8 *)sourceBuffer.data();
-  quint8 *srcURaw = srcYRaw + componentLenghtY;
-  quint8 *srcVRaw = srcURaw + componentLengthUV;
-
-  quint8 *dstBuffer       = (quint8 *)targetBuffer.data();
-  quint32 dstBufferStride = frameWidth * 4;
-
-  yuv420_to_argb8888(srcYRaw,
-                     srcURaw,
-                     srcVRaw,
-                     frameWidth,
-                     frameWidth >> 1,
-                     frameWidth,
-                     frameHeight,
-                     dstBuffer,
-                     dstBufferStride);
-  return false;
-#endif
-
-#if SSE_CONVERSION
-  // Try to use SSE. If this fails use conventional algorithm
-
-  if (frameWidth % 32 == 0 && frameHeight % 2 == 0)
-  {
-    // We can use 16byte aligned read/write operations
-
-    quint8 *srcY = (quint8 *)sourceBuffer.data();
-    quint8 *srcU = srcY + componentLenghtY;
-    quint8 *srcV = srcU + componentLengthUV;
-
-    __m128i yMult  = _mm_set_epi16(75, 75, 75, 75, 75, 75, 75, 75);
-    __m128i ySub   = _mm_set_epi16(16, 16, 16, 16, 16, 16, 16, 16);
-    __m128i ugMult = _mm_set_epi16(25, 25, 25, 25, 25, 25, 25, 25);
-    //__m128i sub16  = _mm_set_epi8(16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16);
-    __m128i sub128 = _mm_set_epi8(
-        128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128);
-
-    //__m128i test = _mm_set_epi8(128, 0, 1, 2, 3, 245, 254, 255, 128, 128, 128, 128, 128, 128, 128,
-    // 128);
-
-    __m128i y, u, v, uMult, vMult;
-    __m128i RGBOut0, RGBOut1, RGBOut2;
-    __m128i tmp;
-
-    for (int yh = 0; yh < frameHeight / 2; yh++)
-    {
-      for (int x = 0; x < frameWidth / 32; x += 32)
-      {
-        // Load 16 bytes U/V
-        u = _mm_load_si128((__m128i *)&srcU[x / 2]);
-        v = _mm_load_si128((__m128i *)&srcV[x / 2]);
-        // Subtract 128 from each U/V value (16 values)
-        u = _mm_sub_epi8(u, sub128);
-        v = _mm_sub_epi8(v, sub128);
-
-        // Load 16 bytes Y from this line and the next one
-        y = _mm_load_si128((__m128i *)&srcY[x]);
-
-        // Get the lower 8 (8bit signed) Y values and put them into a 16bit register
-        tmp = _mm_srai_epi16(_mm_unpacklo_epi8(y, y), 8);
-        // Subtract 16 and multiply by 75
-        tmp = _mm_sub_epi16(tmp, ySub);
-        tmp = _mm_mullo_epi16(tmp, yMult);
-
-        // Now to add them to the 16 bit RGB output values
-        RGBOut0 = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(1, 0, 1, 0));
-        RGBOut0 = _mm_shufflelo_epi16(RGBOut0, _MM_SHUFFLE(1, 0, 0, 0));
-        RGBOut0 = _mm_shufflehi_epi16(RGBOut0, _MM_SHUFFLE(2, 2, 1, 1));
-
-        RGBOut1 = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2, 1, 2, 1));
-        RGBOut1 = _mm_shufflelo_epi16(RGBOut1, _MM_SHUFFLE(1, 1, 1, 0));
-        RGBOut1 = _mm_shufflehi_epi16(RGBOut1, _MM_SHUFFLE(3, 2, 2, 2));
-
-        RGBOut2 = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(3, 2, 3, 2));
-        RGBOut2 = _mm_shufflelo_epi16(RGBOut2, _MM_SHUFFLE(2, 2, 1, 1));
-        RGBOut2 = _mm_shufflehi_epi16(RGBOut2, _MM_SHUFFLE(3, 3, 3, 2));
-
-        // y2 = _mm_load_si128((__m128i *) &srcY[x + 16]);
-
-        // --- Start with the left 8 values from U/V
-
-        // Get the lower 8 (8bit signed) U/V values and put them into a 16bit register
-        uMult = _mm_srai_epi16(_mm_unpacklo_epi8(u, u), 8);
-        vMult = _mm_srai_epi16(_mm_unpacklo_epi8(v, v), 8);
-
-        // Multiply
-
-        /*y3 = _mm_load_si128((__m128i *) &srcY[x + frameWidth]);
-        y4 = _mm_load_si128((__m128i *) &srcY[x + frameWidth + 16]);*/
-      }
-    }
-
-    return true;
-  }
-#endif
-
   static unsigned char *clip_buf = clp_buf + 384;
   if (!clp_buf_initialized)
     initClippingTable();
@@ -2546,158 +2446,6 @@ void videoHandlerYUV::drawFrame(QPainter *painter,
     videoHandler::drawFrame(painter, frameIdx, zoomFactor, drawRawData);
 }
 
-/// --- Convert from the current YUV input format to YUV 444
-
-#if SSE_CONVERSION_420_ALT
-void videoHandlerYUV::yuv420_to_argb8888(quint8 *yp,
-                                         quint8 *up,
-                                         quint8 *vp,
-                                         quint32 sy,
-                                         quint32 suv,
-                                         int     width,
-                                         int     height,
-                                         quint8 *rgb,
-                                         quint32 srgb)
-{
-  __m128i  y0r0, y0r1, u0, v0;
-  __m128i  y00r0, y01r0, y00r1, y01r1;
-  __m128i  u00, u01, v00, v01;
-  __m128i  rv00, rv01, gu00, gu01, gv00, gv01, bu00, bu01;
-  __m128i  r00, r01, g00, g01, b00, b01;
-  __m128i  rgb0123, rgb4567, rgb89ab, rgbcdef;
-  __m128i  gbgb;
-  __m128i  ysub, uvsub;
-  __m128i  zero, facy, facrv, facgu, facgv, facbu;
-  __m128i *srcy128r0, *srcy128r1;
-  __m128i *dstrgb128r0, *dstrgb128r1;
-  __m64   *srcu64, *srcv64;
-
-  //    Implement the following conversion:
-  //    B = 1.164(Y - 16)                   + 2.018(U - 128)
-  //    G = 1.164(Y - 16) - 0.813(V - 128)  - 0.391(U - 128)
-  //    R = 1.164(Y - 16) + 1.596(V - 128)
-
-  int x, y;
-  // constants
-  ysub  = _mm_set1_epi32(0x00100010); // value 16 for subtraction
-  uvsub = _mm_set1_epi32(0x00800080); // value 128
-
-  // multiplication factors bit shifted by 6
-  facy  = _mm_set1_epi32(0x004a004a);
-  facrv = _mm_set1_epi32(0x00660066);
-  facgu = _mm_set1_epi32(0x00190019);
-  facgv = _mm_set1_epi32(0x00340034);
-  facbu = _mm_set1_epi32(0x00810081);
-
-  zero = _mm_set1_epi32(0x00000000);
-
-  for (y = 0; y < height; y += 2)
-  {
-    srcy128r0 = (__m128i *)(yp + sy * y);
-    srcy128r1 = (__m128i *)(yp + sy * y + sy);
-    srcu64    = (__m64 *)(up + suv * (y / 2));
-    srcv64    = (__m64 *)(vp + suv * (y / 2));
-
-    // dst row 0 and row 1
-    dstrgb128r0 = (__m128i *)(rgb + srgb * y);
-    dstrgb128r1 = (__m128i *)(rgb + srgb * y + srgb);
-
-    for (x = 0; x < width; x += 16)
-    {
-      u0 = _mm_loadl_epi64((__m128i *)srcu64);
-      srcu64++;
-      v0 = _mm_loadl_epi64((__m128i *)srcv64);
-      srcv64++;
-
-      y0r0 = _mm_load_si128(srcy128r0++);
-      y0r1 = _mm_load_si128(srcy128r1++);
-
-      // expand to 16 bit, subtract and multiply constant y factors
-      y00r0 = _mm_mullo_epi16(_mm_sub_epi16(_mm_unpacklo_epi8(y0r0, zero), ysub), facy);
-      y01r0 = _mm_mullo_epi16(_mm_sub_epi16(_mm_unpackhi_epi8(y0r0, zero), ysub), facy);
-      y00r1 = _mm_mullo_epi16(_mm_sub_epi16(_mm_unpacklo_epi8(y0r1, zero), ysub), facy);
-      y01r1 = _mm_mullo_epi16(_mm_sub_epi16(_mm_unpackhi_epi8(y0r1, zero), ysub), facy);
-
-      // expand u and v so they're aligned with y values
-      u0  = _mm_unpacklo_epi8(u0, zero);
-      u00 = _mm_sub_epi16(_mm_unpacklo_epi16(u0, u0), uvsub);
-      u01 = _mm_sub_epi16(_mm_unpackhi_epi16(u0, u0), uvsub);
-
-      v0  = _mm_unpacklo_epi8(v0, zero);
-      v00 = _mm_sub_epi16(_mm_unpacklo_epi16(v0, v0), uvsub);
-      v01 = _mm_sub_epi16(_mm_unpackhi_epi16(v0, v0), uvsub);
-
-      // common factors on both rows.
-      rv00 = _mm_mullo_epi16(facrv, v00);
-      rv01 = _mm_mullo_epi16(facrv, v01);
-      gu00 = _mm_mullo_epi16(facgu, u00);
-      gu01 = _mm_mullo_epi16(facgu, u01);
-      gv00 = _mm_mullo_epi16(facgv, v00);
-      gv01 = _mm_mullo_epi16(facgv, v01);
-      bu00 = _mm_mullo_epi16(facbu, u00);
-      bu01 = _mm_mullo_epi16(facbu, u01);
-
-      // add together and bit shift to the right
-      r00 = _mm_srai_epi16(_mm_add_epi16(y00r0, rv00), 6);
-      r01 = _mm_srai_epi16(_mm_add_epi16(y01r0, rv01), 6);
-      g00 = _mm_srai_epi16(_mm_sub_epi16(_mm_sub_epi16(y00r0, gu00), gv00), 6);
-      g01 = _mm_srai_epi16(_mm_sub_epi16(_mm_sub_epi16(y01r0, gu01), gv01), 6);
-      b00 = _mm_srai_epi16(_mm_add_epi16(y00r0, bu00), 6);
-      b01 = _mm_srai_epi16(_mm_add_epi16(y01r0, bu01), 6);
-
-      r00 = _mm_packus_epi16(r00, r01);
-      g00 = _mm_packus_epi16(g00, g01);
-      b00 = _mm_packus_epi16(b00, b01);
-
-      // shuffle back together to lower 0rgb0rgb...
-      r01     = _mm_unpacklo_epi8(r00, zero);  // 0r0r...
-      gbgb    = _mm_unpacklo_epi8(b00, g00);   // gbgb...
-      rgb0123 = _mm_unpacklo_epi16(gbgb, r01); // lower 0rgb0rgb...
-      rgb4567 = _mm_unpackhi_epi16(gbgb, r01); // upper 0rgb0rgb...
-
-      // shuffle back together to upper 0rgb0rgb...
-      r01     = _mm_unpackhi_epi8(r00, zero);
-      gbgb    = _mm_unpackhi_epi8(b00, g00);
-      rgb89ab = _mm_unpacklo_epi16(gbgb, r01);
-      rgbcdef = _mm_unpackhi_epi16(gbgb, r01);
-
-      // write to dst
-      _mm_store_si128(dstrgb128r0++, rgb0123);
-      _mm_store_si128(dstrgb128r0++, rgb4567);
-      _mm_store_si128(dstrgb128r0++, rgb89ab);
-      _mm_store_si128(dstrgb128r0++, rgbcdef);
-
-      // row 1
-      r00 = _mm_srai_epi16(_mm_add_epi16(y00r1, rv00), 6);
-      r01 = _mm_srai_epi16(_mm_add_epi16(y01r1, rv01), 6);
-      g00 = _mm_srai_epi16(_mm_sub_epi16(_mm_sub_epi16(y00r1, gu00), gv00), 6);
-      g01 = _mm_srai_epi16(_mm_sub_epi16(_mm_sub_epi16(y01r1, gu01), gv01), 6);
-      b00 = _mm_srai_epi16(_mm_add_epi16(y00r1, bu00), 6);
-      b01 = _mm_srai_epi16(_mm_add_epi16(y01r1, bu01), 6);
-
-      r00 = _mm_packus_epi16(r00, r01);
-      g00 = _mm_packus_epi16(g00, g01);
-      b00 = _mm_packus_epi16(b00, b01);
-
-      r01     = _mm_unpacklo_epi8(r00, zero);
-      gbgb    = _mm_unpacklo_epi8(b00, g00);
-      rgb0123 = _mm_unpacklo_epi16(gbgb, r01);
-      rgb4567 = _mm_unpackhi_epi16(gbgb, r01);
-
-      r01     = _mm_unpackhi_epi8(r00, zero);
-      gbgb    = _mm_unpackhi_epi8(b00, g00);
-      rgb89ab = _mm_unpacklo_epi16(gbgb, r01);
-      rgbcdef = _mm_unpackhi_epi16(gbgb, r01);
-
-      _mm_store_si128(dstrgb128r1++, rgb0123);
-      _mm_store_si128(dstrgb128r1++, rgb4567);
-      _mm_store_si128(dstrgb128r1++, rgb89ab);
-      _mm_store_si128(dstrgb128r1++, rgbcdef);
-    }
-  }
-}
-#endif
-
 QLayout *videoHandlerYUV::createVideoHandlerControls(bool isSizeAndFormatFixed)
 {
   // Absolutely always only call this function once!
diff --git a/YUViewLib/src/video/yuv/videoHandlerYUV.h b/YUViewLib/src/video/yuv/videoHandlerYUV.h
index 0e79d9ce9..aade4af02 100644
--- a/YUViewLib/src/video/yuv/videoHandlerYUV.h
+++ b/YUViewLib/src/video/yuv/videoHandlerYUV.h
@@ -224,18 +224,6 @@ class videoHandlerYUV : public videoHandler
                                      const Size            frameSize,
                                      const PixelFormatYUV &sourceBufferFormat) const;
 
-#if SSE_CONVERSION_420_ALT
-  void yuv420_to_argb8888(quint8 *yp,
-                          quint8 *up,
-                          quint8 *vp,
-                          quint32 sy,
-                          quint32 suv,
-                          int     width,
-                          int     height,
-                          quint8 *rgb,
-                          quint32 srgb);
-#endif
-
   SafeUi<Ui::videoHandlerYUV> ui;
 
   bool           diffReady{};