From eaf5c3f2d4043479fe02fab2e12015654ec42eb3 Mon Sep 17 00:00:00 2001 From: Ignacio Castano Date: Fri, 31 Jul 2020 15:15:09 -0700 Subject: [PATCH 1/4] Assume SSE2 in x64, even if __SSE2__ not defined. --- icbc.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/icbc.h b/icbc.h index 82fd9fd..c36b06b 100644 --- a/icbc.h +++ b/icbc.h @@ -57,6 +57,10 @@ namespace icbc { #define ICBC_X86 1 #endif +#if defined(__x86_64__) || defined(_M_X64) + #define ICBC_X64 1 +#endif + #if (defined(__arm__) || defined(_M_ARM)) #define ICBC_ARM 1 #endif @@ -76,7 +80,7 @@ namespace icbc { #define ICBC_SIMD ICBC_AVX1 #elif __SSE4_1__ #define ICBC_SIMD ICBC_SSE41 - #elif __SSE2__ + #elif __SSE2__ || ICBC_X64 #define ICBC_SIMD ICBC_SSE2 #else #define ICBC_SIMD ICBC_SCALAR From bac19313c3a745a0374e1f477acdd0ddc2061b87 Mon Sep 17 00:00:00 2001 From: Ignacio Castano Date: Fri, 31 Jul 2020 15:28:40 -0700 Subject: [PATCH 2/4] Add results on Gulftown. --- results.txt | 48 +++++++++++++++++++++++------------------------- 1 file changed, 23 insertions(+), 25 deletions(-) diff --git a/results.txt b/results.txt index 80725ce..0b3f74a 100644 --- a/results.txt +++ b/results.txt @@ -1,26 +1,22 @@ -Rasperry Pi - Neon - gcc 8.3 - 2020/05/14 - RMSE = 6.906 PSNR = 31.346 TIME = 5.719120 (5.701821) +Rasperry Pi - gcc 8.3 - 2020/05/14 + Neon: RMSE = 6.906 PSNR = 31.346 TIME = 5.719120 (5.701821) + Float: RMSE = 6.906 PSNR = 31.346 TIME = 11.536309 (11.447170) -Rasperry Pi - Float - gcc 8.3 - 2020/05/14 - RMSE = 6.906 PSNR = 31.346 TIME = 11.536309 (11.447170) +Raspberry Pi - gcc 8.3 - 2020/05/30 + Neon: RMSE = 6.912 PSNR = 31.339 TIME = 5.520404 (5.470292) + Float: RMSE = 6.912 PSNR = 31.339 TIME = 10.290935 (10.265525) Raspberry Pi - Neon - gcc 8.3 - 2020/05/30 - RMSE = 6.912 PSNR = 31.339 TIME = 5.520404 (5.470292) - -Rasperry Pi - Float - gcc 8.3 - 2020/05/30 - RMSE = 6.912 PSNR = 31.339 TIME = 10.290935 (10.265525) - -Raspberry Pi - Neon - gcc 8.3 - 2020/05/30 - q1: RMSE = 7.394 PSNR = 30.753 TIME = 0.330453 (0.318691) - q2: RMSE = 7.380 PSNR = 30.769 TIME = 0.652932 (0.632926) - q3: RMSE = 7.202 PSNR = 30.982 TIME = 0.756020 (0.742441) - q4: RMSE = 7.049 PSNR = 31.169 TIME = 1.011262 (0.993692) - q5: RMSE = 6.971 PSNR = 31.265 TIME = 1.553520 (1.538473) - q6: RMSE = 6.938 PSNR = 31.307 TIME = 2.303139 (2.266457) - q7: RMSE = 6.926 PSNR = 31.321 TIME = 3.830098 (3.788544) - q8: RMSE = 6.916 PSNR = 31.333 TIME = 5.098788 (5.079539) - q9: RMSE = 6.894 PSNR = 31.361 TIME = 8.725668 (8.707515) + q1: RMSE = 7.394 PSNR = 30.753 TIME = 0.330453 (0.318691) + q2: RMSE = 7.380 PSNR = 30.769 TIME = 0.652932 (0.632926) + q3: RMSE = 7.202 PSNR = 30.982 TIME = 0.756020 (0.742441) + q4: RMSE = 7.049 PSNR = 31.169 TIME = 1.011262 (0.993692) + q5: RMSE = 6.971 PSNR = 31.265 TIME = 1.553520 (1.538473) + q6: RMSE = 6.938 PSNR = 31.307 TIME = 2.303139 (2.266457) + q7: RMSE = 6.926 PSNR = 31.321 TIME = 3.830098 (3.788544) + q8: RMSE = 6.916 PSNR = 31.333 TIME = 5.098788 (5.079539) + q9: RMSE = 6.894 PSNR = 31.361 TIME = 8.725668 (8.707515) Cofee Lake - clang 11.0.0 - 2020/05/30 AVX2: RMSE = 6.916 PSNR = 31.333 TIME = 0.242974 (0.225702) @@ -29,7 +25,7 @@ Cofee Lake - clang 11.0.0 - 2020/05/30 SSE2: RMSE = 6.916 PSNR = 31.333 TIME = 0.444171 (0.419927) Float: RMSE = 6.916 PSNR = 31.333 TIME = 0.959968 (0.941949) -Sky Lake - vc2009/x64 - 2020/05/30 +Sky Lake - vc2019/x64 - 2020/05/30 AVX512: RMSE = 6.916 PSNR = 31.333 TIME = 0.170101 (0.165569) AVX2: RMSE = 6.916 PSNR = 31.333 TIME = 0.272625 (0.267651) AVX1: RMSE = 6.916 PSNR = 31.333 TIME = 0.537877 (0.530867) @@ -48,9 +44,11 @@ Sky Lake - vc2009/x64 - 2020/05/31 AVX512: RMSE = 6.916 PSNR = 31.333 TIME = 0.166147 (0.161710) AVX2: RMSE = 6.916 PSNR = 31.333 TIME = 0.271278 (0.265888) -Raspberry Pi - Neon - gcc 8.3 - 2020/06/11 - RMSE = 6.916 PSNR = 31.333 TIME = 4.869383 (4.837136) - -Rasperry Pi - Float - gcc 8.3 - 2020/06/11 - RMSE = 6.916 PSNR = 31.333 TIME = 9.983787 (9.971635) +Raspberry Pi - gcc 8.3 - 2020/06/11 + Neon: RMSE = 6.916 PSNR = 31.333 TIME = 4.869383 (4.837136) + Float: RMSE = 6.916 PSNR = 31.333 TIME = 9.983787 (9.971635) +Gulftown - vc2017/x64 - 2020/07/31 + SSE4: RMSE = 6.916 PSNR = 31.333 TIME = 1.118810 (1.054073) + SSE2: RMSE = 6.916 PSNR = 31.333 TIME = 1.245808 (1.177381) + Float: RMSE = 6.916 PSNR = 31.333 TIME = 2.440240 (2.320139) From b13ac08c62f3c3e1b564bb96db5f75cbf5e7c6fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20Castan=CC=83o?= Date: Sat, 1 Aug 2020 22:50:54 -0700 Subject: [PATCH 3/4] Update Haswell results. --- results.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/results.txt b/results.txt index 0b3f74a..0b7f56e 100644 --- a/results.txt +++ b/results.txt @@ -35,9 +35,9 @@ Sky Lake - vc2019/x64 - 2020/05/30 Haswell - clang 11.0.0 - 2020/05/31 AVX2: RMSE = 6.916 PSNR = 31.333 TIME = 0.568070 (0.545250) - AVX1: RMSE = 6.916 PSNR = 31.333 TIME = 0.875448 (0.849122) - SSE4: RMSE = 6.916 PSNR = 31.333 TIME = 1.098958 (1.068698) - SSE2: RMSE = 6.916 PSNR = 31.333 TIME = 1.143020 (1.112766) + AVX1: RMSE = 6.916 PSNR = 31.333 TIME = 0.863462 (0.840429) + SSE4: RMSE = 6.916 PSNR = 31.333 TIME = 1.080115 (1.053109) + SSE2: RMSE = 6.916 PSNR = 31.333 TIME = 1.135764 (1.103072) Float: RMSE = 6.916 PSNR = 31.333 TIME = 2.515645 (2.458320) Sky Lake - vc2009/x64 - 2020/05/31 From b0c83d1d81840754aa1b4362ca66ab33393c9015 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ignacio=20Castan=CC=83o?= Date: Sat, 1 Aug 2020 22:52:24 -0700 Subject: [PATCH 4/4] Fix typo. --- results.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/results.txt b/results.txt index 0b7f56e..fbd1dbf 100644 --- a/results.txt +++ b/results.txt @@ -40,7 +40,7 @@ Haswell - clang 11.0.0 - 2020/05/31 SSE2: RMSE = 6.916 PSNR = 31.333 TIME = 1.135764 (1.103072) Float: RMSE = 6.916 PSNR = 31.333 TIME = 2.515645 (2.458320) -Sky Lake - vc2009/x64 - 2020/05/31 +Sky Lake - vc2019/x64 - 2020/05/31 AVX512: RMSE = 6.916 PSNR = 31.333 TIME = 0.166147 (0.161710) AVX2: RMSE = 6.916 PSNR = 31.333 TIME = 0.271278 (0.265888)