diff --git a/README.md b/README.md index e3009e6..5d47f07 100644 --- a/README.md +++ b/README.md @@ -46,29 +46,8 @@ fixed64 is designed to find a balance between percision and performance. It is s - support speed up the multiplication and division with integer ### Performance -- forceinline -- no overflow -- no hardware int128 -Intel Core i9-12900K 3.2GHz - -|Arithmetic|Fixed64|Hardware Float| -|-|:-:|:-:| -|Addition/Subtraction|0.027 ns|0.433 ns| -|Multiplication|2.621 ns|0.837 ns| -|Division|1.316 ns|2.784 ns| - -Apple M1 pro - -|Arithmetic|Fixed64|Hardware Float| -|-|:-:|:-:| -|Addition/Subtraction|0.000001 ns*|0.953 ns| -|Multiplication|4.057 ns|1.246 ns| -|Division|1.102 ns|3.144 ns| - - * result is calculated with random operand, can not be calculated in compile time. - -see more in ``benchmark.cpp`` +see more in [benchmark](https://github.com/nustxujun/FixedPoint64/blob/main/benchmark/benchmark.md) ### Supported Switcher ```c++ #define FIXED_64_ENABLE_ROUNDING // apply rounding @@ -131,31 +110,9 @@ fixed64综合考虑了精度与性能的问题,使用了int64存储。个人 - 支持与整型的乘除法加速 ### Performance -- 开启强制内敛 -- 无溢出检测 -- 无硬件int128支持 - - -Intel Core i9-12900K 3.2GHz - -|算数操作|定点数|系统浮点数| -|-|:-:|:-:| -|加/减|0.027 ns|0.433 ns| -|乘|2.621 ns|0.837 ns| -|除|1.316 ns|2.784 ns| - - -Apple M1 pro - -|算数操作|定点数|系统浮点数| -|-|:-:|:-:| -|加/减|0.000001 ns*|0.953 ns| -|乘|4.057 ns|1.246 ns| -|除|1.102 ns|3.144 ns| - * 计算数值是随机的,不可能是编译期计算出来的 +具体参考[benchmark](https://github.com/nustxujun/FixedPoint64/blob/main/benchmark/benchmark.md) -具体请参考``benchmark.cpp`` ### 开关 ```c++ #define FIXED_64_ENABLE_ROUNDING // 使用四舍五入 diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 6d486d0..20578f1 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -40,9 +40,9 @@ struct Operand fixed fa; fixed fb; - inline Operand(float Min, float Max) + inline Operand(fp Min, fp Max) { - std::uniform_real_distribution u(Min, Max); + std::uniform_real_distribution u(Min, Max); a = u(e) ; b = u(e); fa = a; @@ -84,13 
+84,27 @@ FIXED_64_FORCEINLINE void PreventOptimizedAway(fixed val) EXPR1;\ } -#pragma optimize("",off) // prevent statement reordering +// prevent statement reordering +#ifdef _MSC_VER +#pragma optimize("",off) +#elif defined(__clang__) +#pragma clang optimize off +#else +#pragma GCC push_options +#pragma GCC optimize ("O0") +#endif template void run_test(T& a, T& b, std::function&& f) { f(a,b); } -#pragma optimize("",on) +#ifdef _MSC_VER +#pragma optimize("",on) +#elif defined(__clang__) +#pragma clang optimize on +#else +#pragma GCC pop_options +#endif #define RUN_TEST(EXPR1, EXPR2, COUNT, Min, Max) \ @@ -114,21 +128,25 @@ struct TestGroup std::string name; uint64_t num_batch; uint64_t count; + fp min; + fp max; TestGroup(std::string n, uint64_t num, uint64_t c, fp min, fp max) { + this->min = min; + this->max = max; name = n; num_batch = num; count = c; totals[0] = 0; totals[1] = 0; - printf("%s [%f, %f]\n", name.c_str(), min, max); } ~TestGroup() { - printf("hard float: %lf ns, fixed point: %lf ns\n\n", - double(totals[0]) /count / num_batch - ,double(totals[1]) / count / num_batch + printf("%16s[%6.1f, %6.1f]| %3.4lf ns | %3.4lf ns |\n", + name.c_str(),(float)min, (float)max, + double(totals[1]) /count / num_batch + ,double(totals[0]) / count / num_batch ); } }; @@ -167,6 +185,8 @@ auto benchmark = [](){ const uint64_t count1 = 0xffff'ff; const uint64_t count2 = 0xffff'f; + printf(" arithmetic[ min, max]|fixed point| hard float|\n"); + RUN_BASIC_TEST_GROUP("add/sub", +, -, 0xff, count1, -100, 100); #if FIXED_64_ENABLE_INT128_ACCELERATION diff --git a/benchmark/benchmark.md b/benchmark/benchmark.md new file mode 100644 index 0000000..f015a0a --- /dev/null +++ b/benchmark/benchmark.md @@ -0,0 +1,69 @@ +## Intel Core i9-12900K 3.2GHz windows11 clang c++20 + +|Arithmetic|Fixed64|Hardware Float| +|:-|-:|-:| + add/sub[-100.0, 100.0]|0.0038 ns|0.4295 ns| +mul[-100.0, 100.0]|2.6078 ns|0.9425 ns| +mul[ 0.0, 0.5]|2.9100 ns|0.9226 ns| +mul[ 0.5, 1.0]|2.6604 ns|27.1776 
ns| +mul[ 1.0, 2.0]|2.6227 ns|0.8468 ns| +mul[ 2.0, 100.0]|2.6320 ns|0.8465 ns| +div[-100.0, 100.0]|1.2967 ns|2.7318 ns| +div[ 0.0, 0.5]|12.0082 ns|2.3092 ns| +div[ 0.5, 1.0]|11.9341 ns|2.3041 ns| +div[ 1.0, 2.0]|5.8836 ns|29.7369 ns| +div[ 2.0, 100.0]|1.1417 ns|2.2860 ns| +ceil[ -2.0, 2.0]|0.3916 ns|1.9973 ns| +floor[ -2.0, 2.0]|0.3870 ns|1.7243 ns| +round[ -2.0, 2.0]|0.6668 ns|6.5249 ns| +abs[ -2.0, 2.0]|0.0538 ns|0.4180 ns| +exp[ 0.0, 1.0]|3.6085 ns|2.1249 ns| +exp2[ 0.0, 1.0]|2.7666 ns|30.3101 ns| +sqrt[ 0.0, 100.0]|13.7207 ns|1.6524 ns| +sin[ -10.0, 10.0]|1.0053 ns|3.6804 ns| +cos[ -10.0, 10.0]|1.3183 ns|3.4292 ns| +tan[ -10.0, 10.0]|7.3048 ns|3.5210 ns| +asin[ -1.0, 1.0]|23.3574 ns|2.7947 ns| +acos[ -1.0, 1.0]|22.9430 ns|2.8789 ns| +atan[ 1.0, 100.0]|6.9723 ns|3.0697 ns| +atan[ 1.0, 100.0]|6.9567 ns|3.4708 ns| + + +## Intel Core i9-12900K 3.2GHz windows11 MSVC c++20 + +|Arithmetic|Fixed64|Hardware Float| +|:-|-:|-:| +add/sub[-100.0, 100.0]| 0.2107 ns | 0.4187 ns | +mul[-100.0, 100.0]| 2.7356 ns | 0.8556 ns | +mul[ 0.0, 0.5]| 2.6802 ns | 0.9478 ns | +mul[ 0.5, 1.0]| 2.6930 ns | 0.8574 ns | +mul[ 1.0, 2.0]| 2.6725 ns | 0.8455 ns | +mul[ 2.0, 100.0]| 2.6829 ns | 0.8512 ns | +div[-100.0, 100.0]| 1.6081 ns | 3.1777 ns | +div[ 0.0, 0.5]| 13.7667 ns | 28.6761 ns | +div[ 0.5, 1.0]| 13.7861 ns | 29.7010 ns | +div[ 1.0, 2.0]| 6.2241 ns | 29.6876 ns | +div[ 2.0, 100.0]| 1.5079 ns | 2.3074 ns | +ceil[ -2.0, 2.0]| 0.4330 ns | 2.0436 ns | +floor[ -2.0, 2.0]| 0.4341 ns | 2.0402 ns | +round[ -2.0, 2.0]| 2.0854 ns | 6.4961 ns | +abs[ -2.0, 2.0]| 0.2149 ns | 0.4175 ns | +exp[ 0.0, 1.0]| 10.8654 ns | 2.0519 ns | +exp2[ 0.0, 1.0]| 9.4355 ns | 30.2611 ns | +sqrt[ 0.0, 100.0]| 10.9269 ns | 0.6271 ns | +sin[ -10.0, 10.0]| 2.3128 ns | 3.9287 ns | +cos[ -10.0, 10.0]| 2.1791 ns | 3.6846 ns | +tan[ -10.0, 10.0]| 12.0050 ns | 3.6412 ns | +asin[ -1.0, 1.0]| 19.5066 ns | 2.7914 ns | +acos[ -1.0, 1.0]| 20.2079 ns | 2.9695 ns | +atan[ 1.0, 100.0]| 17.5488 ns | 3.1872 ns | +atan[ 1.0, 
100.0]| 17.1658 ns | 3.0822 ns | + + +## Apple M1 pro + +|Arithmetic|Fixed64|Hardware Float| +|-|-:|-:| +|Addition/Subtraction|0.215 ns|0.953 ns| +|Multiplication|4.057 ns|1.246 ns| +|Division|1.102 ns|3.144 ns| \ No newline at end of file