-
Notifications
You must be signed in to change notification settings - Fork 4
/
main.cpp
137 lines (113 loc) · 4.69 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#include <stdio.h>
#include <stdint.h>
#include "timer.h"
#if defined(FARSH_AVX2) || defined(FARSH_SSE2)
#include "CpuID.h"
#endif
#include "../farsh.c"
// ALIGN(n): declaration prefix that requests n-byte alignment for the object
// being declared, spelled in whatever syntax the current compiler understands.
#if __GNUC__
// Compilers that define __GNUC__: attribute syntax.
#define ALIGN(n) __attribute__ ((aligned(n)))
#elif _MSC_VER
// Microsoft compiler: __declspec syntax.
#define ALIGN(n) __declspec(align(n))
#else
// Any other compiler: ALIGN(n) expands to nothing, so no alignment is requested.
#define ALIGN(n)
#endif
// Benchmark driver for the FARSH hash.
//
// Command line (only argv[1] is examined):
//   (no argument)  - print the banner/usage text, then verbose multi-line results
//   1              - print one table row: program name, full-hash speed, internal-loop speed
//   2              - same as 1, plus an external-loop speed column
//   anything else  - print the banner/usage text and exit
//
// Exit codes:
//   0 - benchmark completed
//   1 - the SIMD extension this binary was built for is not reported by the CPU
//   2 - the first computed hash did not match the expected reference value
//   3 - unrecognized display-format argument
int main (int argc, char **argv)
{
    bool print_table = (argc > 1);       // if any cmdline parameter was given
    bool x64 = (sizeof(void*) == 8);     // check for 64-bit platform

    // Pick the SIMD suffix for the program name and, for SIMD builds,
    // refuse to run when the CPU does not report the required extension.
#ifdef FARSH_AVX2
    char simdext[] = "-avx2";
    struct CpuidFeatures features;  GetCpuidFeatures(&features);
    if (! features.AVX2) {
        if (!print_table)  printf("AVX2 not found!\n");
        return 1;
    }
#elif defined(FARSH_SSE2)
    const char *simdext = x64? "":"-sse2";
    struct CpuidFeatures features;  GetCpuidFeatures(&features);
    if (! features.SSE2) {
        if (!print_table)  printf("SSE2 not found!\n");
        return 1;
    }
#else
    const char *simdext = x64? "-nosimd":"";
#endif

#ifdef FARSH_ALIGNED_INPUT
    bool ALIGNED_INPUT = true;
#else
    bool ALIGNED_INPUT = false;
#endif

    // Choose the display format for results
    int format = argc==1?                 0 :
                 strcmp(argv[1],"1")==0?  1 :
                 strcmp(argv[1],"2")==0?  2 : -1;
    if (format <= 0) {
        printf("FARSH 0.2 Benchmark. See https://github.com/Bulat-Ziganshin/FARSH\n"
               " Usage: farsh [1|2] - choose display format\n");
        if (format < 0)  return 3;
    }

    // Build the name shown in the first table column, e.g. "farsh-x64-avx2".
    // snprintf keeps the write bounded by the buffer size.
    char progname[100];
    snprintf (progname, sizeof progname, "%sfarsh-%s%s",
              ALIGNED_INPUT? "aligned-":"",
              x64? "x64":"x86",
              simdext);

    // CHECK THE ZEROES HASHING
    // (the body is currently disabled; the loop is kept as a placeholder)
    const size_t ZEROES = 64*1024;
    ALIGN(64) static char zero[ZEROES] = {0};
    for (size_t i=0; i<=ZEROES; i++)
    {
        //uint32_t h = farsh (zero, i);
        //printf("%5d %08x\n", i, h);
        //printf("%4d %08x %08x %08x %08x :: ", minbytes, (UINT)(h), (UINT)(h>>32), sum1, sum2);
    }

    // PREPARE TEST DATA. DATASIZE+FARSH_BASE_KEY_SIZE should be less than the L1 cache size, otherwise speed may be limited by memory reads
    const size_t DATASIZE = 12*1024;
    ALIGN(64) static char data_array[DATASIZE+1];
    // In the unaligned build the data starts one byte into the aligned array,
    // so exactly DATASIZE bytes are valid starting at `data` in both builds.
    char *data = ALIGNED_INPUT? data_array : data_array + 1;
    // NOTE: `i` must stay a 32-bit int here - the multiplication is meant to wrap
    // in 32-bit unsigned arithmetic, and the reference hash below depends on it.
    for (int i=0; i<DATASIZE; i++)
        data[i] = char((123456791u*i) >> ((i%16)+8));

#ifndef FARSH_ALIGNED_INPUT
    // CHECK FOR POSSIBLE DATA ALIGNMENT PROBLEMS
    // Hash the buffer from 65 consecutive start offsets. The length is DATASIZE-i
    // so that every call ends exactly at the last valid byte of data_array
    // (data == data_array+1 here, hence DATASIZE+1-i would read one byte past the array).
    for (int i=0; i<=64; i++)
    {
        uint32_t h = farsh (data+i, DATASIZE-i, 0);
        if (h==42)  break;   // anti-optimization trick
        char out[32*4];      // room for up to 32 32-bit hash values
        for (int j=1; j<=32; j++)
            farsh_n (data+i, DATASIZE-i, 0, j, 0, out);
    }
#endif

    // BENCHMARK
    const uint64_t DATASET = uint64_t(100)<<30;
    if (format > 0)  printf("%-24s |", progname);
    else             printf("Hashing %d GiB:", int(DATASET>>30));

    const int EXTRA_LOOPS = (100<<20) / DATASIZE;   // These extra loops are required to enable the SIMD engine and switch CPU core to the maximum frequency
    Timer t;
    uint32_t h = 0;
    for (int i=0; i < EXTRA_LOOPS+DATASET/DATASIZE; i++)
    {
        if (i == EXTRA_LOOPS)           // warm-up finished: start measuring
            t.Start();
        h = farsh (data, DATASIZE, h);  // previous hash is fed back as the seed argument
        if (i == 0 && h != 0xd300ddd8) {   // check hash correctness
            printf("\nWrong hash value: 0x%08X !!!\n", h);
            return 2;
        }
    }
    t.Stop();  double speed = DATASET / t.Elapsed();
    if (print_table)  printf("%8.3lf GB/s =%7.3lf GiB/s", speed/1e9, speed/(1<<30));
    else              printf(" %.3lf milliseconds =%7.3lf GB/s =%7.3lf GiB/s\n", t.Elapsed()*1000, speed/1e9, speed/(1<<30));

    // Remember the full-hash time; the block-loop time is subtracted from it below.
    double t1 = t.Elapsed();

    // Time the block function alone, called once per FARSH_BASE_KEY_SIZE bytes of DATASET.
    const uint32_t *keys = FARSH_KEYS;
    if (t.Elapsed() == 1e42)  data++, keys++;   // anti-optimization trick
    if (format==0)  printf("Internal loop: ");
    t.Start();
    for (int i=0; i < DATASET/FARSH_BASE_KEY_SIZE; i++)
    {
        uint64_t block_hash = farsh_full_block ((uint32_t*)data, keys);
        if (block_hash==42)  data[0] = i;   // anti-optimization trick
    }
    t.Stop();  speed = DATASET / t.Elapsed();
    if (print_table)  printf(" |%8.3lf GB/s =%7.3lf GiB/s", speed/1e9, speed/(1<<30));
    else              printf(" %.3lf milliseconds =%7.3lf GB/s =%7.3lf GiB/s\n", t.Elapsed()*1000, speed/1e9, speed/(1<<30));

    // "External loop" figure: full-hash time minus block-loop time.
    t1 -= t.Elapsed();
    speed = DATASET / t1;
    if (format==2)       printf(" |%9.3lf GB/s =%8.3lf GiB/s", speed/1e9, speed/(1<<30));
    else if (format==0)  printf("External loop: %.3lf milliseconds = %.3lf GB/s = %.3lf GiB/s", t1*1000, speed/1e9, speed/(1<<30));
    printf("\n");
    return 0;
}