diff --git a/article/article.tex b/article/article.tex index d8ab7b3..c8b13fe 100644 --- a/article/article.tex +++ b/article/article.tex @@ -463,20 +463,33 @@ \subsubsection{Benchmark Quality Criteria} \begin{itemize} \item \textbf{Uniform Distribution:} A high-quality hash function distributes its output values as uniformly as possible across the output space. This ensures that, when used in applications like hash tables, the data is spread evenly, reducing clustering and the frequency of collisions. + + We can estimate the uniformity of the distribution by counting the number of times each bit is set and computing a standard deviation. This ``bit distribution'' criterion, however, does not qualify the distribution of the hashes as a whole, so a complementary estimator is the ``bucketed distribution'', which can be computed by placing the generated hashes into a fixed-size grid and counting occurrences. This can also easily be displayed as a bitmap, which is a convenient way to visualize the distribution. \item \textbf{Minimal Collisions:} While no hash function can be entirely collision-free due to the pigeonhole principle, a good non-cryptographic hash should minimize collisions for typical input sets, ensuring that different inputs usually produce distinct outputs. + + The collision rate can be computed by counting unique values with the help of a hash table. \item \textbf{Avalanche Effect:} A subtle change in the input should result in a considerably different output, ensuring sensitivity to input variations. This also contributes to lessen the risk of clustered hashes in applications like hash tables. + + The avalanche effect can be computed by flipping a single random bit of a given input and checking the differences between the hashes generated before and after the bit was flipped. Ideally, half of the bits should change on average. \item \textbf{Performance:} The performance of a non cryptographic hash function is usually reflected by the performance of the application using it.
For instance, a fast non-cryptographic hash function generally implies a fast hash table. This specific criteria will be tackled in the next section which is dedicated to it. + \end{itemize} \subsubsection{Quality Results} While we can compute quality metrics, the result will greatly vary depending on the actual inputs used for our hash function. Let's see how the GxHash0 algorithm qualifies against a few well known non-cryptographic algorithm in a few scenarios. -Todo: Actual funcs used ? Like what is bits distribution vs distribution ? +For comparison, we'll also include qualification results for a few other popular non-cryptographic hash algorithms such as: + +\begin{itemize} +\item \textbf{HighwayHash}\cite{highwayhash} The latest non-cryptographic hash algorithm from Google Research +\item \textbf{xxHash}\cite{twox-hash} Recently a very popular algorithm for fast non-cryptographic hashing +\item \textbf{t1ha0}\cite{rust-t1ha} Supposedly the fastest algorithm +\end{itemize} \clearpage \paragraph{Random Blobs}\leavevmode\\ -Randomly generated inputs to observe how the hash function behaves with truly unpredictable data +For the first scenario, we randomly generate 1,000,000 inputs of 4, 64 and 1000 bytes to observe how the hash function behaves with truly unpredictable data of different sizes.
\begin{table}[H] \centering @@ -484,31 +497,44 @@ \subsubsection{Quality Results} \hline \textbf{Function for Random dataset} & \textbf{Collisions} & \textbf{Bits Distribution} & \textbf{Distribution} & \textbf{Avalanche} \\ \hline -Int32 GxHash0(4) & 0,0241\% & 0,001094 & 0,000002 & 0,00412 \\ -Int32 GxHash0(64) & 0,0103\% & 0,001081 & 0,000002 & 0,00296 \\ -Int32 GxHash0(1000) & 0,012\% & 0,001148 & 0,000002 & 0,00336 \\ -Int32 HighwayHash(4) & 0,0237\% & 0,001135 & 0,000002 & 0,00001 \\ -Int32 HighwayHash(64) & 0,0117\% & 0,001028 & 0,000002 & 0,00078 \\ -Int32 HighwayHash(1000) & 0,0103\% & 0,00092 & 0,000002 & 0,00032 \\ -Int32 T1ha(4) & 0,021\% & 0,001034 & 0,000002 & 0,00002 \\ -Int32 T1ha(64) & 0,0123\% & 0,000933 & 0,000002 & 0,00047 \\ -Int32 T1ha(1000) & 0,0114\% & 0,001087 & 0,000002 & 0,00027 \\ +UInt32 GxHash0(4) & 0,0241\% & 0,001094 & 0,000002 & 0,00412 \\ +UInt32 GxHash0(64) & 0,0103\% & 0,001081 & 0,000002 & 0,00296 \\ +UInt32 GxHash0(1000) & 0,012\% & 0,001148 & 0,000002 & 0,00336 \\ +UInt32 HighwayHash(4) & 0,0237\% & 0,001135 & 0,000002 & 0,00001 \\ +UInt32 HighwayHash(64) & 0,0117\% & 0,001028 & 0,000002 & 0,00078 \\ +UInt32 HighwayHash(1000) & 0,0103\% & 0,00092 & 0,000002 & 0,00032 \\ +UInt32 T1ha(4) & 0,021\% & 0,001034 & 0,000002 & 0,00002 \\ +UInt32 T1ha(64) & 0,0123\% & 0,000933 & 0,000002 & 0,00047 \\ +UInt32 T1ha(1000) & 0,0114\% & 0,001087 & 0,000002 & 0,00027 \\ UInt32 XxHash(4) & 0,0119\% & 0,00102 & 0,000002 & 0,00027 \\ UInt32 XxHash(64) & 0,013\% & 0,000871 & 0,000002 & 0,00083 \\ UInt32 XxHash(1000) & 0,0131\% & 0,001214 & 0,000002 & 0,00038 \\ -UInt32 Fnv1a(4) & 0,031\% & 0,001008 & 0,000002 & 0,20155 \\ -UInt32 Fnv1a(64) & 0,0094\% & 0,000748 & 0,000002 & 0,08599 \\ -UInt32 Fnv1a(1000) & 0,0138\% & 0,000821 & 0,000002 & 0,07861 \\ -UInt32 Crc(4) & 0,0119\% & 0,000811 & 0,000002 & 0,11689 \\ -UInt32 Crc(64) & 0,0117\% & 0,001041 & 0,000002 & 0,02473 \\ +UInt32 Fnv1a(4) & 0,031\% & 0,001008 & 0,000002 & \textcolor{red}{0,20155} 
\\ +UInt32 Fnv1a(64) & 0,0094\% & 0,000748 & 0,000002 & \textcolor{orange}{0,08599} \\ +UInt32 Fnv1a(1000) & 0,0138\% & 0,000821 & 0,000002 & \textcolor{orange}{0,07861} \\ +UInt32 Crc(4) & 0,0119\% & 0,000811 & 0,000002 & \textcolor{red}{0,11689} \\ +UInt32 Crc(64) & 0,0117\% & 0,001041 & 0,000002 & \textcolor{orange}{0,02473} \\ UInt32 Crc(1000) & 0,0123\% & 0,001097 & 0,000002 & 0,00514 \\ \hline \end{tabular} \caption{Your Table Caption Here} -\label{tab:my_label} +\label{tab:quality-data-random} \end{table} -Du blabla +All numbers are very low, and GxHash0 quality results are of the same order of magnitude as those of the other algorithms. +We can notice, however, a collision rate of about 0.011\% and even 0.022\% for a few of them for the 4-byte input. +There is, however, an explanation: from the birthday paradox we can derive the following formula to +estimate the percentage of collisions: + +\begin{align*} + 100 \times \frac{n^2}{2 \times m \times n} +\end{align*} + +Where \(n\) is the number of samples and \(m\) the number of possible values. When \(n=1000000\) and \(m=2^{32}\) we obtain 0.0116\%. +You can see that this value closely matches most of the benchmarked collision rates. This is because the generated hashes are 32 bits wide, +thus naturally colliding at this rate. For inputs of size 4, the inputs themselves are also likely to collide with the same odds (because inputs are randomly generated). For this reason, the collision rate is expected to be about 2 \(\times\) 0.0116\%. +We can see, however, that Crc and XxHash have lower odds of collision for 4-byte inputs, which can be explained by size-specific logic that handles small inputs bijectively.
+ \begin{figure}[H] \centering @@ -529,23 +555,23 @@ \subsubsection{Quality Results} \hline \textbf{Function for Sequential dataset} & \textbf{Collisions} & \textbf{Bits Distribution} & \textbf{Distribution} & \textbf{Avalanche} \\ \hline -Int32 GxHash0(4) & 0,0104\% & 0,000009 & 0,000002 & 0,00308 \\ -Int32 GxHash0(64) & 0,0106\% & 0,000009 & 0,0000019 & 0,00225 \\ -Int32 GxHash0(1000) & 0,0104\% & 0,000009 & 0,000002 & 0,00283 \\ -Int32 HighwayHash(4) & 0,0117\% & 0,00112 & 0,000002 & 0,00011 \\ -Int32 HighwayHash(64) & 0,0104\% & 0,001204 & 0,000002 & 0,00044 \\ -Int32 HighwayHash(1000) & 0,0112\% & 0,001188 & 0,000002 & 0,00131 \\ -Int32 T1ha(4) & 0,012\% & 0,000746 & 0,000002 & 0,00076 \\ -Int32 T1ha(64) & 0,0125\% & 0,000987 & 0,000002 & 0,00071 \\ -Int32 T1ha(1000) & 0,0113\% & 0,000944 & 0,000002 & 0,00003 \\ +UInt32 GxHash0(4) & 0,0104\% & 0,000009 & 0,000002 & 0,00308 \\ +UInt32 GxHash0(64) & 0,0106\% & 0,000009 & 0,0000019 & 0,00225 \\ +UInt32 GxHash0(1000) & 0,0104\% & 0,000009 & 0,000002 & 0,00283 \\ +UInt32 HighwayHash(4) & 0,0117\% & 0,00112 & 0,000002 & 0,00011 \\ +UInt32 HighwayHash(64) & 0,0104\% & 0,001204 & 0,000002 & 0,00044 \\ +UInt32 HighwayHash(1000) & 0,0112\% & 0,001188 & 0,000002 & 0,00131 \\ +UInt32 T1ha(4) & 0,012\% & 0,000746 & 0,000002 & 0,00076 \\ +UInt32 T1ha(64) & 0,0125\% & 0,000987 & 0,000002 & 0,00071 \\ +UInt32 T1ha(1000) & 0,0113\% & 0,000944 & 0,000002 & 0,00003 \\ UInt32 XxHash(4) & 0\% & 0,000933 & 0,000002 & 0,00018 \\ UInt32 XxHash(64) & 0\% & 0,000907 & 0,000002 & 0,00046 \\ UInt32 XxHash(1000) & 0\% & 0,001081 & 0,000002 & 0,0007 \\ -UInt32 Fnv1a(4) & 0\% & 0,00009 & 0,0000017 & 0,18255 \\ -UInt32 Fnv1a(64) & 0\% & 0,000064 & 0,0000022 & 0,08281 \\ -UInt32 Fnv1a(1000) & 0\% & 0,000042 & 0,0000018 & 0,08416 \\ -UInt32 Crc(4) & 0\% & 0,000003 & 0,000002 & 0,11729 \\ -UInt32 Crc(64) & 0\% & 0,000003 & 0,0000004 & 0,02542 \\ +UInt32 Fnv1a(4) & 0\% & 0,00009 & 0,0000017 & \textcolor{red}{0,18255} \\ +UInt32 Fnv1a(64) & 0\% & 
0,000064 & 0,0000022 & \textcolor{orange}{0,08281} \\ +UInt32 Fnv1a(1000) & 0\% & 0,000042 & 0,0000018 & \textcolor{orange}{0,08416} \\ +UInt32 Crc(4) & 0\% & 0,000003 & 0,000002 & \textcolor{red}{0,11729} \\ +UInt32 Crc(64) & 0\% & 0,000003 & 0,0000004 & \textcolor{orange}{0,02542} \\ UInt32 Crc(1000) & 0\% & 0,00001 & 0,0000004 & 0,0046 \\ \hline \end{tabular} @@ -582,9 +608,9 @@ \subsubsection{Quality Results} UInt32 T1ha(1000) & 0,0123\% & 0,001175 & 0,000002 & 0,00135 \\ UInt32 XxHash(64) & 0,0106\% & 0,000766 & 0,000002 & 0,00046 \\ UInt32 XxHash(1000) & 0,01\% & 0,000892 & 0,000002 & 0,00021 \\ -UInt32 Fnv1a(64) & 0,0122\% & 0,000998 & 0,000002 & 0,08585 \\ -UInt32 Fnv1a(1000) & 0,0127\% & 0,000993 & 0,000002 & 0,08143 \\ -UInt32 Crc(64) & 0,0124\% & 0,000965 & 0,000002 & 0,02467 \\ +UInt32 Fnv1a(64) & 0,0122\% & 0,000998 & 0,000002 & \textcolor{orange}{0,08585} \\ +UInt32 Fnv1a(1000) & 0,0127\% & 0,000993 & 0,000002 & \textcolor{orange}{0,08143} \\ +UInt32 Crc(64) & 0,0124\% & 0,000965 & 0,000002 & \textcolor{orange}{0,02467} \\ UInt32 Crc(1000) & 0,0123\% & 0,000708 & 0,000002 & 0,00499 \\ \hline \end{tabular} @@ -607,14 +633,10 @@ \subsection{Performance} \begin{figure}[H] \centering \includegraphics[width=1\textwidth]{throughput.png} +\caption{Gibibytes of data hashed per second (throughput) per input size} \label{fig:benchmark-throughput} \end{figure} -\begin{figure}[H] -\centering -\includegraphics[width=1\textwidth]{throughput-gcp.png} -\label{fig:benchmark-throughput-gcp} -\end{figure} \section{Discussion} \subsection{Implications} diff --git a/article/references.bib b/article/references.bib index b1ade3b..0f9607c 100644 --- a/article/references.bib +++ b/article/references.bib @@ -27,6 +27,15 @@ @article{merkle_damgard_alternatives_review doi = {10.31341/jios.41.2.9}, } +@article{highwayhash, + author = {J. 
Alakuijala and B. Cox and J. Wassenberg}, + title = {HighwayHash, Fast keyed hash/pseudo-random function using SIMD multiply and permute}, + journal = {Google Research}, + url = {https://github.com/google/highwayhash}, + year = {2017}, + doi = {10.48550/arXiv.1612.06257}, +} + @software{rust-t1ha, author = {Flier Lu}, title = {github.com/flier/rust-t1ha}, diff --git a/article/throughput-arm.png b/article/throughput-arm.png new file mode 100644 index 0000000..4c505f0 Binary files /dev/null and b/article/throughput-arm.png differ diff --git a/article/throughput-gcp.png b/article/throughput-gcp.png deleted file mode 100644 index 57db96b..0000000 Binary files a/article/throughput-gcp.png and /dev/null differ diff --git a/article/throughput-x86-gcp.png b/article/throughput-x86-gcp.png new file mode 100644 index 0000000..2f12fa5 Binary files /dev/null and b/article/throughput-x86-gcp.png differ diff --git a/article/throughput-x86.png b/article/throughput-x86.png new file mode 100644 index 0000000..05a1799 Binary files /dev/null and b/article/throughput-x86.png differ diff --git a/article/throughput.png b/article/throughput.png index 0ad8064..dd2a6d0 100644 Binary files a/article/throughput.png and b/article/throughput.png differ diff --git a/src/gxhash.rs b/src/gxhash.rs index 04e31ac..9456b9d 100644 --- a/src/gxhash.rs +++ b/src/gxhash.rs @@ -42,20 +42,47 @@ mod platform_defs { #[inline] pub unsafe fn compress(a: state, b: state) -> state { - let sum: state = vaddq_s8(a, b); - vextq_s8(sum, sum, 1) + ReinterpretUnion{ uint8: aes_encrypt_last( + ReinterpretUnion{ int8: a }.uint8, + ReinterpretUnion{ int8: b }.uint8) }.int8 + } + + #[inline] + // See https://blog.michaelbrase.com/2018/05/08/emulating-x86-aes-intrinsics-on-armv8-a + unsafe fn aes_encrypt(data: uint8x16_t, keys: uint8x16_t) -> uint8x16_t { + // Encrypt + let encrypted = vaeseq_u8(data, vdupq_n_u8(0)); + // Mix columns + let mixed = vaesmcq_u8(encrypted); + // Xor keys + veorq_u8(mixed, keys) + } + + #[inline] + // See
https://blog.michaelbrase.com/2018/05/08/emulating-x86-aes-intrinsics-on-armv8-a + unsafe fn aes_encrypt_last(data: uint8x16_t, keys: uint8x16_t) -> uint8x16_t { + // Encrypt + let encrypted = vaeseq_u8(data, vdupq_n_u8(0)); + // Xor keys + veorq_u8(encrypted, keys) } #[inline] pub unsafe fn finalize(hash: state) -> u32 { - let salt = vcombine_s64(vcreate_s64(4860325414534694371), vcreate_s64(8120763769363581797)); - let keys = vmulq_s32( - ReinterpretUnion { int64: salt }.int32, - ReinterpretUnion { int8: hash }.int32); - let a = vaeseq_u8(ReinterpretUnion { int8: hash }.uint8, vdupq_n_u8(0)); - let b = vaesmcq_u8(a); - let c = veorq_u8(b, ReinterpretUnion{ int32: keys }.uint8); - let p = &ReinterpretUnion{ uint8: c }.int8 as *const state as *const u32; + // Hardcoded AES keys + let salt1 = vld1q_u32([0x713B01D0, 0x8F2F35DB, 0xAF163956, 0x85459F85].as_ptr()); + let salt2 = vld1q_u32([0x1DE09647, 0x92CFA39C, 0x3DD99ACA, 0xB89C054F].as_ptr()); + let salt3 = vld1q_u32([0xC78B122B, 0x5544B1B7, 0x689D2B7D, 0xD0012E32].as_ptr()); + + // 3 rounds of AES + let mut hash = ReinterpretUnion{ int8: hash }.uint8; + hash = aes_encrypt(hash, ReinterpretUnion{ uint32: salt1 }.uint8); + hash = aes_encrypt(hash, ReinterpretUnion{ uint32: salt2 }.uint8); + hash = aes_encrypt_last(hash, ReinterpretUnion{ uint32: salt3 }.uint8); + let hash = ReinterpretUnion{ uint8: hash }.int8; + + // Truncate to output hash size + let p = &hash as *const state as *const u32; *p } }