From b285c0bff6d0142a604f61a14955658fca0a35b0 Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Wed, 19 Feb 2020 09:15:22 -0800 Subject: [PATCH] Improve usage messages. --- include/PerfectHash.h | 17 --- include/PerfectHashErrors.h | 106 ++++++------------ src/PerfectHash/PerfectHashErrors.mc | 106 ++++++------------ src/PerfectHash/PerfectHashErrors_English.bin | Bin 85724 -> 81928 bytes 4 files changed, 74 insertions(+), 155 deletions(-) diff --git a/include/PerfectHash.h b/include/PerfectHash.h index 47a95f52..cf8b5a70 100644 --- a/include/PerfectHash.h +++ b/include/PerfectHash.h @@ -2713,23 +2713,6 @@ IsValidPerfectHashTableCreateParameterId( // registering the best graph, which should give more clarity to the // role of the X-macro. // -// N.B. The following coverage types are intended to generate worst-case hash -// tables compared to their best-case counterparts. They can be useful -// during development and performance testing to assess the performance -// benefit, if any, of things like reduced cache lines, etc. The types -// follow: -// -// LowestMaxGraphTraversalDepth -// LowestTotalGraphTraversals -// LowestNumberOfEmptyPages -// LowestNumberOfEmptyLargePages -// LowestNumberOfEmptyCacheLines -// LowestNumberOfEmptyPagesUsedByKeysSubset -// LowestNumberOfEmptyLargePagesUsedByKeysSubset -// LowestNumberOfEmptyCacheLinesUsedByKeysSubset -// LowestMaxAssignedPerCacheLineCount -// LowestMaxAssignedPerCacheLineCountForKeysSubset -// #define BEST_COVERAGE_TYPE_TABLE(FIRST_ENTRY, ENTRY, LAST_ENTRY) \ FIRST_ENTRY(NumberOfEmptyPages, Highest, >) \ diff --git a/include/PerfectHashErrors.h b/include/PerfectHashErrors.h index 4f50c3ee..b08716b6 100644 --- a/include/PerfectHashErrors.h +++ b/include/PerfectHashErrors.h @@ -255,28 +255,6 @@ Module Name: // 25 Multiply (2) // 26 MultiplyXor (4) // -// N.B. The lowest latency hash functions with good solving ability, in order of -// ascending latency, are: Crc32RotateX, Crc32RotateXY, Crc32RotateWXYZ. -// You should try these hash functions first and see if a solution can be -// found without a table resize occurring. Failing that, the Jenkins routine -// has been observed to be the least likely to require a table resize on a -// given key set -- however, it does have the highest latency of all the -// hash functions above (anywhere from 7x-10x the latency of Crc32RotateX). -// -// (The difference in latency between the X, XY and WXYZ functions is minimal; -// only a few cycles.) -// -// N.B. The three most recent hash functions are now exhibiting latency on-par with -// the Crc32Rotate functions, but with the added benefit of requiring no table -// resize events on the https://github.com/tpn/perfecthash-keys/sys32 input -// set of keys. That is, these routines should be tried in this order and -// compared against the Crc32Rotate rouintes: -// -// ShiftMultiplyXorShift -// RotateMultiplyXorRotate -// ShiftMultiplyXorShift2 -// RotateMultiplyXorRotate2 -// // Mask Functions: // // ID | Name @@ -562,18 +540,31 @@ Module Name: // // Valid coverage types: // +// HighestNumberOfEmptyPages +// HighestNumberOfEmptyLargePages // HighestNumberOfEmptyCacheLines +// HighestMaxGraphTraversalDepth +// HighestTotalGraphTraversals +// HighestMaxAssignedPerCacheLineCount // -// This predicate is based on the notion that a high number of -// empty cache lines implies a lower number of cache lines are -// required for the table data, which means better clustering of -// table data, which could result in fewer cache misses, which -// would yield greater performance. +// LowestNumberOfEmptyPages +// LowestNumberOfEmptyLargePages +// LowestNumberOfEmptyCacheLines +// LowestMaxGraphTraversalDepth +// LowestTotalGraphTraversals +// LowestMaxAssignedPerCacheLineCount // -// HighestNumberOfEmptyPages -// HighestNumberOfEmptyLargePages +// The following predicates must be used in conjunction with --KeysSubset: // -// As above, but for pages and large pages, respectively. +// HighestMaxAssignedPerCacheLineCountForKeysSubset +// HighestNumberOfPagesUsedByKeysSubset +// HighestNumberOfLargePagesUsedByKeysSubset +// HighestNumberOfCacheLinesUsedByKeysSubset +// +// LowestMaxAssignedPerCacheLineCountForKeysSubset +// LowestNumberOfPagesUsedByKeysSubset +// LowestNumberOfLargePagesUsedByKeysSubset +// LowestNumberOfCacheLinesUsedByKeysSubset // // Console Output Character Legend // @@ -894,54 +885,31 @@ Module Name: // // Valid coverage types: // -// HighestNumberOfEmptyCacheLines -// -// This predicate is based on the notion that a high number of -// empty cache lines implies a lower number of cache lines are -// required for the table data, which means better clustering of -// table data, which could result in fewer cache misses, which -// would yield greater performance. -// // HighestNumberOfEmptyPages // HighestNumberOfEmptyLargePages -// -// As above, but for pages and large pages, respectively. -// -// HighestMaxAssignedPerCacheLineCount -// -// A histogram is maintained of the number of assigned values per -// cache line; this predicate selects the graph with the highest -// histogram count (cache line occupancy) for a given graph. -// +// HighestNumberOfEmptyCacheLines // HighestMaxGraphTraversalDepth +// HighestTotalGraphTraversals +// HighestMaxAssignedPerCacheLineCount // -// This predicate selects the graph with the highest recursive -// traversal depth encountered during the graph assignment stage. -// A high value for this metric is indicative of clustering of -// vertices for one half of an assigned table lookup (and thus, -// may result in a solution with better cache behavior). +// LowestNumberOfEmptyPages +// LowestNumberOfEmptyLargePages +// LowestNumberOfEmptyCacheLines +// LowestMaxGraphTraversalDepth +// LowestTotalGraphTraversals +// LowestMaxAssignedPerCacheLineCount // -// N.B. The following predicates must be used in conjunction with -// --KeysSubset. +// The following predicates must be used in conjunction with --KeysSubset: // -// LowestNumberOfCacheLinesUsedByKeysSubset -// -// This predicate is used to to search for solutions where the -// most frequent keys consume the lowest number of cache lines. -// It is useful in scenarios where the frequency of individual -// keys being looked up is heavily skewed toward a small subset. -// For example, if 90%% of the lookups occur for 10% of the keys, -// the fewer cache lines occupied by those keys, the better. +// HighestMaxAssignedPerCacheLineCountForKeysSubset +// HighestNumberOfPagesUsedByKeysSubset +// HighestNumberOfLargePagesUsedByKeysSubset +// HighestNumberOfCacheLinesUsedByKeysSubset // +// LowestMaxAssignedPerCacheLineCountForKeysSubset // LowestNumberOfPagesUsedByKeysSubset // LowestNumberOfLargePagesUsedByKeysSubset -// -// As above, but for pages and large pages, respectively. -// -// HighestMaxAssignedPerCacheLineCountForKeysSubset -// -// Like HighestMaxAssignedPerCacheLineCount, but for a subset of -// keys. +// LowestNumberOfCacheLinesUsedByKeysSubset // // --KeysSubset=N,N+1[,N+2,N+3,...] (e.g. --KeysSubset=10,50,123,600,670) // diff --git a/src/PerfectHash/PerfectHashErrors.mc b/src/PerfectHash/PerfectHashErrors.mc index 6238e99d..c67c523b 100644 --- a/src/PerfectHash/PerfectHashErrors.mc +++ b/src/PerfectHash/PerfectHashErrors.mc @@ -201,28 +201,6 @@ Hash Functions: 25 Multiply (2) 26 MultiplyXor (4) -N.B. The lowest latency hash functions with good solving ability, in order of - ascending latency, are: Crc32RotateX, Crc32RotateXY, Crc32RotateWXYZ. - You should try these hash functions first and see if a solution can be - found without a table resize occurring. Failing that, the Jenkins routine - has been observed to be the least likely to require a table resize on a - given key set -- however, it does have the highest latency of all the - hash functions above (anywhere from 7x-10x the latency of Crc32RotateX). - - (The difference in latency between the X, XY and WXYZ functions is minimal; - only a few cycles.) - -N.B. The three most recent hash functions are now exhibiting latency on-par with - the Crc32Rotate functions, but with the added benefit of requiring no table - resize events on the https://github.com/tpn/perfecthash-keys/sys32 input - set of keys. That is, these routines should be tried in this order and - compared against the Crc32Rotate rouintes: - - ShiftMultiplyXorShift - RotateMultiplyXorRotate - ShiftMultiplyXorShift2 - RotateMultiplyXorRotate2 - Mask Functions: ID | Name @@ -507,18 +485,31 @@ Table Create Parameters: Valid coverage types: + HighestNumberOfEmptyPages + HighestNumberOfEmptyLargePages HighestNumberOfEmptyCacheLines + HighestMaxGraphTraversalDepth + HighestTotalGraphTraversals + HighestMaxAssignedPerCacheLineCount - This predicate is based on the notion that a high number of - empty cache lines implies a lower number of cache lines are - required for the table data, which means better clustering of - table data, which could result in fewer cache misses, which - would yield greater performance. + LowestNumberOfEmptyPages + LowestNumberOfEmptyLargePages + LowestNumberOfEmptyCacheLines + LowestMaxGraphTraversalDepth + LowestTotalGraphTraversals + LowestMaxAssignedPerCacheLineCount - HighestNumberOfEmptyPages - HighestNumberOfEmptyLargePages + The following predicates must be used in conjunction with --KeysSubset: - As above, but for pages and large pages, respectively. + HighestMaxAssignedPerCacheLineCountForKeysSubset + HighestNumberOfPagesUsedByKeysSubset + HighestNumberOfLargePagesUsedByKeysSubset + HighestNumberOfCacheLinesUsedByKeysSubset + + LowestMaxAssignedPerCacheLineCountForKeysSubset + LowestNumberOfPagesUsedByKeysSubset + LowestNumberOfLargePagesUsedByKeysSubset + LowestNumberOfCacheLinesUsedByKeysSubset Console Output Character Legend @@ -837,54 +828,31 @@ Table Create Parameters: Valid coverage types: - HighestNumberOfEmptyCacheLines - - This predicate is based on the notion that a high number of - empty cache lines implies a lower number of cache lines are - required for the table data, which means better clustering of - table data, which could result in fewer cache misses, which - would yield greater performance. - HighestNumberOfEmptyPages HighestNumberOfEmptyLargePages - - As above, but for pages and large pages, respectively. - - HighestMaxAssignedPerCacheLineCount - - A histogram is maintained of the number of assigned values per - cache line; this predicate selects the graph with the highest - histogram count (cache line occupancy) for a given graph. - + HighestNumberOfEmptyCacheLines HighestMaxGraphTraversalDepth + HighestTotalGraphTraversals + HighestMaxAssignedPerCacheLineCount - This predicate selects the graph with the highest recursive - traversal depth encountered during the graph assignment stage. - A high value for this metric is indicative of clustering of - vertices for one half of an assigned table lookup (and thus, - may result in a solution with better cache behavior). + LowestNumberOfEmptyPages + LowestNumberOfEmptyLargePages + LowestNumberOfEmptyCacheLines + LowestMaxGraphTraversalDepth + LowestTotalGraphTraversals + LowestMaxAssignedPerCacheLineCount - N.B. The following predicates must be used in conjunction with - --KeysSubset. + The following predicates must be used in conjunction with --KeysSubset: - LowestNumberOfCacheLinesUsedByKeysSubset - - This predicate is used to to search for solutions where the - most frequent keys consume the lowest number of cache lines. - It is useful in scenarios where the frequency of individual - keys being looked up is heavily skewed toward a small subset. - For example, if 90%% of the lookups occur for 10% of the keys, - the fewer cache lines occupied by those keys, the better. + HighestMaxAssignedPerCacheLineCountForKeysSubset + HighestNumberOfPagesUsedByKeysSubset + HighestNumberOfLargePagesUsedByKeysSubset + HighestNumberOfCacheLinesUsedByKeysSubset + LowestMaxAssignedPerCacheLineCountForKeysSubset LowestNumberOfPagesUsedByKeysSubset LowestNumberOfLargePagesUsedByKeysSubset - - As above, but for pages and large pages, respectively. - - HighestMaxAssignedPerCacheLineCountForKeysSubset - - Like HighestMaxAssignedPerCacheLineCount, but for a subset of - keys. + LowestNumberOfCacheLinesUsedByKeysSubset --KeysSubset=N,N+1[,N+2,N+3,...] (e.g. --KeysSubset=10,50,123,600,670) diff --git a/src/PerfectHash/PerfectHashErrors_English.bin b/src/PerfectHash/PerfectHashErrors_English.bin index 8274a1b6462866533633887fb90d7c3ed3ba1955..d0bf8ab7e47b2aadf649f20a2ab6a4e3dfe9be4f 100644 GIT binary patch delta 676 zcmcaJm$jpTb%H_anR^Tj!b~g=Qw9b_Aguw!0k0Ssw1IR#5IekOV3-7?=L0bh zKO+MJGs^=BAQli|WRL>V(m-4y%g7)Hq?LeJMvjp|1xU96@ez3-dvY(M_{KFNY|KYk z7$-Y632(l}#>X=Gm+(Ty{LS-3oT4WS6sJv=D|VRtAYWp#mbt*>4f$e|3tqELwtJ{G zIipBo@`YD?oFNSP3?&SS3^|i+AIeVtG+PcR>NZ(rlEh>OeU8azUdl{9bJqY&&$-1l zIj&d&Xrcs^uLL#~qK;>>#X|$Isb^mDU@;Xa3bfk-Z1ROfj>#HZMJ6wK5;mFdjnrfn zeI=l+DU&N6fZa5?=a#_awwG=o_kr0jURt2Y)WhzOUJWoVz^PR?15hFZ6Ts8~5Ig9}V?z_UDNKQIF(PCE20yeN-NkiXX}gUj#A#yC zKZMH*Y{94~ghWChIQ;`AAU_NVCNMKa1p*j;N-_+8Of>O%&gs`~V;KEuVw%%)zSHx) z=Xu}vc`tqc?X=q;rX6Sq9Dk@(UP|h{5{%emrAjf^U{w6B)B?==F`R!WbqMpv825^8 zO4(EI6<|Clv#Gh53o+K#+SEMEWfc=XoV*1I4IiNzSRSgVX^4N+8quB0Oo!IWcsvXCB)K09Tn1iYh zzl5;sRX*j%I;c+$s9Nms7cWjla6W)_Wa$seBe2~+sFWi}s6ojk`&{oTDdyC@iS+P}ZQ!Q#cbeb%<1()^WnkZm%71W?(kTZZ?Dnp$ETE}YY*oONf z*rFMHKyGvl;21S&mIUKNx57fafyZ7V*#kQ#~WM#Fp6 z8b~mfGL|D4YD}9MU|zLe+oVgsM|2T=S4=+#CaOAQOS8GMdSHBLgJXRvAQz8%76kD` z;~KAasxCYSBc_gI-mWpgI}Wh7ZkTC~ka9mQI!7$BCE@44zEh zwM~18luNS2q9Bp>;&dzC>;#ICiVosfr?yi!<`70V6b!0m6K#w$1RaX|$A>3^?8l5K z#Go&uVEBCf7J2?;9Mk$#iz;%rBlsZEYmK4u#iGx|aCB7p4G)6b&1~V7QSTObMiSf{oZW zVF7e9`bi;}Su|*vnPgZD?}nk{Qne7Db}g#|_xWL^R>DCohao$nElS6bugnQ` zIwA(JUWmc@S+xjAczc-0Cwho(WOyY3Go1L6iRfl6mcP>3$vEaaQ)8D7n4FYo5;BZg z!WhY9PJ%V$$w(wo*>}n!sQ4l>CfZ`8-EI+ZM?!$7#D-)g{~aR7w4wq7zp$cgXnn1#)buQ*Nxvmhoi{**xa7 z1B2W>T5P97?pL@nNzizZ%R8wqd3>bk&}PSqZrCjbqmZ>e46D8nl*tuje;B-?nudqT z`V)aDcj43C)SMfx9?Gq?70Ib@vgO8~ju&WW=7~;y8EMayhp5B8iChw4{0r>nJ`{Xp#iUsj95(w`2gr zfL7a-p>|DpQ{^IZe!oYW7v#!;wvQ~^S2jW3#LP(9I-oGK?og|++&_~iS90@<`71*x zs>yEh50f$ze*MU2p>tU&zjlpm-!s1Le+MV%@*i@rCg-#cmPb(+h%-;#=vpkZZ+c1T z%qGymq>h-J-^OMesfop7D;A3RoXmL|EN|x}q9sv=!6HR38(s zA1C<^{{i%%8umg%+Mn-`2lOkGX025T#Zy(BMlKC7CEC~ydtfbUIW^tJ6Qqom=dlfw z18WQ9^qWChe%2|k4bPU8pi{oPR3efW+YlhxxcBJJXn|PgV;iv4ujnXd1e)G!#);`UR^+p-j-eB|BP6hT+{I~ zN{_d!(-J4*W+O=6FzSffv|vQe#*nE!ffj4pkT*_7iQZ3|N{Z!hfhotw&N;eiXI;K< z+VY#RlJTCwKxt$!|75TpN42)p#rTz7AF=R>#XJhl$r~c39n61;7>ge5;WtL_*k8srHh4*xTvoczm#RMmW_O+ta#wIgJsMy5_YBso>pqvH&~s7 zB`u6Sqm^H4%G+sQTGR3{i=%}q!Bg4w9`hy|2{179^BM`?1`F@ZlH-R9{$26?N4a^6 nC41&8Kn!Fu`qR2{BAZ&&jX#n;V4ITNlr3YW`6DauT=oA2jq$5c