From 7363d436083eed87d704a9e45365d58ccb733011 Mon Sep 17 00:00:00 2001 From: Thibault Charbonnier Date: Tue, 8 Oct 2019 17:51:02 -0700 Subject: [PATCH 1/7] tests: x64: fixed a few minor issues with the x64 test suite. * Added missing targets to .PHONY * Made the unit_test.sh script executable * Fixed minor styling issues --- src/x64/test/Makefile | 4 ++-- src/x64/test/test.cpp | 4 ++-- src/x64/test/test_str_comp.lua | 1 - src/x64/test/unit_test.sh | 0 4 files changed, 4 insertions(+), 5 deletions(-) mode change 100644 => 100755 src/x64/test/unit_test.sh diff --git a/src/x64/test/Makefile b/src/x64/test/Makefile index 3ec44eae8..ae3cdbbbd 100644 --- a/src/x64/test/Makefile +++ b/src/x64/test/Makefile @@ -1,4 +1,4 @@ -.PHONY: default test benchmark +.PHONY: default test benchmark clean default: test benchmark @@ -33,7 +33,7 @@ benchmark: $(BENCHMARK_PROGRAM) # micro benchmark ./$(BENCHMARK_PROGRAM) -$(TEST_PROGRAM) : $(TEST_PROGRAM_OBJ) +$(TEST_PROGRAM): $(TEST_PROGRAM_OBJ) cat $(TEST_PROGRAM_OBJ:.o=.d) > dep1.txt $(CXX) $+ $(CXXFLAGS) -lm -o $@ diff --git a/src/x64/test/test.cpp b/src/x64/test/test.cpp index bc92acbbf..a92dc4054 100644 --- a/src/x64/test/test.cpp +++ b/src/x64/test/test.cpp @@ -11,7 +11,7 @@ static bool smoke_test() { fprintf(stdout, "running smoke tests...\n"); - char buf[1024]; + char buf[1024]; char c = getpid() % 'a'; for (int i = 0; i < (int)sizeof(buf); i++) { @@ -34,7 +34,7 @@ verify_log2() fprintf(stdout, "verify log2...\n"); bool err = false; std::map lm; - lm[0] =(uint32_t)-1; + lm[0] = (uint32_t) -1; lm[1] = 0; lm[2] = 1; for (int i = 2; i < 31; i++) { diff --git a/src/x64/test/test_str_comp.lua b/src/x64/test/test_str_comp.lua index 3a5c3e676..f1d79cfb3 100644 --- a/src/x64/test/test_str_comp.lua +++ b/src/x64/test/test_str_comp.lua @@ -18,7 +18,6 @@ ffi.cdef[[ long random(void); ]] - local function test_equal(len_min, len_max) -- source string is wrapped by 16-byte-junk both before and after the -- string diff --git 
a/src/x64/test/unit_test.sh b/src/x64/test/unit_test.sh old mode 100644 new mode 100755 From af84811cc8f9e1d6d494e3f5ece0df47e1aecc53 Mon Sep 17 00:00:00 2001 From: Thibault Charbonnier Date: Tue, 8 Oct 2019 17:51:15 -0700 Subject: [PATCH 2/7] feature: x86: Runtime detection of SSE4.2 support when compiled with '-msse4.2'. Co-authored-by: iSage Co-authored-by: Siddhesh Poyarekar --- src/Makefile | 6 +- src/lj_arch.h | 9 + src/lj_obj.h | 8 + src/lj_state.c | 3 + src/lj_str.c | 50 +---- src/lj_str.h | 3 +- .../src/lj_str_hash_x64.h => lj_str_hash.c} | 193 +++++++++++------- src/lj_str_hash.h | 13 ++ src/x64/test/Makefile | 7 +- src/x64/test/benchmark.cxx | 43 +--- src/x64/test/test.cpp | 11 +- 11 files changed, 185 insertions(+), 161 deletions(-) rename src/{x64/src/lj_str_hash_x64.h => lj_str_hash.c} (75%) create mode 100644 src/lj_str_hash.h diff --git a/src/Makefile b/src/Makefile index f7f3025f9..3fe0abb13 100644 --- a/src/Makefile +++ b/src/Makefile @@ -490,9 +490,9 @@ LJLIB_O= lib_base.o lib_math.o lib_bit.o lib_string.o lib_table.o \ LJLIB_C= $(LJLIB_O:.o=.c) LJCORE_O= lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \ - lj_str.o lj_tab.o lj_func.o lj_udata.o lj_meta.o lj_debug.o \ - lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o lj_strscan.o \ - lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ + lj_str.o lj_str_hash.o lj_tab.o lj_func.o lj_udata.o lj_meta.o \ + lj_debug.o lj_state.o lj_dispatch.o lj_vmevent.o lj_vmmath.o \ + lj_strscan.o lj_strfmt.o lj_strfmt_num.o lj_api.o lj_profile.o \ lj_lex.o lj_parse.o lj_bcread.o lj_bcwrite.o lj_load.o \ lj_ir.o lj_opt_mem.o lj_opt_fold.o lj_opt_narrow.o \ lj_opt_dce.o lj_opt_loop.o lj_opt_split.o lj_opt_sink.o \ diff --git a/src/lj_arch.h b/src/lj_arch.h index 0232ebe6b..bde8b96f3 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -615,4 +615,13 @@ extern void *LJ_WIN_LOADLIBA(const char *path); #define LJ_52 0 #endif +/* Optimized string hashing, added by OpenResty. 
*/ +#if LUAJIT_TARGET == LUAJIT_ARCH_X64 && defined(__GNUC__) && defined(__SSE4_2__) +#ifndef LJ_OR_DISABLE_STRHASHCRC32 +#define LJ_OR_STRHASHCRC32 1 +#endif +#else +#define LJ_OR_STRHASHCRC32 0 +#endif + #endif diff --git a/src/lj_obj.h b/src/lj_obj.h index 6c60ebdd5..aa9177fbe 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -160,6 +160,11 @@ typedef struct SBuf { MRef L; /* lua_State, used for buffer resizing. */ } SBuf; +#if LJ_OR_STRHASHCRC32 +/* String hashing functions, added by OpenResty. */ +typedef MSize (*StrHashFunction)(const char *, size_t); +#endif + /* -- Tags and values ----------------------------------------------------- */ /* Frame link. */ @@ -622,6 +627,9 @@ typedef struct global_State { MRef saved_jit_base; /* saved jit_base for lj_err_throw */ MRef ctype_state; /* Pointer to C type state. */ GCRef gcroot[GCROOT_MAX]; /* GC roots. */ +#if LJ_OR_STRHASHCRC32 + StrHashFunction strhashfn; /* String hashing function, added by OpenResty */ +#endif } global_State; #define mainthread(g) (&gcref(g->mainthref)->th) diff --git a/src/lj_state.c b/src/lj_state.c index a0fba2aca..511851f2e 100644 --- a/src/lj_state.c +++ b/src/lj_state.c @@ -204,6 +204,9 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud) setgcref(g->uvhead.prev, obj2gco(&g->uvhead)); setgcref(g->uvhead.next, obj2gco(&g->uvhead)); g->strmask = ~(MSize)0; +#if LJ_OR_STRHASHCRC32 + lj_init_strhashfn(g); +#endif setnilV(registry(L)); setnilV(&g->nilnode.val); setnilV(&g->nilnode.key); diff --git a/src/lj_str.c b/src/lj_str.c index ba5edda5f..a91b7a726 100644 --- a/src/lj_str.c +++ b/src/lj_str.c @@ -130,49 +130,6 @@ void lj_str_resize(lua_State *L, MSize newmask) g->strhash = newhash; } -static MSize -lj_str_original_hash(const char *str, size_t lenx) -{ - MSize len = (MSize)lenx; - MSize a, b, h = len; - - /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ - if (len >= 4) { /* Caveat: unaligned access! 
*/ - a = lj_getu32(str); - h ^= lj_getu32(str+len-4); - b = lj_getu32(str+(len>>1)-2); - h ^= b; h -= lj_rol(b, 14); - b += lj_getu32(str+(len>>2)-1); - } else if (len > 0) { - a = *(const uint8_t *)str; - h ^= *(const uint8_t *)(str+len-1); - b = *(const uint8_t *)(str+(len>>1)); - h ^= b; h -= lj_rol(b, 14); - } else { - return 0; - } - - a ^= h; a -= lj_rol(h, 11); - b ^= a; b -= lj_rol(a, 25); - h ^= b; h -= lj_rol(b, 16); - - return h; -} - -MSize -lj_str_indep_hash(GCstr *str) -{ - return lj_str_original_hash(strdata(str), str->len); -} - -#include "x64/src/lj_str_hash_x64.h" - -#if defined(LJ_ARCH_STR_HASH) -#define LJ_STR_HASH LJ_ARCH_STR_HASH -#else -#define LJ_STR_HASH lj_str_original_hash -#endif - /* Intern a string and return string object. */ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) { @@ -189,7 +146,12 @@ GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) return &g->strempty; } - h = LJ_STR_HASH(str, lenx); +#if LJ_OR_STRHASHCRC32 + lua_assert(g->strhashfn != NULL); + h = g->strhashfn(str, lenx); +#else + h = lj_str_hash_orig(str, lenx); +#endif /* Check if the string has already been interned. */ o = gcref(g->strhash[h & g->strmask]); diff --git a/src/lj_str.h b/src/lj_str.h index 0e21432e3..697847575 100644 --- a/src/lj_str.h +++ b/src/lj_str.h @@ -9,6 +9,7 @@ #include #include "lj_obj.h" +#include "lj_str_hash.h" /* String helpers. 
*/ LJ_FUNC int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b); @@ -24,6 +25,4 @@ LJ_FUNC void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s); #define lj_str_newz(L, s) (lj_str_new(L, s, strlen(s))) #define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1)) -MSize lj_str_indep_hash(GCstr *str); - #endif diff --git a/src/x64/src/lj_str_hash_x64.h b/src/lj_str_hash.c similarity index 75% rename from src/x64/src/lj_str_hash_x64.h rename to src/lj_str_hash.c index cf37a2d20..1ae1d30e5 100644 --- a/src/x64/src/lj_str_hash_x64.h +++ b/src/lj_str_hash.c @@ -5,39 +5,40 @@ * to 128 bytes of given string. */ -#ifndef _LJ_STR_HASH_X64_H_ -#define _LJ_STR_HASH_X64_H_ +#define lj_str_hash_c +#define LUA_CORE -#if defined(__SSE4_2__) && defined(__x86_64) && defined(__GNUC__) +#include "lj_str_hash.h" + +#if LJ_OR_STRHASHCRC32 -#include #include #include #include #include +#include "lj_vm.h" -#include "../../lj_def.h" - -#undef LJ_AINLINE -#define LJ_AINLINE +#ifndef F_CPU_SSE4_2 +#define F_CPU_SSE4_2 (1 << 20) +#endif #ifdef __MINGW32__ -#define random() ((long) rand()) -#define srandom(seed) srand(seed) +#define random() ((long) rand()) +#define srandom(seed) srand(seed) #endif -static const uint64_t* cast_uint64p(const char* str) +static LJ_AINLINE const uint64_t* cast_uint64p(const char* str) { return (const uint64_t*)(void*)str; } -static const uint32_t* cast_uint32p(const char* str) +static LJ_AINLINE const uint32_t* cast_uint32p(const char* str) { return (const uint32_t*)(void*)str; } /* hash string with len in [1, 4) */ -static LJ_AINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len) +static LJ_NOINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len) { #if 0 /* TODO: The if-1 part (i.e the original algorithm) is working better when @@ -66,7 +67,7 @@ static LJ_AINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len) } /* hash string with len in [4, 16) */ -static LJ_AINLINE uint32_t lj_str_hash_4_16(const char* str, uint32_t len) +static 
LJ_NOINLINE uint32_t lj_str_hash_4_16(const char* str, uint32_t len) { uint64_t v1, v2, h; @@ -81,11 +82,12 @@ static LJ_AINLINE uint32_t lj_str_hash_4_16(const char* str, uint32_t len) h = _mm_crc32_u32(0, len); h = _mm_crc32_u64(h, v1); h = _mm_crc32_u64(h, v2); + return h; } /* hash string with length in [16, 128) */ -static uint32_t lj_str_hash_16_128(const char* str, uint32_t len) +static LJ_NOINLINE uint32_t lj_str_hash_16_128(const char* str, uint32_t len) { uint64_t h1, h2; uint32_t i; @@ -139,71 +141,20 @@ static LJ_AINLINE uint32_t log2_floor(uint32_t n) return 31; } -#define POW2_MASK(n) ((1L << (n)) - 1) - -/* This function is to populate `random_pos` such that random_pos[i][*] - * contains random value in the range of [2**i, 2**(i+1)). - */ -static void x64_init_random(void) -{ - int i, seed, rml; - - /* Calculate the ceil(log2(RAND_MAX)) */ - rml = log2_floor(RAND_MAX); - if (RAND_MAX & (RAND_MAX - 1)) { - rml += 1; - } - - /* Init seed */ - seed = _mm_crc32_u32(0, getpid()); - seed = _mm_crc32_u32(seed, time(NULL)); - srandom(seed); - - /* Now start to populate the random_pos[][]. */ - for (i = 0; i < 3; i++) { - /* No need to provide random value for chunk smaller than 8 bytes */ - random_pos[i][0] = random_pos[i][1] = 0; - } - - for (; i < rml; i++) { - random_pos[i][0] = random() & POW2_MASK(i+1); - random_pos[i][1] = random() & POW2_MASK(i+1); - } - - for (; i < 31; i++) { - int j; - for (j = 0; j < 2; j++) { - uint32_t v, scale; - scale = random_pos[i - rml][0]; - if (scale == 0) { - scale = 1; - } - v = (random() * scale) & POW2_MASK(i+1); - random_pos[i][j] = v; - } - } -} -#undef POW2_MASK - -void __attribute__((constructor)) x64_init_random_constructor() -{ - x64_init_random(); -} - /* Return a pre-computed random number in the range of [1**chunk_sz_order, * 1**(chunk_sz_order+1)). 
It is "unsafe" in the sense that the return value * may be greater than chunk-size; it is up to the caller to make sure * "chunk-base + return-value-of-this-func" has valid virtual address. */ static LJ_AINLINE uint32_t get_random_pos_unsafe(uint32_t chunk_sz_order, - uint32_t idx) + uint32_t idx) { uint32_t pos = random_pos[chunk_sz_order][idx & 1]; return pos; } static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str, - uint32_t len) + uint32_t len) { uint32_t chunk_num, chunk_sz, chunk_sz_log2, i, pos1, pos2; uint64_t h1, h2, v; @@ -242,11 +193,12 @@ static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str, h2 = _mm_crc32_u64(h2, *cast_uint64p(str + len - 8)); h1 = _mm_crc32_u32(h1, h2); + return h1; } /* NOTE: the "len" should not be zero */ -static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len) +static MSize lj_str_hash_crc32(const char *str, size_t len) { if (len < 128) { if (len >= 16) { /* [16, 128) */ @@ -260,12 +212,107 @@ static LJ_AINLINE uint32_t lj_str_hash(const char* str, size_t len) /* [0, 4) */ return lj_str_hash_1_4(str, len); } + /* [128, inf) */ return lj_str_hash_128_above(str, len); } -#define LJ_ARCH_STR_HASH lj_str_hash -#else -#undef LJ_ARCH_STR_HASH +#define POW2_MASK(n) ((1L << (n)) - 1) + +/* This function is to populate `random_pos` such that random_pos[i][*] + * contains random value in the range of [2**i, 2**(i+1)). + */ +static void lj_str_hash_init_random(void) +{ + int i, seed, rml; + + /* Calculate the ceil(log2(RAND_MAX)) */ + rml = log2_floor(RAND_MAX); + if (RAND_MAX & (RAND_MAX - 1)) { + rml += 1; + } + + /* Init seed */ + seed = _mm_crc32_u32(0, getpid()); + seed = _mm_crc32_u32(seed, time(NULL)); + srandom(seed); + + /* Now start to populate the random_pos[][]. 
*/ + for (i = 0; i < 3; i++) { + /* No need to provide random value for chunk smaller than 8 bytes */ + random_pos[i][0] = random_pos[i][1] = 0; + } + + for (; i < rml; i++) { + random_pos[i][0] = random() & POW2_MASK(i+1); + random_pos[i][1] = random() & POW2_MASK(i+1); + } + + for (; i < 31; i++) { + int j; + for (j = 0; j < 2; j++) { + uint32_t v, scale; + scale = random_pos[i - rml][0]; + if (scale == 0) { + scale = 1; + } + v = (random() * scale) & POW2_MASK(i+1); + random_pos[i][j] = v; + } + } +} + +#undef POW2_MASK + +LJ_FUNC unsigned char lj_check_crc32_support() +{ + uint32_t features[4]; + if (lj_vm_cpuid(1, features)) + return (features[2] & F_CPU_SSE4_2) != 0; + return 0; +} + +LJ_FUNC void lj_init_strhashfn(global_State *g) +{ + static StrHashFunction strhashfn; + if (strhashfn == NULL) { + if (lj_check_crc32_support()) { + lj_str_hash_init_random(); + strhashfn = lj_str_hash_crc32; + } else { + strhashfn = lj_str_hash_orig; + } + } + g->strhashfn = strhashfn; +} + #endif -#endif /*_LJ_STR_HASH_X64_H_*/ + +LJ_FUNC MSize lj_str_hash_orig(const char *str, size_t lenx) +{ + MSize len = (MSize)lenx; + MSize a, b, h = len; + + /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ + if (len >= 4) { /* Caveat: unaligned access! 
*/ + a = lj_getu32(str); + h ^= lj_getu32(str+len-4); + b = lj_getu32(str+(len>>1)-2); + h ^= b; h -= lj_rol(b, 14); + b += lj_getu32(str+(len>>2)-1); + } else if (len > 0) { + a = *(const uint8_t *)str; + h ^= *(const uint8_t *)(str+len-1); + b = *(const uint8_t *)(str+(len>>1)); + h ^= b; h -= lj_rol(b, 14); + } else { + return 0; + } + + a ^= h; a -= lj_rol(h, 11); + b ^= a; b -= lj_rol(a, 25); + h ^= b; h -= lj_rol(b, 16); + + return h; +} + diff --git a/src/lj_str_hash.h b/src/lj_str_hash.h new file mode 100644 index 000000000..fb9a16019 --- /dev/null +++ b/src/lj_str_hash.h @@ -0,0 +1,13 @@ +#ifndef _LJ_STR_HASH_H +#define _LJ_STR_HASH_H + +#include "lj_obj.h" + +LJ_FUNC MSize lj_str_hash_orig(const char *str, size_t lenx); + +#if LJ_OR_STRHASHCRC32 +LJ_FUNC unsigned char lj_check_crc32_support(); +LJ_FUNC void lj_init_strhashfn(global_State *g); +#endif + +#endif diff --git a/src/x64/test/Makefile b/src/x64/test/Makefile index ae3cdbbbd..df1d380a0 100644 --- a/src/x64/test/Makefile +++ b/src/x64/test/Makefile @@ -16,7 +16,8 @@ else VALGRIND := endif -CXXFLAGS := -O3 -MD -g -msse4.2 -Wall -I../src -I../../../src +CXXFLAGS := -O3 -MD -g -Wall -msse4.2 -I../.. 
+LDFLAGS := ../../libluajit.a -ldl -lm %.o: %.cxx $(CXX) $(CXXFLAGS) -MD -c $< @@ -35,11 +36,11 @@ benchmark: $(BENCHMARK_PROGRAM) $(TEST_PROGRAM): $(TEST_PROGRAM_OBJ) cat $(TEST_PROGRAM_OBJ:.o=.d) > dep1.txt - $(CXX) $+ $(CXXFLAGS) -lm -o $@ + $(CXX) $+ $(LDFLAGS) -o $@ $(BENCHMARK_PROGRAM): $(BENCHMARK_PROGRAM_OBJ) cat $(BENCHMARK_PROGRAM_OBJ:.o=.d) > dep2.txt - $(CXX) $+ $(CXXFLAGS) -o $@ + $(CXX) $+ $(LDFLAGS) -o $@ -include dep1.txt -include dep2.txt diff --git a/src/x64/test/benchmark.cxx b/src/x64/test/benchmark.cxx index e37edb032..b0006ea84 100644 --- a/src/x64/test/benchmark.cxx +++ b/src/x64/test/benchmark.cxx @@ -1,14 +1,15 @@ #include // for gettimeofday() -extern "C" { -#include "lj_str_hash_x64.h" -} #include #include #include #include -#include "test_util.hpp" #include #include +#include "test_util.hpp" + +extern "C" { +#include "lj_str_hash.c" +} using namespace std; @@ -17,32 +18,6 @@ using namespace std; const char* separator = "-------------------------------------------"; -static uint32_t LJ_AINLINE -lj_original_hash(const char *str, size_t len) -{ - uint32_t a, b, h = len; - if (len >= 4) { - a = lj_getu32(str); h ^= lj_getu32(str+len-4); - b = lj_getu32(str+(len>>1)-2); - h ^= b; h -= lj_rol(b, 14); - b += lj_getu32(str+(len>>2)-1); - a ^= h; a -= lj_rol(h, 11); - b ^= a; b -= lj_rol(a, 25); - h ^= b; h -= lj_rol(b, 16); - } else { - a = *(const uint8_t *)str; - h ^= *(const uint8_t *)(str+len-1); - b = *(const uint8_t *)(str+(len>>1)); - h ^= b; h -= lj_rol(b, 14); - } - - a ^= h; a -= lj_rol(h, 11); - b ^= a; b -= lj_rol(a, 25); - h ^= b; h -= lj_rol(b, 16); - - return h; -} - template double BenchmarkHashTmpl(T func, char* buf, size_t len) { @@ -65,14 +40,14 @@ BenchmarkHashTmpl(T func, char* buf, size_t len) struct TestFuncWas { uint32_t operator()(const char* buf, uint32_t len) { - return lj_original_hash(buf, len); + return lj_str_hash_orig(buf, len); } }; struct TestFuncIs { uint32_t operator()(const char* buf, uint32_t len) { - return 
lj_str_hash(buf, len); + return lj_str_hash_crc32(buf, len); } }; @@ -199,8 +174,8 @@ benchmarkConflictHelper(uint32_t bucketNum, const vector& strs) for (vector::const_iterator i = strs.begin(), e = strs.end(); i != e; ++i) { - uint32_t h1 = lj_original_hash(i->c_str(), i->size()); - uint32_t h2 = lj_str_hash(i->c_str(), i->size()); + uint32_t h1 = lj_str_hash_orig(i->c_str(), i->size()); + uint32_t h2 = lj_str_hash_crc32(i->c_str(), i->size()); conflictWas[h1 & mask]++; conflictIs[h2 & mask]++; diff --git a/src/x64/test/test.cpp b/src/x64/test/test.cpp index a92dc4054..1b8ea2dbb 100644 --- a/src/x64/test/test.cpp +++ b/src/x64/test/test.cpp @@ -1,15 +1,22 @@ #include #include #include +#include +#include #include #include "test_util.hpp" -#include "lj_str_hash_x64.h" + +extern "C" { +#include "lj_str_hash.c" +} using namespace std; static bool smoke_test() { + lj_str_hash_init_random(); + fprintf(stdout, "running smoke tests...\n"); char buf[1024]; char c = getpid() % 'a'; @@ -22,7 +29,7 @@ smoke_test() 255, 256, 257}; for (unsigned i = 0; i < sizeof(lens)/sizeof(lens[0]); i++) { string s(buf, lens[i]); - test_printf("%d", lj_str_hash(s.c_str(), lens[i])); + test_printf("%d", lj_str_hash_crc32(s.c_str(), lens[i])); } return true; From d0d2b47af5abe17e9a3d147ef0fdcb6b166eef3c Mon Sep 17 00:00:00 2001 From: Thibault Charbonnier Date: Tue, 8 Oct 2019 17:51:17 -0700 Subject: [PATCH 3/7] feature: implemented new extensions 'jit.crc32()' and 'jit.strhashcrc32()'. 
--- src/lib_jit.c | 22 ++++++++++++++++++++++ t/crc32.t | 39 +++++++++++++++++++++++++++++++++++++++ t/strhashcrc32.t | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 t/crc32.t create mode 100644 t/strhashcrc32.t diff --git a/src/lib_jit.c b/src/lib_jit.c index b84efa134..353c99a82 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -15,6 +15,7 @@ #include "lj_err.h" #include "lj_debug.h" #include "lj_str.h" +#include "lj_str_hash.h" #include "lj_tab.h" #include "lj_state.h" #include "lj_bc.h" @@ -156,6 +157,27 @@ LJLIB_CF(jit_prngstate) return 1; } +LJLIB_CF(jit_crc32) +{ +#if LJ_OR_STRHASHCRC32 + setboolV(L->top++, lj_check_crc32_support()); +#else + setboolV(L->top++, 0); +#endif + return 1; +} + +LJLIB_CF(jit_strhashcrc32) +{ +#if LJ_OR_STRHASHCRC32 + global_State *g = G(L); + setboolV(L->top++, (g->strhashfn != lj_str_hash_orig) ? 1 : 0); +#else + setboolV(L->top++, 0); +#endif + return 1; +} + LJLIB_PUSH(top-5) LJLIB_SET(os) LJLIB_PUSH(top-4) LJLIB_SET(arch) LJLIB_PUSH(top-3) LJLIB_SET(version_num) diff --git a/t/crc32.t b/t/crc32.t new file mode 100644 index 000000000..bf160fd2b --- /dev/null +++ b/t/crc32.t @@ -0,0 +1,39 @@ +# vim:set ts=4 sts=4 sw=4 et ft=: + +use lib '.'; +use t::TestLJ; + +plan tests => 3 * blocks(); + +run_tests(); + +__DATA__ + +=== TEST 1: interpreted (sanity) +flag. 
+--- lua +jit.off() + +jit.crc32() + +print("ok") +--- out +ok +--- err + + + +=== TEST 2: JIT (sanity) +--- lua +jit.opt.start("minstitch=100000", "hotloop=2") + +for i = 1, 50 do + jit.crc32() +end + +print("ok") +--- out +ok +--- jv +--- err eval +qr/trace too short at jit\.crc32/ diff --git a/t/strhashcrc32.t b/t/strhashcrc32.t new file mode 100644 index 000000000..a208cb412 --- /dev/null +++ b/t/strhashcrc32.t @@ -0,0 +1,42 @@ +# vim:set ts=4 sts=4 sw=4 et ft=: + +use lib '.'; +use t::TestLJ; + +plan tests => 3 * blocks(); + +run_tests(); + +__DATA__ + +=== TEST 1: interpreted (sanity) +--- lua +jit.off() + +if jit.crc32() then + assert(jit.strhashcrc32() == true, "strhashcrc32 should be enabled") +else + assert(jit.strhashcrc32() == false, "strhashcrc32 should be disabled") +end + +print("ok") +--- out +ok +--- err + + + +=== TEST 2: JIT (sanity) +--- lua +jit.opt.start("minstitch=100000", "hotloop=2") + +for i = 1, 50 do + jit.strhashcrc32() +end + +print("ok") +--- out +ok +--- jv +--- err eval +qr/trace too short at jit\.strhashcrc32/ From 4fc291939e3c7716a914609d5aabcd38a8897c25 Mon Sep 17 00:00:00 2001 From: Thibault Charbonnier Date: Tue, 8 Oct 2019 17:51:22 -0700 Subject: [PATCH 4/7] feature: arm64: added support for CRC32 string hashing optimization. Only, available in ARMv8, the CRC32 instructions are enabled when LuaJIT is compiled with `-march=armv8-a+crc`. 
Co-authored-by: Debayan Ghosh --- .gitignore | 4 + src/lj_arch.h | 2 +- src/lj_str_hash.c | 80 +++++++++++++------ src/x64/Makefile | 13 --- {src/x64/test => test}/Makefile | 17 +++- {src/x64/test => test}/benchmark.cxx | 0 {src/x64/test => test}/test.cpp | 0 {src/x64/test => test}/test_str_comp.lua | 0 {src/x64/test => test}/test_util.cxx | 0 {src/x64/test => test}/test_util.hpp | 0 {src/x64/test => test}/unit/ffi/test_abi.lua | 0 .../unit/ffi/test_line_directive.lua | 0 .../unit/ffi/test_pragma_pack_pushpop.lua | 0 .../unit/ffi/test_var_attribute.lua | 0 {src/x64/test => test}/unit_test.sh | 2 +- 15 files changed, 75 insertions(+), 43 deletions(-) delete mode 100644 src/x64/Makefile rename {src/x64/test => test}/Makefile (69%) rename {src/x64/test => test}/benchmark.cxx (100%) rename {src/x64/test => test}/test.cpp (100%) rename {src/x64/test => test}/test_str_comp.lua (100%) rename {src/x64/test => test}/test_util.cxx (100%) rename {src/x64/test => test}/test_util.hpp (100%) rename {src/x64/test => test}/unit/ffi/test_abi.lua (100%) rename {src/x64/test => test}/unit/ffi/test_line_directive.lua (100%) rename {src/x64/test => test}/unit/ffi/test_pragma_pack_pushpop.lua (100%) rename {src/x64/test => test}/unit/ffi/test_var_attribute.lua (100%) rename {src/x64/test => test}/unit_test.sh (92%) diff --git a/.gitignore b/.gitignore index 9dd51e85f..ddfd03355 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,7 @@ *~ tags *.swo +test/*.txt +test/*.o +test/*.d +test/ht_test diff --git a/src/lj_arch.h b/src/lj_arch.h index bde8b96f3..ae02ad24b 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -616,7 +616,7 @@ extern void *LJ_WIN_LOADLIBA(const char *path); #endif /* Optimized string hashing, added by OpenResty. 
*/ -#if LUAJIT_TARGET == LUAJIT_ARCH_X64 && defined(__GNUC__) && defined(__SSE4_2__) +#if (LUAJIT_TARGET == LUAJIT_ARCH_X64 && defined(__SSE4_2__) || LUAJIT_TARGET == LUAJIT_ARCH_ARM64 && __ARM_FEATURE_CRC32) && defined(__GNUC__) #ifndef LJ_OR_DISABLE_STRHASHCRC32 #define LJ_OR_STRHASHCRC32 1 #endif diff --git a/src/lj_str_hash.c b/src/lj_str_hash.c index 1ae1d30e5..881866b55 100644 --- a/src/lj_str_hash.c +++ b/src/lj_str_hash.c @@ -1,8 +1,11 @@ /* - * This file defines string hash function using CRC32. It takes advantage of - * Intel hardware support (crc32 instruction, SSE 4.2) to speedup the CRC32 - * computation. The hash functions try to compute CRC32 of length and up - * to 128 bytes of given string. + * This file defines string hash function using CRC32. + * On Intel architectures, this implementation takes advantage of hardware + * support (CRC32 instruction, SSE 4.2) to speed up the CRC32 computation. + * On ARM64 architectures, this implementation utilizes the ARMv8.1-A extension + * which offers CRC32 instructions. + * The hash functions try to compute CRC32 of length and up to 128 bytes of + * the given string. 
*/ #define lj_str_hash_c @@ -15,13 +18,34 @@ #include #include #include -#include #include "lj_vm.h" +#if LUAJIT_TARGET == LUAJIT_ARCH_X64 +#include + +#define lj_crc32_u32 _mm_crc32_u32 +#define lj_crc32_u64 _mm_crc32_u64 + #ifndef F_CPU_SSE4_2 #define F_CPU_SSE4_2 (1 << 20) #endif +#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 +#include +#include +#include + +#define lj_crc32_u32 __crc32cw +#define lj_crc32_u64 __crc32cd + +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1 << 7) +#endif + +#else +#error "LJ_OR_STRHASHCRC32 not supported on this architecture" +#endif + #ifdef __MINGW32__ #define random() ((long) rand()) #define srandom(seed) srand(seed) @@ -49,7 +73,7 @@ static LJ_NOINLINE uint32_t lj_str_hash_1_4(const char* str, uint32_t len) v = (v << 8) | str[len >> 1]; v = (v << 8) | str[len - 1]; v = (v << 8) | len; - return _mm_crc32_u32(0, v); + return lj_crc32_u32(0, v); #else uint32_t a, b, h = len; @@ -79,9 +103,9 @@ static LJ_NOINLINE uint32_t lj_str_hash_4_16(const char* str, uint32_t len) v2 = *cast_uint32p(str + len - 4); } - h = _mm_crc32_u32(0, len); - h = _mm_crc32_u64(h, v1); - h = _mm_crc32_u64(h, v2); + h = lj_crc32_u32(0, len); + h = lj_crc32_u64(h, v1); + h = lj_crc32_u64(h, v2); return h; } @@ -92,18 +116,18 @@ static LJ_NOINLINE uint32_t lj_str_hash_16_128(const char* str, uint32_t len) uint64_t h1, h2; uint32_t i; - h1 = _mm_crc32_u32(0, len); + h1 = lj_crc32_u32(0, len); h2 = 0; for (i = 0; i < len - 16; i += 16) { - h1 += _mm_crc32_u64(h1, *cast_uint64p(str + i)); - h2 += _mm_crc32_u64(h2, *cast_uint64p(str + i + 8)); + h1 += lj_crc32_u64(h1, *cast_uint64p(str + i)); + h2 += lj_crc32_u64(h2, *cast_uint64p(str + i + 8)); }; - h1 = _mm_crc32_u64(h1, *cast_uint64p(str + len - 16)); - h2 = _mm_crc32_u64(h2, *cast_uint64p(str + len - 8)); + h1 = lj_crc32_u64(h1, *cast_uint64p(str + len - 16)); + h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8)); - return _mm_crc32_u32(h1, h2); + return lj_crc32_u32(h1, h2); } /* 
************************************************************************** @@ -167,7 +191,7 @@ static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str, pos1 = get_random_pos_unsafe(chunk_sz_log2, 0); pos2 = get_random_pos_unsafe(chunk_sz_log2, 1); - h1 = _mm_crc32_u32(0, len); + h1 = lj_crc32_u32(0, len); h2 = 0; /* loop over 14 chunks, 2 chunks at a time */ @@ -175,24 +199,24 @@ static LJ_NOINLINE uint32_t lj_str_hash_128_above(const char* str, chunk_ptr += chunk_sz, i++) { v = *cast_uint64p(chunk_ptr + pos1); - h1 = _mm_crc32_u64(h1, v); + h1 = lj_crc32_u64(h1, v); v = *cast_uint64p(chunk_ptr + chunk_sz + pos2); - h2 = _mm_crc32_u64(h2, v); + h2 = lj_crc32_u64(h2, v); } /* the last two chunks */ v = *cast_uint64p(chunk_ptr + pos1); - h1 = _mm_crc32_u64(h1, v); + h1 = lj_crc32_u64(h1, v); v = *cast_uint64p(chunk_ptr + chunk_sz - 8 - pos2); - h2 = _mm_crc32_u64(h2, v); + h2 = lj_crc32_u64(h2, v); /* process the trailing part */ - h1 = _mm_crc32_u64(h1, *cast_uint64p(str)); - h2 = _mm_crc32_u64(h2, *cast_uint64p(str + len - 8)); + h1 = lj_crc32_u64(h1, *cast_uint64p(str)); + h2 = lj_crc32_u64(h2, *cast_uint64p(str + len - 8)); - h1 = _mm_crc32_u32(h1, h2); + h1 = lj_crc32_u32(h1, h2); return h1; } @@ -233,8 +257,8 @@ static void lj_str_hash_init_random(void) } /* Init seed */ - seed = _mm_crc32_u32(0, getpid()); - seed = _mm_crc32_u32(seed, time(NULL)); + seed = lj_crc32_u32(0, getpid()); + seed = lj_crc32_u32(seed, time(NULL)); srandom(seed); /* Now start to populate the random_pos[][]. 
*/ @@ -266,9 +290,15 @@ static void lj_str_hash_init_random(void) LJ_FUNC unsigned char lj_check_crc32_support() { +#if LUAJIT_TARGET == LUAJIT_ARCH_X64 uint32_t features[4]; if (lj_vm_cpuid(1, features)) return (features[2] & F_CPU_SSE4_2) != 0; +#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 + uint32_t hwcap = getauxval(AT_HWCAP); + if (hwcap != ENOENT) + return (hwcap & HWCAP_CRC32) != 0; +#endif return 0; } diff --git a/src/x64/Makefile b/src/x64/Makefile deleted file mode 100644 index 27277140d..000000000 --- a/src/x64/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -.PHONY: default test benchmark clean - -default: - @echo "make target include: test bechmark clean" - -test: - $(MAKE) -C test test - -benchmark: - $(MAKE) -C test benchmark - -clean: - $(MAKE) -C test clean diff --git a/src/x64/test/Makefile b/test/Makefile similarity index 69% rename from src/x64/test/Makefile rename to test/Makefile index df1d380a0..46aa05cdb 100644 --- a/src/x64/test/Makefile +++ b/test/Makefile @@ -16,8 +16,19 @@ else VALGRIND := endif -CXXFLAGS := -O3 -MD -g -Wall -msse4.2 -I../.. 
-LDFLAGS := ../../libluajit.a -ldl -lm +CXXFLAGS := -O3 -MD -g -Wall -I../src +LDFLAGS := ../src/libluajit.a -ldl -lm + +TARGET_TESTARCH=$(shell $(CC) -E ../src/lj_arch.h -dM) +ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) + CXXFLAGS+= -msse4.2 +else +ifneq (,$(findstring LJ_TARGET_ARM64 ,$(TARGET_TESTARCH))) + CXXFLAGS+= -march=armv8-a+crc +else + $(error Unsupported target architecture) +endif +endif %.o: %.cxx $(CXX) $(CXXFLAGS) -MD -c $< @@ -28,7 +39,7 @@ test: $(TEST_PROGRAM) ./unit_test.sh @echo "smoke test" - ../../luajit test_str_comp.lua + ../src/luajit test_str_comp.lua benchmark: $(BENCHMARK_PROGRAM) # micro benchmark diff --git a/src/x64/test/benchmark.cxx b/test/benchmark.cxx similarity index 100% rename from src/x64/test/benchmark.cxx rename to test/benchmark.cxx diff --git a/src/x64/test/test.cpp b/test/test.cpp similarity index 100% rename from src/x64/test/test.cpp rename to test/test.cpp diff --git a/src/x64/test/test_str_comp.lua b/test/test_str_comp.lua similarity index 100% rename from src/x64/test/test_str_comp.lua rename to test/test_str_comp.lua diff --git a/src/x64/test/test_util.cxx b/test/test_util.cxx similarity index 100% rename from src/x64/test/test_util.cxx rename to test/test_util.cxx diff --git a/src/x64/test/test_util.hpp b/test/test_util.hpp similarity index 100% rename from src/x64/test/test_util.hpp rename to test/test_util.hpp diff --git a/src/x64/test/unit/ffi/test_abi.lua b/test/unit/ffi/test_abi.lua similarity index 100% rename from src/x64/test/unit/ffi/test_abi.lua rename to test/unit/ffi/test_abi.lua diff --git a/src/x64/test/unit/ffi/test_line_directive.lua b/test/unit/ffi/test_line_directive.lua similarity index 100% rename from src/x64/test/unit/ffi/test_line_directive.lua rename to test/unit/ffi/test_line_directive.lua diff --git a/src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua b/test/unit/ffi/test_pragma_pack_pushpop.lua similarity index 100% rename from 
src/x64/test/unit/ffi/test_pragma_pack_pushpop.lua rename to test/unit/ffi/test_pragma_pack_pushpop.lua diff --git a/src/x64/test/unit/ffi/test_var_attribute.lua b/test/unit/ffi/test_var_attribute.lua similarity index 100% rename from src/x64/test/unit/ffi/test_var_attribute.lua rename to test/unit/ffi/test_var_attribute.lua diff --git a/src/x64/test/unit_test.sh b/test/unit_test.sh similarity index 92% rename from src/x64/test/unit_test.sh rename to test/unit_test.sh index c6633ca2e..a80c10608 100755 --- a/src/x64/test/unit_test.sh +++ b/test/unit_test.sh @@ -2,7 +2,7 @@ DIR=$(cd $(dirname $0); pwd) cd $DIR -LUAJIT=$DIR/../../luajit +LUAJIT=$DIR/../src/luajit HASERR=0 find $DIR/unit -name "*.lua" -print | while read x; do From 26a519697d7b5493c3324036c9794b9c349e3d91 Mon Sep 17 00:00:00 2001 From: Thibault Charbonnier Date: Tue, 8 Oct 2019 17:51:19 -0700 Subject: [PATCH 5/7] travis-ci: added many missing testing suites and compilation flags. --- .travis.yml | 95 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/.travis.yml b/.travis.yml index e009903c0..f9bee608a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,13 @@ -sudo: required -dist: xenial - os: linux +dist: xenial +sudo: false language: c +arch: + - amd64 + #- arm64 + compiler: - gcc - clang @@ -12,38 +15,86 @@ compiler: addons: apt: packages: - - axel + - g++-5 + - valgrind - cpanminus - libtest-base-perl - libtext-diff-perl - libtest-longstring-perl - liblist-moreutils-perl - libparallel-forkmanager-perl - - libgd-dev - libmpc-dev - - build-essential - libgtk2.0-dev - - valgrind env: global: - - JOBS=3 - - LUAJIT_PREFIX=/opt/luajit21 - - LUAJIT_SYSM_PREFIX=/opt/luajit21-sysm - - LUAJIT_COMMON_XCFLAGS="-DLUA_USE_APICHECK -DLUA_USE_ASSERT -DLUAJIT_NUMMODE=2 -msse4.2 -O1" + - JOBS=6 + - PREFIX=$HOME/luajit + - XCFLAGS="-O1 -DLUA_USE_APICHECK -DLUA_USE_ASSERT -DLUAJIT_NUMMODE=2" matrix: - - LUAJIT_XCFLAGS="$LUAJIT_COMMON_XCFLAGS" - - 
LUAJIT_XCFLAGS="-DLUAJIT_ENABLE_LUA52COMPAT $LUAJIT_COMMON_XCFLAGS" LUA52=1 - - LUAJIT_XCFLAGS="-DLUAJIT_USE_VALGRIND -DLUAJIT_USE_SYSMALLOC -DLUAJIT_ENABLE_LUA52COMPAT $LUAJIT_COMMON_XCFLAGS" LUA52=1 FLAGS=-v - - LUAJIT_XCFLAGS="-DLUAJIT_DISABLE_GC64 -DLUAJIT_ENABLE_LUA52COMPAT $LUAJIT_COMMON_XCFLAGS" LUA52=1 + - XCFLAGS="-O3 -DLUAJIT_ENABLE_LUA52COMPAT -DLUAJIT_NUMMODE=2" # OpenResty releases + - XCFLAGS="-DLUAJIT_ENABLE_LUA52COMPAT -DLUAJIT_USE_VALGRIND -DLUAJIT_USE_SYSMALLOC $XCFLAGS" + - XCFLAGS="-DLUAJIT_DISABLE_JIT $XCFLAGS" + - XCFLAGS="-DLUAJIT_DISABLE_GC64 $XCFLAGS" + - XCFLAGS="-DLJ_OR_DISABLE_STRHASHCRC32 $XCFLAGS" + +matrix: + fast_finish: true + allow_failures: + - arch: arm64 + +stages: + - test + - openresty tests + - string hashing tests + +jobs: + allow_failures: + - arch: arm64 + include: + - stage: openresty tests + script: prove -j$JOBS t + env: + - stage: openresty tests + script: prove -j$JOBS t + env: + arch: arm64 + - stage: openresty tests + script: prove -j$JOBS t + env: TEST_LJ_USE_VALGRIND=1 XCFLAGS="-DLUAJIT_USE_VALGRIND -DLUAJIT_USE_SYSMALLOC $XCFLAGS" + - stage: string hashing tests + script: make -C test test + env: + - stage: string hashing tests + script: make -C test test + env: + arch: arm64 + - stage: string hashing tests + script: make -C test test + env: WITH_VALGRIND=1 XCFLAGS="-DLUAJIT_USE_VALGRIND -DLUAJIT_USE_SYSMALLOC $XCFLAGS" + +before_install: + - cpanm --local-lib=$TRAVIS_BUILD_DIR/perl5 local::lib && eval $(perl -I$TRAVIS_BUILD_DIR/perl5/lib/perl5/ -Mlocal::lib) + - cpanm --notest IPC::Run3 + - valgrind --version + - export CXX=g++-5 install: - - git clone https://github.com/openresty/luajit2-test-suite.git ../luajit2-test-suite + - if [[ "$(uname -m)" == "x86_64" ]]; then XCFLAGS="$XCFLAGS -msse4.2"; fi + - if [[ "$(uname -m)" == "aarch64" ]]; then XCFLAGS="$XCFLAGS -march=armv8-a+crc"; fi + - make -j$JOBS CCDEBUG=-g Q= PREFIX=$PREFIX XCFLAGS="$XCFLAGS" >build.log 2>&1 || (cat build.log && exit 1) + - make install 
PREFIX=$PREFIX >build.log 2>&1 || (cat build.log && exit 1) + - export PATH="$PREFIX/bin:$PATH" script: - - valgrind --version - - cd ../luajit2 - - make -j$JOBS CCDEBUG=-g Q= PREFIX=$LUAJIT_PREFIX CC=$CC XCFLAGS="$LUAJIT_XCFLAGS" > build.log 2>&1 || (cat build.log && exit 1) - - sudo make install PREFIX=$LUAJIT_PREFIX > build.log 2>&1 || (cat build.log && exit 1) - - cd ../luajit2-test-suite - - ./run-tests -j $JOBS $FLAGS $LUAJIT_PREFIX + - if [[ "$XCFLAGS" =~ "LUAJIT_ENABLE_LUA52COMPAT" ]]; then export LUA52=1; fi + - if [[ "$XCFLAGS" =~ "LUAJIT_USE_VALGRIND" ]]; then export FLAGS=-v; fi + - if [[ "$XCFLAGS" =~ "LJ_OR_DISABLE_STRHASHCRC32" ]]; then export NO_STRHASHCRC32=1; fi + - git clone https://github.com/openresty/luajit2-test-suite.git + - pushd luajit2-test-suite + - ./run-tests -j$JOBS $FLAGS $PREFIX $PREFIX/bin/luajit $CC $CXX + +cache: + apt: true + directories: + - perl5 From fe3b2dbeed337e84ab9e9d0ec12c93e05c6a0b8d Mon Sep 17 00:00:00 2001 From: Thibault Charbonnier Date: Tue, 17 Dec 2019 14:39:42 -0800 Subject: [PATCH 6/7] feature: dynamically enable the CRC32 string hashing optimization. 
--- .travis.yml | 2 -- src/Makefile | 13 +++++++++-- src/lib_jit.c | 4 ---- src/lj_arch.h | 2 +- src/lj_str_hash.c | 56 +++++++++++++++++++++++++---------------------- src/lj_str_hash.h | 2 +- t/strhashcrc32.t | 4 +++- 7 files changed, 46 insertions(+), 37 deletions(-) diff --git a/.travis.yml b/.travis.yml index f9bee608a..186c79966 100644 --- a/.travis.yml +++ b/.travis.yml @@ -80,8 +80,6 @@ before_install: - export CXX=g++-5 install: - - if [[ "$(uname -m)" == "x86_64" ]]; then XCFLAGS="$XCFLAGS -msse4.2"; fi - - if [[ "$(uname -m)" == "aarch64" ]]; then XCFLAGS="$XCFLAGS -march=armv8-a+crc"; fi - make -j$JOBS CCDEBUG=-g Q= PREFIX=$PREFIX XCFLAGS="$XCFLAGS" >build.log 2>&1 || (cat build.log && exit 1) - make install PREFIX=$PREFIX >build.log 2>&1 || (cat build.log && exit 1) - export PATH="$PREFIX/bin:$PATH" diff --git a/src/Makefile b/src/Makefile index 3fe0abb13..7b8e800dd 100644 --- a/src/Makefile +++ b/src/Makefile @@ -521,6 +521,15 @@ ALL_GEN= $(LJVM_S) $(ALL_HDRGEN) $(LIB_VMDEFP) WIN_RM= *.obj *.lib *.exp *.dll *.exe *.manifest *.pdb *.ilk ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM) +ifeq (x64,$(TARGET_LJARCH)) + lj_str_hash-CFLAGS = -msse4.2 +endif +ifeq (arm64,$(TARGET_LJARCH)) + lj_str_hash-CFLAGS = -march=armv8-a+crc +endif + +F_CFLAGS = $($(patsubst %.c,%-CFLAGS,$<)) + ############################################################################## # Build mode handling. 
############################################################################## @@ -685,8 +694,8 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c %.o: %.c $(E) "CC $@" - $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $< - $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $< + $(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $(@:.o=_dyn.o) $< + $(Q)$(TARGET_CC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $@ $< %.o: %.S $(E) "ASM $@" diff --git a/src/lib_jit.c b/src/lib_jit.c index 353c99a82..520b1eec0 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -159,11 +159,7 @@ LJLIB_CF(jit_prngstate) LJLIB_CF(jit_crc32) { -#if LJ_OR_STRHASHCRC32 setboolV(L->top++, lj_check_crc32_support()); -#else - setboolV(L->top++, 0); -#endif return 1; } diff --git a/src/lj_arch.h b/src/lj_arch.h index ae02ad24b..baebd76a2 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -616,7 +616,7 @@ extern void *LJ_WIN_LOADLIBA(const char *path); #endif /* Optimized string hashing, added by OpenResty. */ -#if (LUAJIT_TARGET == LUAJIT_ARCH_X64 && defined(__SSE4_2__) || LUAJIT_TARGET == LUAJIT_ARCH_ARM64 && __ARM_FEATURE_CRC32) && defined(__GNUC__) +#if defined(__GNUC__) && (LUAJIT_TARGET == LUAJIT_ARCH_X64 || LUAJIT_TARGET == LUAJIT_ARCH_ARM64) #ifndef LJ_OR_DISABLE_STRHASHCRC32 #define LJ_OR_STRHASHCRC32 1 #endif diff --git a/src/lj_str_hash.c b/src/lj_str_hash.c index 881866b55..0109843b8 100644 --- a/src/lj_str_hash.c +++ b/src/lj_str_hash.c @@ -13,12 +13,26 @@ #include "lj_str_hash.h" -#if LJ_OR_STRHASHCRC32 +#if LUAJIT_TARGET == LUAJIT_ARCH_X64 +#include "lj_vm.h" + +#ifndef F_CPU_SSE4_2 +#define F_CPU_SSE4_2 (1 << 20) +#endif + +#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 +#include +#include + +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1 << 7) +#endif +#endif +#if LJ_OR_STRHASHCRC32 #include #include #include -#include "lj_vm.h" #if LUAJIT_TARGET == LUAJIT_ARCH_X64 #include @@ -26,22 +40,12 @@ #define lj_crc32_u32 _mm_crc32_u32 #define lj_crc32_u64 _mm_crc32_u64 -#ifndef F_CPU_SSE4_2 -#define 
F_CPU_SSE4_2 (1 << 20) -#endif - #elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 -#include #include -#include #define lj_crc32_u32 __crc32cw #define lj_crc32_u64 __crc32cd -#ifndef HWCAP_CRC32 -#define HWCAP_CRC32 (1 << 7) -#endif - #else #error "LJ_OR_STRHASHCRC32 not supported on this architecture" #endif @@ -288,20 +292,6 @@ static void lj_str_hash_init_random(void) #undef POW2_MASK -LJ_FUNC unsigned char lj_check_crc32_support() -{ -#if LUAJIT_TARGET == LUAJIT_ARCH_X64 - uint32_t features[4]; - if (lj_vm_cpuid(1, features)) - return (features[2] & F_CPU_SSE4_2) != 0; -#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 - uint32_t hwcap = getauxval(AT_HWCAP); - if (hwcap != ENOENT) - return (hwcap & HWCAP_CRC32) != 0; -#endif - return 0; -} - LJ_FUNC void lj_init_strhashfn(global_State *g) { static StrHashFunction strhashfn; @@ -318,6 +308,20 @@ LJ_FUNC void lj_init_strhashfn(global_State *g) #endif +LJ_FUNC unsigned char lj_check_crc32_support() +{ +#if LUAJIT_TARGET == LUAJIT_ARCH_X64 + uint32_t features[4]; + if (lj_vm_cpuid(1, features)) + return (features[2] & F_CPU_SSE4_2) != 0; +#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM64 + uint32_t hwcap = getauxval(AT_HWCAP); + if (hwcap != ENOENT) + return (hwcap & HWCAP_CRC32) != 0; +#endif + return 0; +} + LJ_FUNC MSize lj_str_hash_orig(const char *str, size_t lenx) { MSize len = (MSize)lenx; diff --git a/src/lj_str_hash.h b/src/lj_str_hash.h index fb9a16019..f3b84bdf8 100644 --- a/src/lj_str_hash.h +++ b/src/lj_str_hash.h @@ -4,9 +4,9 @@ #include "lj_obj.h" LJ_FUNC MSize lj_str_hash_orig(const char *str, size_t lenx); +LJ_FUNC unsigned char lj_check_crc32_support(); #if LJ_OR_STRHASHCRC32 -LJ_FUNC unsigned char lj_check_crc32_support(); LJ_FUNC void lj_init_strhashfn(global_State *g); #endif diff --git a/t/strhashcrc32.t b/t/strhashcrc32.t index a208cb412..a5fc29bcc 100644 --- a/t/strhashcrc32.t +++ b/t/strhashcrc32.t @@ -13,7 +13,9 @@ __DATA__ --- lua jit.off() -if jit.crc32() then +if os.getenv("NO_STRHASHCRC32") == "1" then + 
assert(jit.strhashcrc32() == false, "strhashcrc32 should be disabled (LJ_OR_DISABLE_STRHASHCRC32)") +elseif jit.crc32() then assert(jit.strhashcrc32() == true, "strhashcrc32 should be enabled") else assert(jit.strhashcrc32() == false, "strhashcrc32 should be disabled") From 8d6dca39fec0bbcf4689b0dfc2d20cf6d90493b1 Mon Sep 17 00:00:00 2001 From: Thibault Charbonnier Date: Thu, 10 Oct 2019 14:53:37 -0700 Subject: [PATCH 7/7] doc: readme.md: documented the new CRC32 and string hashing APIs and macros. --- README.md | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 3e1b59a85..c0fd48c38 100644 --- a/README.md +++ b/README.md @@ -13,9 +13,12 @@ openresty/luajit2 - OpenResty's maintained branch of LuaJIT. * [table.nkeys](#tablenkeys) * [table.clone](#tableclone) * [jit.prngstate](#jitprngstate) + * [jit.crc32](#jitcrc32) + * [jit.strhashcrc32](#jitstrhashcrc32) * [thread.exdata](#threadexdata) * [New macros](#new-macros) * [`OPENRESTY_LUAJIT`](#openresty-luajit) + * [`LJ_OR_DISABLE_STRHASHCRC32`](#lj-or-disable-strhashcrc32) * [Optimizations](#optimizations) * [Updated JIT default parameters](#updated-jit-default-parameters) * [String hashing](#string-hashing) @@ -155,6 +158,45 @@ local newstate = jit.prngstate(123456) [Back to TOC](#table-of-contents) +### jit.crc32 + +**syntax:** *ok = jit.crc32()* + +Returns a boolean value indicating if the current architecture supports a CRC32 +instruction set. CRC32 support will be checked at runtime on x64 and ARM64 +platforms. + +CRC32 support allows for this branch to use an optimized string hashing +algorithm. See the [String hashing](#string-hashing) section for details on +how to enable this optimization. 
+ +Usage: + +```lua +local ok = jit.crc32() +``` + +[Back to TOC](#table-of-contents) + +### jit.strhashcrc32 + +**syntax:** *ok = jit.strhashcrc32()* + +Returns a boolean value indicating if the optimized string hashing algorithm +implemented by this branch is enabled. The `ok` return value will be `true` if +it is enabled, or `false` otherwise. + +See the [String hashing](#string-hashing) section for details on +how to enable this optimization. + +Usage: + +```lua +local ok = jit.strhashcrc32() +``` + +[Back to TOC](#table-of-contents) + ### thread.exdata **syntax:** *exdata = th_exdata(data?)* @@ -207,6 +249,14 @@ help distinguishing this OpenResty-specific branch of LuaJIT. [Back to TOC](#table-of-contents) +### `LJ_OR_DISABLE_STRHASHCRC32` + +When specified at compilation (`-DLJ_OR_DISABLE_STRHASHCRC32`), this flag will +disable the string hashing optimization described in the [String +hashing](#string-hashing) section. + +[Back to TOC](#table-of-contents) + ## Optimizations ### Updated JIT default parameters @@ -227,13 +277,17 @@ maxmcode=40960 -- in KB ### String hashing -This optimization only applies to Intel CPUs supporting the SSE 4.2 instruction -sets. For such CPUs, and when this branch is compiled with `-msse4.2`, the -string hashing function used for strings interning will be based on an -optimized crc32 implementation (see `lj_str_new()`). +This optimization modifies the string hashing algorithm to use a CRC32-based +variant. This variant still provides constant-time hashing complexity (`O(n)`) +but makes hash collision attacks harder for strings up to 127 bytes of size +(see `lj_str_new()`). + +This optimization is only available for x64 and ARM64 architectures, and will +be enabled if a CRC32 instruction set is detected at runtime (see +[jit.crc32](#jitcrc32)). -This optimization still provides constant-time hashing complexity (`O(n)`), but -makes hash collision attacks harder for strings up to 127 bytes of size.
+**Note:** This optimization can be disabled by compiling LuaJIT with +`-DLJ_OR_DISABLE_STRHASHCRC32`. [Back to TOC](#table-of-contents)