Skip to content

Commit

Permalink
Detect SSE4.2 support dynamically
Browse files Browse the repository at this point in the history
This is a port of the dynamic SSE4.2 detection feature from moonjit.
It makes luajit2 builds portable: the SSE4.2 string hash functions
are now built separately and are selected at runtime only when the
CPU supports SSE4.2.

This patch also includes work by Thomas Fransham in moonjit to support
Windows builds.
  • Loading branch information
siddhesh committed Oct 27, 2020
1 parent 6d6c9b3 commit 34b63ba
Show file tree
Hide file tree
Showing 10 changed files with 195 additions and 64 deletions.
18 changes: 12 additions & 6 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -507,10 +507,16 @@ LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
lj_carith.o lj_clib.o lj_cparse.o \
lj_lib.o lj_alloc.o lib_aux.o \
$(LJLIB_O) lib_init.o
$(LJLIB_O) lib_init.o lj_str_hash.o

ifeq (x64,$(TARGET_LJARCH))
lj_str_hash-CFLAGS = -msse4.2
endif

F_CFLAGS = $($(patsubst %.c,%-CFLAGS,$<))

LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) lj_init_dyn.o

LIB_VMDEF= jit/vmdef.lua
LIB_VMDEFP= $(LIB_VMDEF)
Expand All @@ -532,7 +538,7 @@ ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
##############################################################################

# Mixed mode defaults.
TARGET_O= $(LUAJIT_A)
TARGET_O= lj_init.o $(LUAJIT_A)
TARGET_T= $(LUAJIT_T) $(LUAJIT_SO)
TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO)

Expand Down Expand Up @@ -614,7 +620,7 @@ E= @echo
default all: $(TARGET_T)

amalg:
$(MAKE) all "LJCORE_O=ljamalg.o"
$(MAKE) all "LJCORE_O=ljamalg.o lj_str_hash.o"

clean:
$(HOST_RM) $(ALL_RM)
Expand Down Expand Up @@ -691,8 +697,8 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c

%.o: %.c
$(E) "CC $@"
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $(@:.o=_dyn.o) $<
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $@ $<

%.o: %.S
$(E) "ASM $@"
Expand Down
4 changes: 4 additions & 0 deletions src/lj_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@
#define LJ_TARGET_GC64 1
#endif

/* Opt in to the runtime-selected string hash functions (see the
** LJ_HAS_OPTIMISED_HASH sections in lj_str.h, lj_str.c and lj_init.c).
** Only enabled when the compiler defines __GNUC__.
*/
#ifdef __GNUC__
#define LJ_HAS_OPTIMISED_HASH 1
#endif

#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM

#define LJ_ARCH_NAME "arm"
Expand Down
69 changes: 69 additions & 0 deletions src/lj_init.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#include <stdint.h>
#include "lj_arch.h"
#include "lj_jit.h"
#include "lj_vm.h"
#include "lj_str.h"

#if LJ_TARGET_ARM && LJ_TARGET_LINUX
#include <sys/utsname.h>
#endif

/*
** LJ_INITIALIZER(f) opens the definition of a static function f(void) that
** is registered as a startup constructor. The function body follows the
** macro invocation. Two compiler-specific implementations are provided.
*/
#ifdef _MSC_VER
/*
** MSVC variant.
** Append a function pointer to the static constructor table executed by
** the C runtime.
** Based on https://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc
** see also https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization.
*/
#pragma section(".CRT$XCU",read)
/*
** LJ_INITIALIZER2_(f,p): forward-declares f, places a pointer to it (named
** f_) in the ".CRT$XCU" section and emits a linker "/include:" directive
** for the symbol p f_. The parameter p is the symbol name prefix.
*/
#define LJ_INITIALIZER2_(f,p) \
static void f(void); \
__declspec(allocate(".CRT$XCU")) void (*f##_)(void) = f; \
__pragma(comment(linker,"/include:" p #f "_")) \
static void f(void)
/*
** The prefix is empty when _WIN64 is defined and "_" otherwise
** (presumably to match C symbol decoration on 32-bit targets -- confirm).
*/
#ifdef _WIN64
#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"")
#else
#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"_")
#endif

#else
/* Non-MSVC variant: rely on the GNU-style constructor function attribute. */
#define LJ_INITIALIZER(f) static void __attribute__((constructor)) f(void)
#endif


#ifdef LJ_HAS_OPTIMISED_HASH
/* Select the string hash implementation from the detected CPU flags.
** Calls str_hash_init_sse42() when JIT_F_SSE4_2 is set in flags;
** otherwise does nothing.
*/
static void str_hash_init(uint32_t flags)
{
  const int has_sse42 = (flags & JIT_F_SSE4_2) != 0;

  if (!has_sse42) {
    return;
  }
  str_hash_init_sse42();
}

/* Startup initializer: probe CPU features needed by the interpreter (such
** as string hash function selection) and pass the resulting flag set to
** str_hash_init(). The probing is cherry-picked from lj_cpudetect rather
** than shared through a single initializer, so that merges with
** LuaJIT/LuaJIT remain convenient.
*/
LJ_INITIALIZER(lj_init_cpuflags)
{
  uint32_t cpuflags = 0;

#if LJ_TARGET_X86ORX64
  uint32_t query0[4];
  uint32_t query1[4];

  /* Feature flags are only set if both the 0 and the 1 query succeed. */
  if (lj_vm_cpuid(0, query0) && lj_vm_cpuid(1, query1)) {
    /* NOTE(review): word 2 of query 1 is presumably the ECX feature word;
    ** confirm against the lj_vm_cpuid output layout.
    */
    const uint32_t word2 = query1[2];

    if ((word2 >> 0) & 1) {
      cpuflags |= JIT_F_SSE3;
    }
    if ((word2 >> 19) & 1) {
      cpuflags |= JIT_F_SSE4_1;
    }
    if ((word2 >> 20) & 1) {
      cpuflags |= JIT_F_SSE4_2;
    }

    /* Query 7 is only issued when word 0 of query 0 is at least 7. */
    if (query0[0] >= 7) {
      uint32_t query7[4];

      /* The return value is not checked here. */
      lj_vm_cpuid(7, query7);
      if ((query7[1] >> 8) & 1) {
        cpuflags |= JIT_F_BMI2;
      }
    }
  }
#endif

  /* This is why detection runs this early: pick the string hash functions. */
  str_hash_init(cpuflags);
}
#endif
1 change: 1 addition & 0 deletions src/lj_jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
/* CPU feature flags, each a successive bit shift of JIT_F_CPU. */
#define JIT_F_SSE3 (JIT_F_CPU << 0)
#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
#define JIT_F_BMI2 (JIT_F_CPU << 2)
/* Tested in lj_init.c to decide whether to call str_hash_init_sse42(). */
#define JIT_F_SSE4_2 (JIT_F_CPU << 3)


/* NOTE(review): this string names SSE3, SSE4.1 and BMI2 only and has no
** entry for JIT_F_SSE4_2. Confirm whether it must stay in sync with the
** flag definitions above.
*/
#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
Expand Down
25 changes: 18 additions & 7 deletions src/lj_str.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "lj_str.h"
#include "lj_char.h"
#include "lj_prng.h"
#include "x64/src/lj_str_hash_x64.h"

/* -- String helpers ------------------------------------------------------ */

Expand Down Expand Up @@ -83,9 +82,22 @@ int lj_str_haspattern(GCstr *s)

/* -- String hashing ------------------------------------------------------ */

#ifndef ARCH_HASH_SPARSE
/* Hash function dispatch.
** With LJ_HAS_OPTIMISED_HASH, hash_sparse (and hash_dense, when
** LUAJIT_SECURITY_STRHASH is enabled) are global function pointers that
** start out pointing at the portable *_def implementations in this file.
** Presumably str_hash_init_sse42() repoints them at startup -- its
** definition is not part of this file.
** Without LJ_HAS_OPTIMISED_HASH, the names are plain macro aliases for the
** *_def functions, so there is no indirection.
*/
#ifdef LJ_HAS_OPTIMISED_HASH
/* Forward declaration, needed to initialize the pointer below. */
static StrHash hash_sparse_def (uint64_t, const char *, MSize);
str_sparse_hashfn hash_sparse = hash_sparse_def;
#if LUAJIT_SECURITY_STRHASH
/* Forward declaration, needed to initialize the pointer below. */
static StrHash hash_dense_def(uint64_t, StrHash, const char *, MSize);
str_dense_hashfn hash_dense = hash_dense_def;
#endif
#else
#define hash_sparse hash_sparse_def
#if LUAJIT_SECURITY_STRHASH
#define hash_dense hash_dense_def
#endif
#endif

/* Keyed sparse ARX string hash. Constant time. */
static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
static StrHash hash_sparse_def(uint64_t seed, const char *str, MSize len)
{
/* Constants taken from lookup3 hash by Bob Jenkins. */
StrHash a, b, h = len ^ (StrHash)seed;
Expand All @@ -106,12 +118,11 @@ static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
h ^= b; h -= lj_rol(b, 16);
return h;
}
#endif

#if LUAJIT_SECURITY_STRHASH && !defined(ARCH_HASH_DENSE)
#if LUAJIT_SECURITY_STRHASH
/* Keyed dense ARX string hash. Linear time. */
static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
const char *str, MSize len)
static LJ_NOINLINE StrHash hash_dense_def(uint64_t seed, StrHash h,
const char *str, MSize len)
{
StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4));
if (len > 12) {
Expand Down
12 changes: 12 additions & 0 deletions src/lj_str.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,16 @@ LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))

/* Runtime-selectable string hash functions. Only declared when
** LJ_HAS_OPTIMISED_HASH is defined.
*/
#ifdef LJ_HAS_OPTIMISED_HASH
/* Signature of a sparse string hash: (seed, string, length) -> hash. */
typedef StrHash (*str_sparse_hashfn) (uint64_t, const char *, MSize);
/* Currently selected sparse hash; defined in lj_str.c. */
extern str_sparse_hashfn hash_sparse;

#if LUAJIT_SECURITY_STRHASH
/* Signature of a dense string hash: (seed, hash, string, length) -> hash. */
typedef StrHash (*str_dense_hashfn) (uint64_t, StrHash, const char *, MSize);
/* Currently selected dense hash; defined in lj_str.c. */
extern str_dense_hashfn hash_dense;
#endif

/* Called from lj_init.c when the CPU reports JIT_F_SSE4_2. Presumably
** switches the pointers above to SSE4.2 implementations -- the definition
** is not shown here.
*/
extern void str_hash_init_sse42 (void);
#endif

#endif
Loading

0 comments on commit 34b63ba

Please sign in to comment.