Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic detection of SSE42 #75

Merged
merged 1 commit into from
Oct 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -507,10 +507,16 @@ LJCORE_O= lj_assert.o lj_gc.o lj_err.o lj_char.o lj_bc.o lj_obj.o lj_buf.o \
lj_ctype.o lj_cdata.o lj_cconv.o lj_ccall.o lj_ccallback.o \
lj_carith.o lj_clib.o lj_cparse.o \
lj_lib.o lj_alloc.o lib_aux.o \
$(LJLIB_O) lib_init.o
$(LJLIB_O) lib_init.o lj_str_hash.o

# Extra compiler flags for lj_str_hash.c: enable SSE4.2 code generation, but
# only when the target architecture is x64.
ifeq (x64,$(TARGET_LJARCH))
lj_str_hash-CFLAGS = -msse4.2
endif

# Per-source-file extra flags. For the current prerequisite foo.c ($<) this
# expands the variable named foo-CFLAGS; it is empty when no such variable is
# defined. Must stay recursively expanded (=) because $< only has a value
# while a rule's recipe is being expanded.
F_CFLAGS = $($(patsubst %.c,%-CFLAGS,$<))

LJVMCORE_O= $(LJVM_O) $(LJCORE_O)
LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o)
LJVMCORE_DYNO= $(LJVMCORE_O:.o=_dyn.o) lj_init_dyn.o

LIB_VMDEF= jit/vmdef.lua
LIB_VMDEFP= $(LIB_VMDEF)
Expand All @@ -532,7 +538,7 @@ ALL_RM= $(ALL_T) $(ALL_GEN) *.o host/*.o $(WIN_RM)
##############################################################################

# Mixed mode defaults.
TARGET_O= $(LUAJIT_A)
TARGET_O= lj_init.o $(LUAJIT_A)
TARGET_T= $(LUAJIT_T) $(LUAJIT_SO)
TARGET_DEP= $(LIB_VMDEF) $(LUAJIT_SO)

Expand Down Expand Up @@ -614,7 +620,7 @@ E= @echo
default all: $(TARGET_T)

amalg:
$(MAKE) all "LJCORE_O=ljamalg.o"
$(MAKE) all "LJCORE_O=ljamalg.o lj_str_hash.o"

clean:
$(HOST_RM) $(ALL_RM)
Expand Down Expand Up @@ -691,8 +697,8 @@ lj_folddef.h: $(BUILDVM_T) lj_opt_fold.c

%.o: %.c
$(E) "CC $@"
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) -c -o $(@:.o=_dyn.o) $<
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) -c -o $@ $<
$(Q)$(TARGET_DYNCC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $(@:.o=_dyn.o) $<
$(Q)$(TARGET_CC) $(TARGET_ACFLAGS) $(F_CFLAGS) -c -o $@ $<

%.o: %.S
$(E) "ASM $@"
Expand Down
4 changes: 4 additions & 0 deletions src/lj_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@
#define LJ_TARGET_GC64 1
#endif

/* Feature switch for the run-time selected string hash: when defined,
** lj_str.h declares the hash function pointers and lj_init.c performs
** early CPU detection. Only defined for compilers that define __GNUC__.
*/
#ifdef __GNUC__
#define LJ_HAS_OPTIMISED_HASH 1
#endif

#elif LUAJIT_TARGET == LUAJIT_ARCH_ARM

#define LJ_ARCH_NAME "arm"
Expand Down
69 changes: 69 additions & 0 deletions src/lj_init.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#include <stdint.h>
#include "lj_arch.h"
#include "lj_jit.h"
#include "lj_vm.h"
#include "lj_str.h"

#if LJ_TARGET_ARM && LJ_TARGET_LINUX
#include <sys/utsname.h>
#endif

#ifdef _MSC_VER
/*
** LJ_INITIALIZER(f) opens the definition of a static function
** `static void f(void)` that is registered with the C runtime's
** static-constructor mechanism; the function body follows the macro use.
**
** MSVC variant: append a function pointer to the static constructor table
** executed by the C runtime.
** Based on https://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc
** see also https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-initialization.
**
** LJ_INITIALIZER2_(f,p) forward-declares f, places a pointer named f_ that
** refers to f in the ".CRT$XCU" section and emits a linker "include"
** directive for the symbol named p followed by f_ (presumably so that the
** otherwise unreferenced pointer is not discarded -- confirm against the
** MSVC documentation linked above). p is a symbol-name prefix: it is ""
** when _WIN64 is defined and "_" otherwise.
*/
#pragma section(".CRT$XCU",read)
#define LJ_INITIALIZER2_(f,p) \
static void f(void); \
__declspec(allocate(".CRT$XCU")) void (*f##_)(void) = f; \
__pragma(comment(linker,"/include:" p #f "_")) \
static void f(void)
#ifdef _WIN64
#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"")
#else
#define LJ_INITIALIZER(f) LJ_INITIALIZER2_(f,"_")
#endif

#else
/* All other compilers: mark the function with __attribute__((constructor)). */
#define LJ_INITIALIZER(f) static void __attribute__((constructor)) f(void)
#endif


#ifdef LJ_HAS_OPTIMISED_HASH
/* Select the string hash implementation from the detected CPU flags.
** Calls str_hash_init_sse42() when JIT_F_SSE4_2 is set in flags; otherwise
** nothing is changed.
*/
static void str_hash_init(uint32_t flags)
{
  if ((flags & JIT_F_SSE4_2) == 0)
    return;  /* No SSE4.2 reported: leave the current hash functions alone. */
  str_hash_init_sse42();
}

/*
** Early CPU feature detection for interpreter-level features such as the
** choice of string hash function. Registered through LJ_INITIALIZER.
**
** The detection logic is deliberately cherry-picked from lj_cpudetect
** instead of sharing one initializer with it, so that merges with
** LuaJIT/LuaJIT stay convenient.
*/
LJ_INITIALIZER(lj_init_cpuflags)
{
  uint32_t cpuflags = 0;
#if LJ_TARGET_X86ORX64

  uint32_t info0[4];  /* Result of CPUID query 0. */
  uint32_t info1[4];  /* Result of CPUID query 1. */
  if (lj_vm_cpuid(0, info0) && lj_vm_cpuid(1, info1)) {
    /* Word 2 of query 1 carries the SSE feature bits tested below. */
    const uint32_t feat = info1[2];
    if (feat & (1u << 0))
      cpuflags |= JIT_F_SSE3;
    if (feat & (1u << 19))
      cpuflags |= JIT_F_SSE4_1;
    if (feat & (1u << 20))
      cpuflags |= JIT_F_SSE4_2;
    /* Query 7 is only issued when word 0 of query 0 is at least 7. */
    if (info0[0] >= 7) {
      uint32_t info7[4];
      lj_vm_cpuid(7, info7);
      if (info7[1] & (1u << 8))
        cpuflags |= JIT_F_BMI2;
    }
  }

#endif

  /* This is why detection runs so early: pick the string hash functions. */
  str_hash_init(cpuflags);
}
#endif
1 change: 1 addition & 0 deletions src/lj_jit.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#define JIT_F_SSE3 (JIT_F_CPU << 0)
#define JIT_F_SSE4_1 (JIT_F_CPU << 1)
#define JIT_F_BMI2 (JIT_F_CPU << 2)
#define JIT_F_SSE4_2 (JIT_F_CPU << 3)


#define JIT_F_CPUSTRING "\4SSE3\6SSE4.1\4BMI2"
Expand Down
25 changes: 18 additions & 7 deletions src/lj_str.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
#include "lj_str.h"
#include "lj_char.h"
#include "lj_prng.h"
#include "x64/src/lj_str_hash_x64.h"

/* -- String helpers ------------------------------------------------------ */

Expand Down Expand Up @@ -83,9 +82,22 @@ int lj_str_haspattern(GCstr *s)

/* -- String hashing ------------------------------------------------------ */

#ifndef ARCH_HASH_SPARSE
#ifdef LJ_HAS_OPTIMISED_HASH
/* With optimised hashing enabled, hash_sparse and hash_dense are global
** function pointers (declared extern in lj_str.h). They start out pointing
** at the portable *_def implementations in this file and have external
** linkage, so code outside this file can reassign them.
*/
static StrHash hash_sparse_def (uint64_t, const char *, MSize);
str_sparse_hashfn hash_sparse = hash_sparse_def;
#if LUAJIT_SECURITY_STRHASH
static StrHash hash_dense_def(uint64_t, StrHash, const char *, MSize);
str_dense_hashfn hash_dense = hash_dense_def;
#endif
#else
/* Without optimised hashing there is no indirection: the names are plain
** macro aliases for the portable *_def implementations.
*/
#define hash_sparse hash_sparse_def
#if LUAJIT_SECURITY_STRHASH
#define hash_dense hash_dense_def
#endif
#endif

/* Keyed sparse ARX string hash. Constant time. */
static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
static StrHash hash_sparse_def(uint64_t seed, const char *str, MSize len)
{
/* Constants taken from lookup3 hash by Bob Jenkins. */
StrHash a, b, h = len ^ (StrHash)seed;
Expand All @@ -106,12 +118,11 @@ static StrHash hash_sparse(uint64_t seed, const char *str, MSize len)
h ^= b; h -= lj_rol(b, 16);
return h;
}
#endif

#if LUAJIT_SECURITY_STRHASH && !defined(ARCH_HASH_DENSE)
#if LUAJIT_SECURITY_STRHASH
/* Keyed dense ARX string hash. Linear time. */
static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h,
const char *str, MSize len)
static LJ_NOINLINE StrHash hash_dense_def(uint64_t seed, StrHash h,
const char *str, MSize len)
{
StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4));
if (len > 12) {
Expand Down
12 changes: 12 additions & 0 deletions src/lj_str.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,16 @@ LJ_FUNC void LJ_FASTCALL lj_str_init(lua_State *L);
#define lj_str_newlit(L, s) (lj_str_new(L, "" s, sizeof(s)-1))
#define lj_str_size(len) (sizeof(GCstr) + (((len)+4) & ~(MSize)3))

#ifdef LJ_HAS_OPTIMISED_HASH
/* Signature of a sparse string hash: (seed, string, length) -> hash. */
typedef StrHash (*str_sparse_hashfn) (uint64_t, const char *, MSize);
/* Currently selected sparse hash function; defined in lj_str.c. */
extern str_sparse_hashfn hash_sparse;

#if LUAJIT_SECURITY_STRHASH
/* Signature of a dense string hash: (seed, hash, string, length) -> hash. */
typedef StrHash (*str_dense_hashfn) (uint64_t, StrHash, const char *, MSize);
/* Currently selected dense hash function; defined in lj_str.c. */
extern str_dense_hashfn hash_dense;
#endif

/* Called from lj_init.c when the CPU reports SSE4.2. Presumably installs
** SSE4.2-based implementations into the pointers above -- the definition
** is not part of this header, confirm in lj_str_hash.c.
*/
extern void str_hash_init_sse42 (void);
#endif

#endif
Loading