Skip to content

Commit

Permalink
Improve vector reg support
Browse files (browse the repository at this point in the history)
  • Loading branch information
Zoltan Herczeg committed Nov 26, 2024
1 parent 2cf8a24 commit 9da5d97
Show file tree
Hide file tree
Showing 10 changed files with 217 additions and 140 deletions.
145 changes: 64 additions & 81 deletions sljit_src/sljitLir.c

Large diffs are not rendered by default.

6 changes: 1 addition & 5 deletions sljit_src/sljitLir.h
Original file line number Diff line number Diff line change
Expand Up @@ -902,11 +902,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type);

/* Vector scratch and saved registers can be specified
by SLJIT_ENTER_VECTOR. */
#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS)
#define SLJIT_ENTER_VECTOR(regs) ((regs) << 16)
#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */
#define SLJIT_ENTER_VECTOR(regs) SLJIT_ENTER_FLOAT(regs)
#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */
#define SLJIT_ENTER_VECTOR(regs) ((regs) << 16)

/* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */
#define SLJIT_MAX_LOCAL_SIZE 1048576
Expand Down
12 changes: 8 additions & 4 deletions sljit_src/sljitNativeARM_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -1256,8 +1256,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 options, sljit_s32 arg_types,
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_uw imm, offset;
sljit_s32 i, tmp, size, word_arg_count;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
Expand All @@ -1275,6 +1275,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi

scratches = ENTER_GET_REGS(scratches);
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;

imm = 0;
tmp = SLJIT_S0 - saveds;
Expand Down Expand Up @@ -1426,8 +1428,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types,
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_s32 size;

CHECK_ERROR();
Expand All @@ -1436,6 +1438,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp

scratches = ENTER_GET_REGS(scratches);
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);

/* Doubles are saved, so alignment is unaffected. */
Expand Down
13 changes: 9 additions & 4 deletions sljit_src/sljitNativeARM_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -1233,8 +1233,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 options, sljit_s32 arg_types,
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_s32 prev, fprev, saved_regs_size, i, tmp;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
sljit_ins offs;
Expand All @@ -1245,6 +1245,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi

scratches = ENTER_GET_REGS(scratches);
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;

saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2);
saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);

Expand Down Expand Up @@ -1412,8 +1415,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types,
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_s32 saved_regs_size;

CHECK_ERROR();
Expand All @@ -1422,6 +1425,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp

scratches = ENTER_GET_REGS(scratches);
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;
saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2);
saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);

Expand Down
12 changes: 8 additions & 4 deletions sljit_src/sljitNativeARM_T2_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -1395,8 +1395,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 options, sljit_s32 arg_types,
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_s32 size, i, tmp, word_arg_count;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
sljit_uw offset;
Expand All @@ -1415,6 +1415,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi

scratches = ENTER_GET_REGS(scratches);
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;

tmp = SLJIT_S0 - saveds;
for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
Expand Down Expand Up @@ -1610,8 +1612,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types,
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_s32 size;

CHECK_ERROR();
Expand All @@ -1620,6 +1622,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp

scratches = ENTER_GET_REGS(scratches);
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);

/* Doubles are saved, so alignment is unaffected. */
Expand Down
12 changes: 8 additions & 4 deletions sljit_src/sljitNativeLOONGARCH_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -913,8 +913,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 options, sljit_s32 arg_types,
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_s32 i, tmp, offset;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);

Expand All @@ -924,6 +924,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi

scratches = ENTER_GET_REGS(scratches);
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);

Expand Down Expand Up @@ -1001,15 +1003,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types,
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;

CHECK_ERROR();
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, local_size));
set_set_context(compiler, options, arg_types, scratches, saveds, local_size);

scratches = ENTER_GET_REGS(scratches);
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;
local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);

Expand Down
6 changes: 4 additions & 2 deletions sljit_src/sljitNativeS390X.c
Original file line number Diff line number Diff line change
Expand Up @@ -1664,8 +1664,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 options, sljit_s32 arg_types,
sljit_s32 scratches, sljit_s32 saveds, sljit_s32 local_size)
{
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
sljit_s32 offset, i, tmp;

Expand All @@ -1678,6 +1678,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi

scratches = ENTER_GET_REGS(scratches);
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;

offset = 2 * SSIZE_OF(sw);
if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
Expand Down
12 changes: 8 additions & 4 deletions sljit_src/sljitNativeX86_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
sljit_s32 saved_regs_size, tmp, i;
#ifdef _WIN64
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_s32 saved_float_regs_size;
sljit_s32 saved_float_regs_offset = 0;
sljit_s32 float_arg_count = 0;
Expand All @@ -477,6 +477,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
scratches = ENTER_GET_REGS(scratches);
#ifdef _WIN64
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;
#endif /* _WIN64 */

if (options & SLJIT_ENTER_REG_ARG)
Expand Down Expand Up @@ -642,8 +644,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
{
sljit_s32 saved_regs_size;
#ifdef _WIN64
sljit_s32 fscratches = ENTER_GET_FLOAT_REGS(scratches);
sljit_s32 fsaveds = ENTER_GET_FLOAT_REGS(saveds);
sljit_s32 fscratches;
sljit_s32 fsaveds;
sljit_s32 saved_float_regs_size;
#endif /* _WIN64 */

Expand All @@ -655,6 +657,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp

#ifdef _WIN64
saveds = ENTER_GET_REGS(saveds);
fscratches = compiler->fscratches;
fsaveds = compiler->fsaveds;

local_size += SLJIT_LOCALS_OFFSET;
saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);
Expand Down
5 changes: 3 additions & 2 deletions test_src/sljitTest.c
Original file line number Diff line number Diff line change
Expand Up @@ -9290,10 +9290,11 @@ int sljit_test(int argc, char* argv[])
test_simd8();
test_simd9();
test_simd10();
test_simd11();
} else {
if (verbose)
printf("no simd available, simd tests are skipped\n");
successful_tests += 10;
successful_tests += 11;
}

if (verbose)
Expand All @@ -9307,7 +9308,7 @@ int sljit_test(int argc, char* argv[])
sljit_free_unused_memory_exec();
#endif

# define TEST_COUNT 125
# define TEST_COUNT 126

printf("SLJIT tests: ");
if (successful_tests == TEST_COUNT)
Expand Down
Loading

0 comments on commit 9da5d97

Please sign in to comment.