diff --git a/sljit_src/sljitLir.c b/sljit_src/sljitLir.c index ac726ccb..fc36a14f 100644 --- a/sljit_src/sljitLir.c +++ b/sljit_src/sljitLir.c @@ -142,6 +142,9 @@ /* Mask for sljit_emit_enter. */ #define ENTER_GET_REGS(regs) ((regs) & 0xff) #define ENTER_GET_FLOAT_REGS(regs) (((regs) >> 8) & 0xff) +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) +#define ENTER_GET_VECTOR_REGS(regs) (((regs) >> 16) & 0xff) +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ #define SLJIT_KEPT_SAVEDS_COUNT(options) ((options) & 0x3) /* Getters for simd operations, which returns with log2(size). */ @@ -312,6 +315,12 @@ (((fscratches < SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS ? 0 : (fscratches - SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)) + \ (fsaveds)) * SSIZE_OF(type)) +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) +#define GET_SAVED_VECTOR_REGISTERS_SIZE(vscratches, vsaveds, type) \ + (((vscratches < SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS ? 0 : (vscratches - SLJIT_NUMBER_OF_SCRATCH_VECTOR_REGISTERS)) + \ + (vsaveds)) * SSIZE_OF(type)) +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ + #define ADJUST_LOCAL_OFFSET(p, i) \ if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ (i) += SLJIT_LOCALS_OFFSET; @@ -504,6 +513,10 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo compiler->saveds = -1; compiler->fscratches = -1; compiler->fsaveds = -1; +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + compiler->vscratches = -1; + compiler->vsaveds = -1; +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ compiler->local_size = -1; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -767,6 +780,10 @@ static SLJIT_INLINE void set_emit_enter(struct sljit_compiler *compiler, compiler->saveds = ENTER_GET_REGS(saveds); compiler->fscratches = ENTER_GET_FLOAT_REGS(scratches); compiler->fsaveds = ENTER_GET_FLOAT_REGS(saveds); +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + compiler->vscratches = ENTER_GET_VECTOR_REGS(scratches); + compiler->vsaveds = ENTER_GET_VECTOR_REGS(saveds); +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->last_return = args & SLJIT_ARG_MASK; compiler->logical_local_size = local_size; @@ -785,6 +802,10 @@ static SLJIT_INLINE void set_set_context(struct sljit_compiler *compiler, compiler->saveds = ENTER_GET_REGS(saveds); compiler->fscratches = ENTER_GET_FLOAT_REGS(scratches); compiler->fsaveds = ENTER_GET_FLOAT_REGS(saveds); +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + compiler->vscratches = ENTER_GET_VECTOR_REGS(scratches); + compiler->vsaveds = ENTER_GET_VECTOR_REGS(saveds); +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->last_return = args & SLJIT_ARG_MASK; compiler->logical_local_size = local_size; @@ -982,10 +1003,23 @@ static sljit_s32 function_check_dst(struct sljit_compiler *compiler, sljit_s32 p function_check_is_freg(compiler, (fr), (is_32)) static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32); +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) +#define FUNCTION_CHECK_IS_VREG(vr, is_32) \ + function_check_is_vreg(compiler, (vr), (is_32)) + +static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr, sljit_s32 is_32); +#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ +#define FUNCTION_CHECK_IS_VREG(vr, is_32) \ + function_check_is_freg(compiler, (vr), (is_32)) +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ #define FUNCTION_FCHECK(p, i, is_32) \ CHECK_ARGUMENT(function_fcheck(compiler, (p), (i), (is_32))); +#define FUNCTION_VCHECK(p, i, is_32) FUNCTION_FCHECK(p, i, is_32) + +#define FUNCTION_FVCHECK(p, i, is_32) FUNCTION_FCHECK(p, i, is_32) + static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i, sljit_s32 is_32) { if (compiler->scratches == -1) @@ -1010,6 +1044,23 @@ static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s || (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0) || (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS)); } +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) +#define FUNCTION_CHECK_IS_VREG(vr, is_32) \ + function_check_is_vreg(compiler, (vr)) + +static sljit_s32 function_check_is_vreg(struct sljit_compiler *compiler, sljit_s32 vr) +{ + if (compiler->scratches == -1) + return 0; + + return (vr >= SLJIT_VR0 && vr < (SLJIT_VR0 + compiler->vscratches)) + || (vr > (SLJIT_VS0 - compiler->vsaveds) && vr <= SLJIT_VS0) + || (vr >= SLJIT_TMP_VREGISTER_BASE && vr < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS)); +} +#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ +#define FUNCTION_CHECK_IS_VREG(vr, is_32) \ + function_check_is_freg(compiler, (vr)) +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ #define FUNCTION_FCHECK(p, i, is_32) \ CHECK_ARGUMENT(function_fcheck(compiler, (p), (i))); @@ -1027,6 +1078,28 @@ static sljit_s32 function_fcheck(struct sljit_compiler *compiler, sljit_s32 p, s return function_check_src_mem(compiler, p, i); } +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) +#define FUNCTION_VCHECK(p, i, is_32) \ + CHECK_ARGUMENT(function_vcheck(compiler, (p), (i))) +static sljit_s32 function_vcheck(struct sljit_compiler *compiler, sljit_s32 p, sljit_sw i) +{ + if (compiler->scratches == -1) + return 0; + + if ((p >= SLJIT_VR0 && p < SLJIT_VR0 + compiler->vscratches) + || (p > (SLJIT_VS0 - compiler->vsaveds) && p <= SLJIT_VS0) + || (p >= SLJIT_TMP_VREGISTER_BASE && p < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS))) + return (i==0); + + return function_check_src_mem(compiler, p, i); +} +#define FUNCTION_FVCHECK(p, i, is_32) \ + CHECK_ARGUMENT(function_fcheck(compiler, (p), (i)) || function_vcheck(compiler, (p), (i))); +#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ +#define FUNCTION_VCHECK(p, i, is_32) FUNCTION_FCHECK(p, i, is_32) +#define FUNCTION_FVCHECK(p, i, is_32) FUNCTION_FCHECK(p, i, is_32) +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ + #endif /* SLJIT_CONFIG_ARM_32 || SLJIT_CONFIG_MIPS_32 */ #endif /* SLJIT_ARGUMENT_CHECKS */ @@ -1280,7 +1353,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil sljit_s32 real_saveds = ENTER_GET_REGS(saveds); sljit_s32 real_fscratches = ENTER_GET_FLOAT_REGS(scratches); sljit_s32 real_fsaveds = ENTER_GET_FLOAT_REGS(saveds); -#endif +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + sljit_s32 real_vscratches = ENTER_GET_VECTOR_REGS(scratches); + sljit_s32 real_vsaveds = ENTER_GET_VECTOR_REGS(saveds); +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ +#endif /* SLJIT_ARGUMENT_CHECKS */ SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1290,20 +1367,25 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil CHECK_ARGUMENT((options & ~SLJIT_ENTER_CPU_SPECIFIC_OPTIONS) == 0); } CHECK_ARGUMENT(SLJIT_KEPT_SAVEDS_COUNT(options) <= 3 && SLJIT_KEPT_SAVEDS_COUNT(options) <= saveds); - CHECK_ARGUMENT((scratches & ~0xffff) == 0 && (saveds & ~0xffff) == 0); + CHECK_ARGUMENT((scratches & ~0xffffff) == 0 && (saveds & ~0xffffff) == 0); CHECK_ARGUMENT(real_scratches >= 0 && real_scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(real_saveds >= 0 && real_saveds <= SLJIT_NUMBER_OF_SAVED_REGISTERS); CHECK_ARGUMENT(real_scratches + real_saveds <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(real_fscratches >= 0 && real_fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(real_fsaveds >= 0 && real_fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(real_fscratches + real_fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + CHECK_ARGUMENT(real_vscratches >= 0 && real_vscratches <= SLJIT_NUMBER_OF_VECTOR_REGISTERS); + CHECK_ARGUMENT(real_vsaveds >= 0 && real_vsaveds <= SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS); + CHECK_ARGUMENT(real_vscratches + real_vsaveds <= SLJIT_NUMBER_OF_VECTOR_REGISTERS); +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) <= SLJIT_ARG_TYPE_F32); CHECK_ARGUMENT(function_check_arguments(arg_types, real_scratches, (options & SLJIT_ENTER_REG_ARG) ? 0 : real_saveds, real_fscratches)); compiler->last_flags = 0; -#endif +#endif /* SLJIT_ARGUMENT_CHECKS */ #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { fprintf(compiler->verbose, " enter ret[%s", call_arg_names[arg_types & SLJIT_ARG_MASK]); @@ -1335,8 +1417,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil } #endif /* !SLJIT_CONFIG_X86 */ +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, vscratches:%d, vsaveds:%d, local_size:%d\n", + ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds), + ENTER_GET_VECTOR_REGS(scratches), ENTER_GET_VECTOR_REGS(saveds), local_size); +#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds), local_size); +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ } #endif CHECK_RETURN_OK; @@ -1351,6 +1439,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi sljit_s32 real_saveds = ENTER_GET_REGS(saveds); sljit_s32 real_fscratches = ENTER_GET_FLOAT_REGS(scratches); sljit_s32 real_fsaveds = ENTER_GET_FLOAT_REGS(saveds); +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + sljit_s32 real_vscratches = ENTER_GET_VECTOR_REGS(scratches); + sljit_s32 real_vsaveds = ENTER_GET_VECTOR_REGS(saveds); +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ #endif SLJIT_UNUSED_ARG(compiler); @@ -1368,6 +1460,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi CHECK_ARGUMENT(real_fscratches >= 0 && real_fscratches <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(real_fsaveds >= 0 && real_fsaveds <= SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS); CHECK_ARGUMENT(real_fscratches + real_fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + CHECK_ARGUMENT(real_vscratches >= 0 && real_vscratches <= SLJIT_NUMBER_OF_VECTOR_REGISTERS); + CHECK_ARGUMENT(real_vsaveds >= 0 && real_vsaveds <= SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS); + CHECK_ARGUMENT(real_vscratches + real_vsaveds <= SLJIT_NUMBER_OF_VECTOR_REGISTERS); +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); CHECK_ARGUMENT((arg_types & SLJIT_ARG_FULL_MASK) < SLJIT_ARG_TYPE_F64); CHECK_ARGUMENT(function_check_arguments(arg_types, real_scratches, @@ -1406,8 +1503,14 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi } #endif /* !SLJIT_CONFIG_X86 */ +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, vscratches:%d, vsaveds:%d, local_size:%d\n", + ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds), + ENTER_GET_VECTOR_REGS(scratches), ENTER_GET_VECTOR_REGS(saveds), local_size); +#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ fprintf(compiler->verbose, " scratches:%d, saveds:%d, fscratches:%d, fsaveds:%d, local_size:%d\n", ENTER_GET_REGS(scratches), ENTER_GET_REGS(saveds), ENTER_GET_FLOAT_REGS(scratches), ENTER_GET_FLOAT_REGS(saveds), local_size); +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ } #endif CHECK_RETURN_OK; @@ -1878,8 +1981,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_register_index(sljit_s32 t if (type == SLJIT_GP_REGISTER) { CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_REGISTERS) || (reg >= SLJIT_TMP_REGISTER_BASE && reg < (SLJIT_TMP_REGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_REGISTERS))); - } else { - CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6))); + } +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + else if (((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6))) { + CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_VECTOR_REGISTERS) + || (reg >= SLJIT_TMP_VREGISTER_BASE && reg < (SLJIT_TMP_VREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_VECTOR_REGISTERS))); + } +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ + else { + CHECK_ARGUMENT(type == SLJIT_FLOAT_REGISTER || ((type >> 12) == 0 || ((type >> 12) >= 3 && (type >> 12) <= 6) || (type & (3 << 12)) || (type & (4 << 12)) || (type & (5 << 12)) || (type & (6 << 12)))); CHECK_ARGUMENT((reg > 0 && reg <= SLJIT_NUMBER_OF_FLOAT_REGISTERS) || (reg >= SLJIT_TMP_FREGISTER_BASE && reg < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS))); } @@ -1936,8 +2046,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(SLJIT_CHECK_OPCODE(op, 0) >= SLJIT_MOV_F64 && SLJIT_CHECK_OPCODE(op, 0) <= SLJIT_ABS_F64); CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); - FUNCTION_FCHECK(src, srcw, op & SLJIT_32); - FUNCTION_FCHECK(dst, dstw, op & SLJIT_32); + FUNCTION_FVCHECK(src, srcw, op & SLJIT_32); + FUNCTION_FVCHECK(dst, dstw, op & SLJIT_32); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -2767,8 +2877,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_mov(struct sljit_com CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) <= (srcdst & SLJIT_MEM) ? SLJIT_SIMD_GET_REG_SIZE(type) : 0); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0)); - FUNCTION_FCHECK(srcdst, srcdstw, 0); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0)); + FUNCTION_VCHECK(srcdst, srcdstw, 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -2808,13 +2918,13 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_replicate(struct slj CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0); CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0)); if (type & SLJIT_SIMD_FLOAT) { if (src == SLJIT_IMM) { CHECK_ARGUMENT(srcw == 0); } else { - FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); + FUNCTION_VCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); } } else if (src != SLJIT_IMM) { FUNCTION_CHECK_DST(src, srcw); @@ -2859,11 +2969,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_mov(struct slji CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); CHECK_ARGUMENT(!(type & SLJIT_32) || SLJIT_SIMD_GET_ELEM_SIZE(type) <= 2); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0)); CHECK_ARGUMENT(lane_index >= 0 && lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type)))); if (type & SLJIT_SIMD_FLOAT) { - FUNCTION_FCHECK(srcdst, srcdstw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); + FUNCTION_VCHECK(srcdst, srcdstw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); } else if ((type & SLJIT_SIMD_STORE) || srcdst != SLJIT_IMM) { FUNCTION_CHECK_DST(srcdst, srcdstw); } @@ -2907,8 +3017,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_lane_replicate(struc CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(0)) == 0); CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(src, 0)); CHECK_ARGUMENT(src_lane_index >= 0 && src_lane_index < (1 << (SLJIT_SIMD_GET_REG_SIZE(type) - SLJIT_SIMD_GET_ELEM_SIZE(type)))); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -2945,8 +3055,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_extend(struct sljit_ CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_ELEM2_SIZE(type)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0)); - FUNCTION_FCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0)); + FUNCTION_VCHECK(src, srcw, SLJIT_SIMD_GET_ELEM_SIZE(type) == 2); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -2983,7 +3093,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_sign(struct sljit_co CHECK_ARGUMENT((type & SLJIT_SIMD_TYPE_MASK(SLJIT_32)) == SLJIT_SIMD_STORE); CHECK_ARGUMENT(SLJIT_SIMD_CHECK_REG(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) < SLJIT_SIMD_GET_REG_SIZE(type)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(vreg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(vreg, 0)); FUNCTION_CHECK_DST(dst, dstw); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -3020,9 +3130,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_simd_op2(struct sljit_com CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM_SIZE(type) <= SLJIT_SIMD_GET_REG_SIZE(type)); CHECK_ARGUMENT(SLJIT_SIMD_GET_OPCODE(type) != SLJIT_SIMD_OP2_SHUFFLE || (SLJIT_SIMD_GET_ELEM_SIZE(type) == 0 && !(type & SLJIT_SIMD_FLOAT))); CHECK_ARGUMENT(SLJIT_SIMD_GET_ELEM2_SIZE(type) <= (src2 & SLJIT_MEM) ? SLJIT_SIMD_GET_REG_SIZE(type) : 0); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(dst_vreg, 0)); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_FREG(src1_vreg, 0)); - FUNCTION_FCHECK(src2, src2w, 0); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(dst_vreg, 0)); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_VREG(src1_vreg, 0)); + FUNCTION_VCHECK(src2, src2w, 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { diff --git a/sljit_src/sljitLir.h b/sljit_src/sljitLir.h index be0d742f..ef8a8688 100644 --- a/sljit_src/sljitLir.h +++ b/sljit_src/sljitLir.h @@ -529,6 +529,12 @@ struct sljit_compiler { sljit_s32 fscratches; /* Available float saved registers. */ sljit_s32 fsaveds; +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) + /* Available vector scratch registers. */ + sljit_s32 vscratches; + /* Available vector saved registers. */ + sljit_s32 vsaveds; +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ /* Local stack size. */ sljit_s32 local_size; /* Maximum code size. */ @@ -894,6 +900,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type); specified by SLJIT_ENTER_FLOAT. */ #define SLJIT_ENTER_FLOAT(regs) ((regs) << 8) +/* Vector scratch and saved registers can be specified + by SLJIT_ENTER_VECTOR. */ +#if (defined SLJIT_SEPARATE_VECTOR_REGISTERS && SLJIT_SEPARATE_VECTOR_REGISTERS) +#define SLJIT_ENTER_VECTOR(regs) ((regs) << 16) +#else /* !SLJIT_SEPARATE_VECTOR_REGISTERS */ +#define SLJIT_ENTER_VECTOR(regs) SLJIT_ENTER_FLOAT(regs) +#endif /* SLJIT_SEPARATE_VECTOR_REGISTERS */ + /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ #define SLJIT_MAX_LOCAL_SIZE 1048576 diff --git a/sljit_src/sljitNativeRISCV_common.c b/sljit_src/sljitNativeRISCV_common.c index 36f4238f..2e9684cb 100644 --- a/sljit_src/sljitNativeRISCV_common.c +++ b/sljit_src/sljitNativeRISCV_common.c @@ -2399,10 +2399,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, slji if (type == SLJIT_GP_REGISTER) return reg_map[reg]; - if (type != SLJIT_FLOAT_REGISTER) - return -1; + if (type == SLJIT_FLOAT_REGISTER) + return freg_map[reg]; - return freg_map[reg]; + return vreg_map[reg]; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, diff --git a/test_src/sljitTestSimd.h b/test_src/sljitTestSimd.h index f24779a6..78a49384 100644 --- a/test_src/sljitTestSimd.h +++ b/test_src/sljitTestSimd.h @@ -79,7 +79,7 @@ static void test_simd1(void) sljit_u8 supported[1]; sljit_u8* buf; sljit_u8 data[63 + 880]; - sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; + sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; if (verbose) printf("Run test_simd1\n"); @@ -113,7 +113,7 @@ static void test_simd1(void) simd_set(buf + 704, 85, 32); simd_set(buf + 801, 215, 32); - sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 2 | SLJIT_ENTER_FLOAT(6), 2 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0), 64); + sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 2 | SLJIT_ENTER_VECTOR(6), 2 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 64); type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_ALIGNED_128; sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0); @@ -302,7 +302,7 @@ static void test_simd2(void) sljit_u32 f32_result = 0; sljit_sw result[6]; sljit_s32 result32[5]; - sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; + sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; if (verbose) printf("Run test_simd2\n"); @@ -327,7 +327,7 @@ static void test_simd2(void) compiler = sljit_create_compiler(NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_FLOAT(6), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0), 16); + sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)tmp - 100000); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)tmp + 1000); sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 100000 / 2); @@ -727,7 +727,7 @@ static void test_simd3(void) sljit_u8 supported[1]; sljit_u8* buf; sljit_u8 data[63 + 768]; - sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; + sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; if (verbose) printf("Run test_simd3\n"); @@ -746,7 +746,7 @@ static void test_simd3(void) compiler = sljit_create_compiler(NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_FLOAT(6), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0), 16); + sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16); type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32); @@ -1026,7 +1026,7 @@ static void test_simd4(void) sljit_u8 supported[1]; sljit_u8* buf; sljit_u8 data[63 + 992]; - sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; + sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; if (verbose) printf("Run test_simd4\n"); @@ -1045,7 +1045,7 @@ static void test_simd4(void) compiler = sljit_create_compiler(NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_FLOAT(6), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0), 16); + sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16); type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0); @@ -1339,7 +1339,7 @@ static void test_simd5(void) sljit_u8 supported[1]; sljit_u8* buf; sljit_u8 data[63 + 672]; - sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; + sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; if (verbose) printf("Run test_simd5\n"); @@ -1358,7 +1358,7 @@ static void test_simd5(void) compiler = sljit_create_compiler(NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_FLOAT(6), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0), 16); + sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16); sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 100000); sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 10000); @@ -1704,7 +1704,7 @@ static void test_simd6(void) sljit_u8 supported[1]; sljit_u8* buf; sljit_u8 data[63 + 1088]; - sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; + sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; if (verbose) printf("Run test_simd6\n"); @@ -1732,7 +1732,7 @@ static void test_simd6(void) compiler = sljit_create_compiler(NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_FLOAT(6), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0), 32); + sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 32); type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_16; sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 128); @@ -2059,7 +2059,7 @@ static void test_simd7(void) sljit_u8 supported[1]; sljit_u8* buf; sljit_u8 data[63 + 288]; - sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; + sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5; sljit_uw resw[9]; sljit_u32 res32[7]; @@ -2097,7 +2097,7 @@ static void test_simd7(void) compiler = sljit_create_compiler(NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, options, SLJIT_ARGS3V(P, P, P), 4 | SLJIT_ENTER_FLOAT(6), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0), 16); + sljit_emit_enter(compiler, options, SLJIT_ARGS3V(P, P, P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16); type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0); @@ -2250,7 +2250,7 @@ static void test_simd8(void) sljit_u8 supported[1]; sljit_u8* buf; sljit_u8 data[63 + 1024]; - sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_VS1 : SLJIT_VR5; + sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS1 : SLJIT_VR5; if (verbose) printf("Run test_simd8\n"); @@ -2269,7 +2269,7 @@ static void test_simd8(void) compiler = sljit_create_compiler(NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, options, SLJIT_ARGS3V(P, P, P), 4 | SLJIT_ENTER_FLOAT(6), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0), 16); + sljit_emit_enter(compiler, options, SLJIT_ARGS3V(P, P, P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16); type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8; sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0); @@ -2424,7 +2424,7 @@ static void test_simd9(void) compiler = sljit_create_compiler(NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, options, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_FLOAT(6), 4, 16); + sljit_emit_enter(compiler, options, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_VECTOR(6), 4, 16); type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_128; sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, WCONST(0x1ca0ca0ca0ca0ca0, 0x1ca0ca0c)); @@ -2522,7 +2522,7 @@ static void test_simd10(void) sljit_s32 i, type; sljit_u8* buf; sljit_u8 data[63 + 288]; - sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? SLJIT_VS1 : SLJIT_VR5; + sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS1 : SLJIT_VR5; if (verbose) printf("Run test_simd10\n"); @@ -2545,7 +2545,7 @@ static void test_simd10(void) compiler = sljit_create_compiler(NULL); FAILED(!compiler, "cannot create compiler\n"); - sljit_emit_enter(compiler, options, SLJIT_ARGS3V(P, P, P), 4 | SLJIT_ENTER_FLOAT(6), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0 ? 2 : 0), 32); + sljit_emit_enter(compiler, options, SLJIT_ARGS3V(P, P, P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 32); type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_ALIGNED_64; sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);