diff --git a/src/jit/ByteCodeParser.cpp b/src/jit/ByteCodeParser.cpp index 67b2c28c1..9c3943569 100644 --- a/src/jit/ByteCodeParser.cpp +++ b/src/jit/ByteCodeParser.cpp @@ -257,8 +257,6 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) OL2(OTV128ToI32, /* SD */ V128 | TMP, I32) \ OL4(OTOp3V128, /* SSSD */ V128 | TMP, V128 | TMP, V128 | NOTMP, V128 | TMP | S2) \ OL2(OTExtractLaneI64, /* SD */ V128 | TMP, I64) \ - OL2(OTExtractLaneF32, /* SD */ V128 | TMP, F32 | S0) \ - OL2(OTExtractLaneF64, /* SD */ V128 | TMP, F64 | S0) \ OL3(OTReplaceLaneI32, /* SSD */ V128 | NOTMP, I32, V128 | TMP | S0) \ OL3(OTReplaceLaneI64, /* SSD */ V128 | NOTMP, I64, V128 | TMP | S0) \ OL3(OTReplaceLaneF32, /* SSD */ V128 | NOTMP, F32 | NOTMP, V128 | TMP | S0) \ @@ -273,6 +271,8 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) OL4(OTOp2V128Tmp, /* SSDT */ V128 | NOTMP, V128 | TMP, V128 | TMP | S0, V128) \ OL3(OTOp2V128Rev, /* SSD */ V128 | TMP, V128 | NOTMP, V128 | TMP | S1) \ OL5(OTOp3DotAddV128, /* SSSDT */ V128 | TMP, V128 | TMP, V128 | NOTMP, V128 | TMP | S2, V128) \ + OL2(OTExtractLaneF32, /* SD */ V128 | TMP, F32 | S0) \ + OL2(OTExtractLaneF64, /* SD */ V128 | TMP, F64 | S0) \ OL3(OTShuffleV128, /* SSD */ V128 | NOTMP, V128 | NOTMP, V128 | TMP | S0) \ OL3(OTPopcntV128, /* SDT */ V128 | NOTMP, V128 | TMP | S0, V128) \ OL3(OTShiftV128, /* SSD */ V128 | NOTMP, I32, V128 | TMP | S0) \ @@ -289,6 +289,8 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) #define OPERAND_TYPE_LIST_SIMD_ARCH \ OL3(OTOp2V128, /* SSD */ V128 | TMP, V128 | TMP, V128 | TMP | S0 | S1) \ OL3(OTPMinMaxV128, /* SSD */ V128 | TMP, V128 | TMP, V128 | TMP) \ + OL2(OTExtractLaneF32, /* SD */ V128 | TMP, F32 | S0) \ + OL2(OTExtractLaneF64, /* SD */ V128 | TMP, F64 | S0) \ OL3(OTShuffleV128, /* SSD */ V128 | NOTMP, V128 | NOTMP, V128 | TMP | S0 | S1) \ OL3(OTShiftV128, /* SSD */ V128 | NOTMP, I32, V128 | TMP | S0) @@ -309,6 +311,8 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) OL2(OTOp1V128CB, /* SD */ V128 | NOTMP, V128 | NOTMP) \ OL3(OTOp2V128, /* SSD */ V128 | TMP, V128 | TMP, V128 | TMP | S0 | S1) \ OL3(OTMinMaxV128, /* SSD */ V128 | NOTMP, V128 | NOTMP, V128 | NOTMP) \ + OL2(OTExtractLaneF32, /* SD */ V128 | TMP, F32 | S0) \ + OL2(OTExtractLaneF64, /* SD */ V128 | TMP, F64 | S0) \ OL3(OTSwizzleV128, /* SSD */ V128 | TMP, V128 | NOTMP, V128 | TMP | S1) \ OL3(OTShuffleV128, /* SSD */ V128 | TMP, V128 | TMP, V128 | TMP) \ OL3(OTShiftV128, /* SSD */ V128 | NOTMP, I32, V128 | TMP | S0) @@ -328,6 +332,8 @@ static bool isFloatGlobal(uint32_t globalIndex, Module* module) OL2(OTOp1V128CB, /* SD */ V128 | NOTMP, V128 | NOTMP) \ OL3(OTOp2V128, /* SSD */ V128 | TMP, V128 | TMP, V128 | TMP | S0 | S1) \ OL3(OTOp1V128Tmp, /* SDT */ V128 | NOTMP, V128 | TMP | S0, V128) \ + OL2(OTExtractLaneF32, /* SD */ V128 | TMP, F32) \ + OL2(OTExtractLaneF64, /* SD */ V128 | TMP, F64) \ OL3(OTSwizzleV128, /* SSD */ V128 | TMP, V128 | NOTMP, V128 | TMP | S1) \ OL3(OTShuffleV128, /* SSD */ V128 | TMP, V128 | TMP, V128 | TMP) \ OL3(OTShiftV128, /* SSD */ V128 | NOTMP, I32, V128 | TMP | S0) diff --git a/src/jit/SimdRiscvInl.h b/src/jit/SimdRiscvInl.h index bb8f4e5cf..9cd10905a 100644 --- a/src/jit/SimdRiscvInl.h +++ b/src/jit/SimdRiscvInl.h @@ -77,6 +77,7 @@ enum TypeOpcode : uint32_t { vminu_vv = InstructionType::opivv | OPCODE(0x4), vmseq_vi = InstructionType::opivi | OPCODE(0x18), vmseq_vv = InstructionType::opivv | OPCODE(0x18), + vmsgt_vi = InstructionType::opivi | OPCODE(0x1f), vmsle_vv = InstructionType::opivv | OPCODE(0x1d), vmsleu_vv = InstructionType::opivv | OPCODE(0x1c), vmslt_vv = InstructionType::opivv | OPCODE(0x1b), @@ -112,6 +113,7 @@ enum TypeOpcode : uint32_t { vssub_vv = InstructionType::opivv | OPCODE(0x23), vssubu_vv = InstructionType::opivv | OPCODE(0x22), vsub_vv = InstructionType::opivv | OPCODE(0x2), + vsub_vx = InstructionType::opivx | OPCODE(0x2), vwmul_vv = InstructionType::opmvv | OPCODE(0x3b), vwmulu_vv = InstructionType::opmvv | OPCODE(0x38), vxor_vi = InstructionType::opivi | OPCODE(0xb), @@ -418,13 +420,12 @@ static void simdEmitPopcnt(sljit_compiler* compiler, sljit_s32 type, sljit_s32 r static void simdEmitSwizzle(sljit_compiler* compiler, sljit_s32 type, sljit_s32 rd, sljit_s32 rn, sljit_s32 rm) { - sljit_s32 tmp = SLJIT_TMP_DEST_VREG; - if (rd == rn) { - simdEmitTypedOp(compiler, type, SimdOp::vrgather_vv, tmp, rn, rm); - simdEmitOp(compiler, SimdOp::vmv_vv, rd, 0, tmp); - } else { - simdEmitTypedOp(compiler, type, SimdOp::vrgather_vv, rd, rn, rm); - } + sljit_s32 tmp1 = SLJIT_TMP_DEST_VREG; + sljit_s32 tmp2 = SLJIT_VR0; + + simdEmitTypedOp(compiler, type, SimdOp::vrgather_vv, tmp1, rn, rm); + simdEmitOp(compiler, SimdOp::vmsgt_vi, tmp2, rm, 0xf, SimdOp::rmIsImm); + simdEmitOp(compiler, SimdOp::vmerge_vi, rd, tmp1, 0, SimdOp::rmIsImm); } static void emitUnarySIMD(sljit_compiler* compiler, Instruction* instr) @@ -1309,6 +1310,32 @@ static void emitSelectSIMD(sljit_compiler* compiler, Instruction* instr) static void emitShuffleSIMD(sljit_compiler* compiler, Instruction* instr) { + Operand* operands = instr->operands(); + JITArg args[3]; + + simdOperandToArg(compiler, operands, args[0], SLJIT_SIMD_ELEM_128, SLJIT_TMP_FR0); + simdOperandToArg(compiler, operands + 1, args[1], SLJIT_SIMD_ELEM_128, SLJIT_TMP_FR1); + args[2].set(operands + 2); + sljit_s32 dst = GET_TARGET_REG(args[2].arg, instr->requiredReg(0)); + + I8X16Shuffle* shuffle = reinterpret_cast(instr->byteCode()); + sljit_s32 tmp1 = SLJIT_TMP_DEST_VREG; + sljit_s32 tmp2 = SLJIT_VR0; + + sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8, tmp2, + SLJIT_MEM0(), reinterpret_cast(shuffle->value())); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 16); + + simdEmitTypedOp(compiler, SLJIT_SIMD_ELEM_8, SimdOp::vrgather_vv, tmp1, args[0].arg, tmp2); + simdEmitOp(compiler, SimdOp::vsub_vx, tmp2, tmp2, SLJIT_TMP_DEST_REG, SimdOp::rmIsGpr); + simdEmitOp(compiler, SimdOp::vrgather_vv, dst, args[1].arg, tmp2); + + simdEmitOp(compiler, SimdOp::vmsgt_vi, tmp2, tmp2, 0x1f, SimdOp::rmIsImm); + simdEmitOp(compiler, SimdOp::vmerge_vv, dst, tmp1, dst); + + if (args[2].arg != dst) { + sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128, dst, args[2].arg, args[2].argw); + } } static void emitShiftSIMD(sljit_compiler* compiler, Instruction* instr)