diff --git a/src/arm/assembler-arm.cc b/src/arm/assembler-arm.cc index 32c60d1f8b55..2cc173cf37bd 100644 --- a/src/arm/assembler-arm.cc +++ b/src/arm/assembler-arm.cc @@ -4486,13 +4486,16 @@ void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1, emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2)); } -enum NeonPairwiseOp { VPMIN, VPMAX }; +enum NeonPairwiseOp { VPADD, VPMIN, VPMAX }; static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2) { int op_encoding = 0; switch (op) { + case VPADD: + op_encoding = 0xB * B8 | B4; + break; case VPMIN: op_encoding = 0xA * B8 | B4; break; @@ -4515,6 +4518,30 @@ static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt, n * B7 | m * B5 | vm | op_encoding; } +void Assembler::vpadd(DwVfpRegister dst, DwVfpRegister src1, + DwVfpRegister src2) { + DCHECK(IsEnabled(NEON)); + // Dd = vpadd(Dn, Dm) SIMD floating point pairwise ADD. + // Instruction details available in ARM DDI 0406C.b, A8-982. + int vd, d; + dst.split_code(&vd, &d); + int vn, n; + src1.split_code(&vn, &n); + int vm, m; + src2.split_code(&vm, &m); + + emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 | + m * B5 | vm); +} + +void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1, + DwVfpRegister src2) { + DCHECK(IsEnabled(NEON)); + // Dd = vpadd(Dn, Dm) SIMD integer pairwise ADD. + // Instruction details available in ARM DDI 0406C.b, A8-980. 
+ emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDatatype(size), dst, src1, src2)); +} + void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2) { DCHECK(IsEnabled(NEON)); diff --git a/src/arm/assembler-arm.h b/src/arm/assembler-arm.h index f16aee6e715d..bc81f3172909 100644 --- a/src/arm/assembler-arm.h +++ b/src/arm/assembler-arm.h @@ -1371,6 +1371,9 @@ class Assembler : public AssemblerBase { void vmax(QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); void vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1, QwNeonRegister src2); + void vpadd(DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2); + void vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1, + DwVfpRegister src2); void vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1, DwVfpRegister src2); void vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1, diff --git a/src/arm/constants-arm.h b/src/arm/constants-arm.h index 0b86f3e149d0..338ab40bae5f 100644 --- a/src/arm/constants-arm.h +++ b/src/arm/constants-arm.h @@ -324,6 +324,8 @@ enum LFlag { Short = 0 << 22 // Short load/store coprocessor. }; +// Neon sizes. +enum NeonSize { Neon8 = 0x0, Neon16 = 0x1, Neon32 = 0x2, Neon64 = 0x3 }; // NEON data type enum NeonDataType { @@ -339,6 +341,11 @@ enum NeonDataType { inline int NeonU(NeonDataType dt) { return static_cast(dt) >> 2; } inline int NeonSz(NeonDataType dt) { return static_cast(dt) & 0x3; } +// Convert sizes to data types (U bit is clear). +inline NeonDataType NeonSizeToDatatype(NeonSize size) { + return static_cast(size); +} + enum NeonListType { nlt_1 = 0x7, nlt_2 = 0xA, @@ -346,13 +353,6 @@ enum NeonListType { nlt_4 = 0x2 }; -enum NeonSize { - Neon8 = 0x0, - Neon16 = 0x1, - Neon32 = 0x2, - Neon64 = 0x3 -}; - // ----------------------------------------------------------------------------- // Supervisor Call (svc) specific support. 
diff --git a/src/arm/disasm-arm.cc b/src/arm/disasm-arm.cc index c9e7b1844b90..225cd7b41197 100644 --- a/src/arm/disasm-arm.cc +++ b/src/arm/disasm-arm.cc @@ -1950,6 +1950,13 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { op, size, Vd, Vn, Vm); break; } + case 0xb: { + // vpadd.i Dd, Dn, Dm. + out_buffer_pos_ += + SNPrintF(out_buffer_ + out_buffer_pos_, "vpadd.i%d d%d, d%d, d%d", + size, Vd, Vn, Vm); + break; + } case 0xd: { if (instr->Bit(4) == 0) { const char* op = (instr->Bits(21, 20) == 0) ? "vadd" : "vsub"; @@ -2130,10 +2137,16 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) { break; } case 0xd: { - if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { - // vmul.f32 Qd, Qn, Qm + if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && + instr->Bit(4) == 1) { + // vmul.f32 Qd, Qn, Qm out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, "vmul.f32 q%d, q%d, q%d", Vd, Vn, Vm); + } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 0 && + instr->Bit(4) == 0) { + // vpadd.f32 Dd, Dn, Dm. 
+ out_buffer_pos_ += SNPrintF(out_buffer_ + out_buffer_pos_, + "vpadd.f32 d%d, d%d, d%d", Vd, Vn, Vm); } else { Unknown(instr); } diff --git a/src/arm/simulator-arm.cc b/src/arm/simulator-arm.cc index cfc5e355af4a..2f43121a01cb 100644 --- a/src/arm/simulator-arm.cc +++ b/src/arm/simulator-arm.cc @@ -4278,6 +4278,20 @@ void PairwiseMinMax(Simulator* simulator, int Vd, int Vm, int Vn, bool min) { simulator->set_neon_register(Vd, dst); } +template +void PairwiseAdd(Simulator* simulator, int Vd, int Vm, int Vn) { + static const int kElems = kDoubleSize / sizeof(T); + static const int kPairs = kElems / 2; + T dst[kElems], src1[kElems], src2[kElems]; + simulator->get_neon_register(Vn, src1); + simulator->get_neon_register(Vm, src2); + for (int i = 0; i < kPairs; i++) { + dst[i] = src1[i * 2] + src1[i * 2 + 1]; + dst[i + kPairs] = src2[i * 2] + src2[i * 2 + 1]; + } + simulator->set_neon_register(Vd, dst); +} + void Simulator::DecodeSpecialCondition(Instruction* instr) { switch (instr->SpecialValue()) { case 4: { @@ -4489,6 +4503,25 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { } break; } + case 0xb: { + // vpadd.i Dd, Dn, Dm. 
+ NeonSize size = static_cast(instr->Bits(21, 20)); + switch (size) { + case Neon8: + PairwiseAdd(this, Vd, Vm, Vn); + break; + case Neon16: + PairwiseAdd(this, Vd, Vm, Vn); + break; + case Neon32: + PairwiseAdd(this, Vd, Vm, Vn); + break; + default: + UNREACHABLE(); + break; + } + break; + } case 0xd: { if (instr->Bit(4) == 0) { float src1[4], src2[4]; @@ -4837,7 +4870,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { break; } case 0xd: { - if (instr->Bit(21) == 0 && instr->Bit(6) == 1 && instr->Bit(4) == 1) { + if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 1 && + instr->Bit(4) == 1) { // vmul.f32 Qd, Qn, Qm float src1[4], src2[4]; get_neon_register(Vn, src1); @@ -4846,6 +4880,10 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) { src1[i] = src1[i] * src2[i]; } set_neon_register(Vd, src1); + } else if (instr->Bits(21, 20) == 0 && instr->Bit(6) == 0 && + instr->Bit(4) == 0) { + // vpadd.f32 Dd, Dn, Dm + PairwiseAdd(this, Vd, Vm, Vn); } else { UNIMPLEMENTED(); } diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc index e8c94eab889a..331a866662e3 100644 --- a/src/compiler/arm/code-generator-arm.cc +++ b/src/compiler/arm/code-generator-arm.cc @@ -496,6 +496,41 @@ Condition FlagsConditionToCondition(FlagsCondition condition) { DCHECK_EQ(LeaveCC, i.OutputSBit()); \ } while (0) +#define ASSEMBLE_NEON_NARROWING_OP(dt) \ + do { \ + Simd128Register dst = i.OutputSimd128Register(), \ + src0 = i.InputSimd128Register(0), \ + src1 = i.InputSimd128Register(1); \ + if (dst.is(src0) && dst.is(src1)) { \ + __ vqmovn(dt, dst.low(), src0); \ + __ vmov(dst.high(), dst.low()); \ + } else if (dst.is(src0)) { \ + __ vqmovn(dt, dst.low(), src0); \ + __ vqmovn(dt, dst.high(), src1); \ + } else { \ + __ vqmovn(dt, dst.high(), src1); \ + __ vqmovn(dt, dst.low(), src0); \ + } \ + } while (0) + +#define ASSEMBLE_NEON_PAIRWISE_OP(op, size) \ + do { \ + Simd128Register dst = i.OutputSimd128Register(), \ + src0 = 
i.InputSimd128Register(0), \ + src1 = i.InputSimd128Register(1); \ + if (dst.is(src0)) { \ + __ op(size, dst.low(), src0.low(), src0.high()); \ + if (dst.is(src1)) { \ + __ vmov(dst.high(), dst.low()); \ + } else { \ + __ op(size, dst.high(), src1.low(), src1.high()); \ + } \ + } else { \ + __ op(size, dst.high(), src1.low(), src1.high()); \ + __ op(size, dst.low(), src0.low(), src0.high()); \ + } \ + } while (0) + void CodeGenerator::AssembleDeconstructFrame() { __ LeaveFrame(StackFrame::MANUAL); unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset()); @@ -1611,6 +1646,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } + case kArmF32x4AddHoriz: { + Simd128Register dst = i.OutputSimd128Register(), + src0 = i.InputSimd128Register(0), + src1 = i.InputSimd128Register(1); + // Make sure we don't overwrite source data before it's used. + if (dst.is(src0)) { + __ vpadd(dst.low(), src0.low(), src0.high()); + if (dst.is(src1)) { + __ vmov(dst.high(), dst.low()); + } else { + __ vpadd(dst.high(), src1.low(), src1.high()); + } + } else { + __ vpadd(dst.high(), src1.low(), src1.high()); + __ vpadd(dst.low(), src0.low(), src0.high()); + } + break; + } case kArmF32x4Sub: { __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); @@ -1699,6 +1752,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } + case kArmI32x4AddHoriz: + ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32); + break; case kArmI32x4Sub: { __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); @@ -1818,25 +1874,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputInt4(1)); break; } - case kArmI16x8SConvertI32x4: { - Simd128Register dst = i.OutputSimd128Register(), - src0 = i.InputSimd128Register(0), - src1 = i.InputSimd128Register(1); - // Take care not to overwrite a source register before it's 
used. - if (dst.is(src0) && dst.is(src1)) { - __ vqmovn(NeonS16, dst.low(), src0); - __ vmov(dst.high(), dst.low()); - } else if (dst.is(src0)) { - // dst is src0, so narrow src0 first. - __ vqmovn(NeonS16, dst.low(), src0); - __ vqmovn(NeonS16, dst.high(), src1); - } else { - // dst may alias src1, so narrow src1 first. - __ vqmovn(NeonS16, dst.high(), src1); - __ vqmovn(NeonS16, dst.low(), src0); - } + case kArmI16x8SConvertI32x4: + ASSEMBLE_NEON_NARROWING_OP(NeonS16); break; - } case kArmI16x8Add: { __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); @@ -1847,6 +1887,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputSimd128Register(1)); break; } + case kArmI16x8AddHoriz: + ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16); + break; case kArmI16x8Sub: { __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); @@ -1909,25 +1952,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputInt4(1)); break; } - case kArmI16x8UConvertI32x4: { - Simd128Register dst = i.OutputSimd128Register(), - src0 = i.InputSimd128Register(0), - src1 = i.InputSimd128Register(1); - // Take care not to overwrite a source register before it's used. - if (dst.is(src0) && dst.is(src1)) { - __ vqmovn(NeonU16, dst.low(), src0); - __ vmov(dst.high(), dst.low()); - } else if (dst.is(src0)) { - // dst is src0, so narrow src0 first. - __ vqmovn(NeonU16, dst.low(), src0); - __ vqmovn(NeonU16, dst.high(), src1); - } else { - // dst may alias src1, so narrow src1 first. 
- __ vqmovn(NeonU16, dst.high(), src1); - __ vqmovn(NeonU16, dst.low(), src0); - } + case kArmI16x8UConvertI32x4: + ASSEMBLE_NEON_NARROWING_OP(NeonU16); break; - } case kArmI16x8AddSaturateU: { __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); @@ -1986,25 +2013,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputInt3(1)); break; } - case kArmI8x16SConvertI16x8: { - Simd128Register dst = i.OutputSimd128Register(), - src0 = i.InputSimd128Register(0), - src1 = i.InputSimd128Register(1); - // Take care not to overwrite a source register before it's used. - if (dst.is(src0) && dst.is(src1)) { - __ vqmovn(NeonS8, dst.low(), src0); - __ vmov(dst.high(), dst.low()); - } else if (dst.is(src0)) { - // dst is src0, so narrow src0 first. - __ vqmovn(NeonS8, dst.low(), src0); - __ vqmovn(NeonS8, dst.high(), src1); - } else { - // dst may alias src1, so narrow src1 first. - __ vqmovn(NeonS8, dst.high(), src1); - __ vqmovn(NeonS8, dst.low(), src0); - } + case kArmI8x16SConvertI16x8: + ASSEMBLE_NEON_NARROWING_OP(NeonS8); break; - } case kArmI8x16Add: { __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); @@ -2066,25 +2077,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( i.InputInt3(1)); break; } - case kArmI8x16UConvertI16x8: { - Simd128Register dst = i.OutputSimd128Register(), - src0 = i.InputSimd128Register(0), - src1 = i.InputSimd128Register(1); - // Take care not to overwrite a source register before it's used. - if (dst.is(src0) && dst.is(src1)) { - __ vqmovn(NeonU8, dst.low(), src0); - __ vmov(dst.high(), dst.low()); - } else if (dst.is(src0)) { - // dst is src0, so narrow src0 first. - __ vqmovn(NeonU8, dst.low(), src0); - __ vqmovn(NeonU8, dst.high(), src1); - } else { - // dst may alias src1, so narrow src1 first. 
- __ vqmovn(NeonU8, dst.high(), src1); - __ vqmovn(NeonU8, dst.low(), src0); - } + case kArmI8x16UConvertI16x8: + ASSEMBLE_NEON_NARROWING_OP(NeonU8); break; - } case kArmI8x16AddSaturateU: { __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), i.InputSimd128Register(1)); diff --git a/src/compiler/arm/instruction-codes-arm.h b/src/compiler/arm/instruction-codes-arm.h index 316a8aac3822..667da3d11335 100644 --- a/src/compiler/arm/instruction-codes-arm.h +++ b/src/compiler/arm/instruction-codes-arm.h @@ -134,6 +134,7 @@ namespace compiler { V(ArmF32x4RecipApprox) \ V(ArmF32x4RecipSqrtApprox) \ V(ArmF32x4Add) \ + V(ArmF32x4AddHoriz) \ V(ArmF32x4Sub) \ V(ArmF32x4Mul) \ V(ArmF32x4Min) \ @@ -152,6 +153,7 @@ namespace compiler { V(ArmI32x4Shl) \ V(ArmI32x4ShrS) \ V(ArmI32x4Add) \ + V(ArmI32x4AddHoriz) \ V(ArmI32x4Sub) \ V(ArmI32x4Mul) \ V(ArmI32x4MinS) \ @@ -179,6 +181,7 @@ namespace compiler { V(ArmI16x8SConvertI32x4) \ V(ArmI16x8Add) \ V(ArmI16x8AddSaturateS) \ + V(ArmI16x8AddHoriz) \ V(ArmI16x8Sub) \ V(ArmI16x8SubSaturateS) \ V(ArmI16x8Mul) \ diff --git a/src/compiler/arm/instruction-scheduler-arm.cc b/src/compiler/arm/instruction-scheduler-arm.cc index 3d60a696b330..afe8892d3239 100644 --- a/src/compiler/arm/instruction-scheduler-arm.cc +++ b/src/compiler/arm/instruction-scheduler-arm.cc @@ -118,6 +118,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmF32x4RecipApprox: case kArmF32x4RecipSqrtApprox: case kArmF32x4Add: + case kArmF32x4AddHoriz: case kArmF32x4Sub: case kArmF32x4Mul: case kArmF32x4Min: @@ -136,6 +137,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmI32x4Shl: case kArmI32x4ShrS: case kArmI32x4Add: + case kArmI32x4AddHoriz: case kArmI32x4Sub: case kArmI32x4Mul: case kArmI32x4MinS: @@ -163,6 +165,7 @@ int InstructionScheduler::GetTargetInstructionFlags( case kArmI16x8SConvertI32x4: case kArmI16x8Add: case kArmI16x8AddSaturateS: + case kArmI16x8AddHoriz: case kArmI16x8Sub: case kArmI16x8SubSaturateS: case 
kArmI16x8Mul: diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc index 40bf824269f3..d82859e01023 100644 --- a/src/compiler/arm/instruction-selector-arm.cc +++ b/src/compiler/arm/instruction-selector-arm.cc @@ -2441,78 +2441,81 @@ VISIT_ATOMIC_BINOP(Xor) V(I8x16ShrS) \ V(I8x16ShrU) -#define SIMD_BINOP_LIST(V) \ - V(F32x4Add, kArmF32x4Add) \ - V(F32x4Sub, kArmF32x4Sub) \ - V(F32x4Mul, kArmF32x4Mul) \ - V(F32x4Min, kArmF32x4Min) \ - V(F32x4Max, kArmF32x4Max) \ - V(F32x4Eq, kArmF32x4Eq) \ - V(F32x4Ne, kArmF32x4Ne) \ - V(F32x4Lt, kArmF32x4Lt) \ - V(F32x4Le, kArmF32x4Le) \ - V(I32x4Add, kArmI32x4Add) \ - V(I32x4Sub, kArmI32x4Sub) \ - V(I32x4Mul, kArmI32x4Mul) \ - V(I32x4MinS, kArmI32x4MinS) \ - V(I32x4MaxS, kArmI32x4MaxS) \ - V(I32x4Eq, kArmI32x4Eq) \ - V(I32x4Ne, kArmI32x4Ne) \ - V(I32x4LtS, kArmI32x4LtS) \ - V(I32x4LeS, kArmI32x4LeS) \ - V(I32x4MinU, kArmI32x4MinU) \ - V(I32x4MaxU, kArmI32x4MaxU) \ - V(I32x4LtU, kArmI32x4LtU) \ - V(I32x4LeU, kArmI32x4LeU) \ - V(I16x8SConvertI32x4, kArmI16x8SConvertI32x4) \ - V(I16x8Add, kArmI16x8Add) \ - V(I16x8AddSaturateS, kArmI16x8AddSaturateS) \ - V(I16x8Sub, kArmI16x8Sub) \ - V(I16x8SubSaturateS, kArmI16x8SubSaturateS) \ - V(I16x8Mul, kArmI16x8Mul) \ - V(I16x8MinS, kArmI16x8MinS) \ - V(I16x8MaxS, kArmI16x8MaxS) \ - V(I16x8Eq, kArmI16x8Eq) \ - V(I16x8Ne, kArmI16x8Ne) \ - V(I16x8LtS, kArmI16x8LtS) \ - V(I16x8LeS, kArmI16x8LeS) \ - V(I16x8UConvertI32x4, kArmI16x8UConvertI32x4) \ - V(I16x8AddSaturateU, kArmI16x8AddSaturateU) \ - V(I16x8SubSaturateU, kArmI16x8SubSaturateU) \ - V(I16x8MinU, kArmI16x8MinU) \ - V(I16x8MaxU, kArmI16x8MaxU) \ - V(I16x8LtU, kArmI16x8LtU) \ - V(I16x8LeU, kArmI16x8LeU) \ - V(I8x16SConvertI16x8, kArmI8x16SConvertI16x8) \ - V(I8x16Add, kArmI8x16Add) \ - V(I8x16AddSaturateS, kArmI8x16AddSaturateS) \ - V(I8x16Sub, kArmI8x16Sub) \ - V(I8x16SubSaturateS, kArmI8x16SubSaturateS) \ - V(I8x16Mul, kArmI8x16Mul) \ - V(I8x16MinS, kArmI8x16MinS) \ - V(I8x16MaxS, 
kArmI8x16MaxS) \ - V(I8x16Eq, kArmI8x16Eq) \ - V(I8x16Ne, kArmI8x16Ne) \ - V(I8x16LtS, kArmI8x16LtS) \ - V(I8x16LeS, kArmI8x16LeS) \ - V(I8x16UConvertI16x8, kArmI8x16UConvertI16x8) \ - V(I8x16AddSaturateU, kArmI8x16AddSaturateU) \ - V(I8x16SubSaturateU, kArmI8x16SubSaturateU) \ - V(I8x16MinU, kArmI8x16MinU) \ - V(I8x16MaxU, kArmI8x16MaxU) \ - V(I8x16LtU, kArmI8x16LtU) \ - V(I8x16LeU, kArmI8x16LeU) \ - V(S128And, kArmS128And) \ - V(S128Or, kArmS128Or) \ - V(S128Xor, kArmS128Xor) \ - V(S1x4And, kArmS128And) \ - V(S1x4Or, kArmS128Or) \ - V(S1x4Xor, kArmS128Xor) \ - V(S1x8And, kArmS128And) \ - V(S1x8Or, kArmS128Or) \ - V(S1x8Xor, kArmS128Xor) \ - V(S1x16And, kArmS128And) \ - V(S1x16Or, kArmS128Or) \ +#define SIMD_BINOP_LIST(V) \ + V(F32x4Add, kArmF32x4Add) \ + V(F32x4AddHoriz, kArmF32x4AddHoriz) \ + V(F32x4Sub, kArmF32x4Sub) \ + V(F32x4Mul, kArmF32x4Mul) \ + V(F32x4Min, kArmF32x4Min) \ + V(F32x4Max, kArmF32x4Max) \ + V(F32x4Eq, kArmF32x4Eq) \ + V(F32x4Ne, kArmF32x4Ne) \ + V(F32x4Lt, kArmF32x4Lt) \ + V(F32x4Le, kArmF32x4Le) \ + V(I32x4Add, kArmI32x4Add) \ + V(I32x4AddHoriz, kArmI32x4AddHoriz) \ + V(I32x4Sub, kArmI32x4Sub) \ + V(I32x4Mul, kArmI32x4Mul) \ + V(I32x4MinS, kArmI32x4MinS) \ + V(I32x4MaxS, kArmI32x4MaxS) \ + V(I32x4Eq, kArmI32x4Eq) \ + V(I32x4Ne, kArmI32x4Ne) \ + V(I32x4LtS, kArmI32x4LtS) \ + V(I32x4LeS, kArmI32x4LeS) \ + V(I32x4MinU, kArmI32x4MinU) \ + V(I32x4MaxU, kArmI32x4MaxU) \ + V(I32x4LtU, kArmI32x4LtU) \ + V(I32x4LeU, kArmI32x4LeU) \ + V(I16x8SConvertI32x4, kArmI16x8SConvertI32x4) \ + V(I16x8Add, kArmI16x8Add) \ + V(I16x8AddSaturateS, kArmI16x8AddSaturateS) \ + V(I16x8AddHoriz, kArmI16x8AddHoriz) \ + V(I16x8Sub, kArmI16x8Sub) \ + V(I16x8SubSaturateS, kArmI16x8SubSaturateS) \ + V(I16x8Mul, kArmI16x8Mul) \ + V(I16x8MinS, kArmI16x8MinS) \ + V(I16x8MaxS, kArmI16x8MaxS) \ + V(I16x8Eq, kArmI16x8Eq) \ + V(I16x8Ne, kArmI16x8Ne) \ + V(I16x8LtS, kArmI16x8LtS) \ + V(I16x8LeS, kArmI16x8LeS) \ + V(I16x8UConvertI32x4, kArmI16x8UConvertI32x4) \ + V(I16x8AddSaturateU, 
kArmI16x8AddSaturateU) \ + V(I16x8SubSaturateU, kArmI16x8SubSaturateU) \ + V(I16x8MinU, kArmI16x8MinU) \ + V(I16x8MaxU, kArmI16x8MaxU) \ + V(I16x8LtU, kArmI16x8LtU) \ + V(I16x8LeU, kArmI16x8LeU) \ + V(I8x16SConvertI16x8, kArmI8x16SConvertI16x8) \ + V(I8x16Add, kArmI8x16Add) \ + V(I8x16AddSaturateS, kArmI8x16AddSaturateS) \ + V(I8x16Sub, kArmI8x16Sub) \ + V(I8x16SubSaturateS, kArmI8x16SubSaturateS) \ + V(I8x16Mul, kArmI8x16Mul) \ + V(I8x16MinS, kArmI8x16MinS) \ + V(I8x16MaxS, kArmI8x16MaxS) \ + V(I8x16Eq, kArmI8x16Eq) \ + V(I8x16Ne, kArmI8x16Ne) \ + V(I8x16LtS, kArmI8x16LtS) \ + V(I8x16LeS, kArmI8x16LeS) \ + V(I8x16UConvertI16x8, kArmI8x16UConvertI16x8) \ + V(I8x16AddSaturateU, kArmI8x16AddSaturateU) \ + V(I8x16SubSaturateU, kArmI8x16SubSaturateU) \ + V(I8x16MinU, kArmI8x16MinU) \ + V(I8x16MaxU, kArmI8x16MaxU) \ + V(I8x16LtU, kArmI8x16LtU) \ + V(I8x16LeU, kArmI8x16LeU) \ + V(S128And, kArmS128And) \ + V(S128Or, kArmS128Or) \ + V(S128Xor, kArmS128Xor) \ + V(S1x4And, kArmS128And) \ + V(S1x4Or, kArmS128Or) \ + V(S1x4Xor, kArmS128Xor) \ + V(S1x8And, kArmS128And) \ + V(S1x8Or, kArmS128Or) \ + V(S1x8Xor, kArmS128Xor) \ + V(S1x16And, kArmS128And) \ + V(S1x16Or, kArmS128Or) \ V(S1x16Xor, kArmS128Xor) #define SIMD_SHUFFLE_OP_LIST(V) \ diff --git a/src/compiler/instruction-selector.cc b/src/compiler/instruction-selector.cc index e637657e3816..5d2200fcba29 100644 --- a/src/compiler/instruction-selector.cc +++ b/src/compiler/instruction-selector.cc @@ -1509,6 +1509,8 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitF32x4RecipSqrtApprox(node); case IrOpcode::kF32x4Add: return MarkAsSimd128(node), VisitF32x4Add(node); + case IrOpcode::kF32x4AddHoriz: + return MarkAsSimd128(node), VisitF32x4AddHoriz(node); case IrOpcode::kF32x4Sub: return MarkAsSimd128(node), VisitF32x4Sub(node); case IrOpcode::kF32x4Mul: @@ -1545,6 +1547,8 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitI32x4ShrS(node); case 
IrOpcode::kI32x4Add: return MarkAsSimd128(node), VisitI32x4Add(node); + case IrOpcode::kI32x4AddHoriz: + return MarkAsSimd128(node), VisitI32x4AddHoriz(node); case IrOpcode::kI32x4Sub: return MarkAsSimd128(node), VisitI32x4Sub(node); case IrOpcode::kI32x4Mul: @@ -1599,6 +1603,8 @@ void InstructionSelector::VisitNode(Node* node) { return MarkAsSimd128(node), VisitI16x8Add(node); case IrOpcode::kI16x8AddSaturateS: return MarkAsSimd128(node), VisitI16x8AddSaturateS(node); + case IrOpcode::kI16x8AddHoriz: + return MarkAsSimd128(node), VisitI16x8AddHoriz(node); case IrOpcode::kI16x8Sub: return MarkAsSimd128(node), VisitI16x8Sub(node); case IrOpcode::kI16x8SubSaturateS: @@ -2149,7 +2155,13 @@ void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) { } void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64 + +#if !V8_TARGET_ARCH_ARM +void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_ARM +#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64 void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); } @@ -2207,6 +2219,10 @@ void InstructionSelector::VisitI32x4ShrU(Node* node) { UNIMPLEMENTED(); } #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && // !V8_TARGET_ARCH_MIPS64 +#if !V8_TARGET_ARCH_ARM +void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_ARM + #if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64 void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) { UNIMPLEMENTED(); @@ -2261,7 +2277,13 @@ void InstructionSelector::VisitI16x8Add(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI16x8AddSaturateS(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_X64 && 
!V8_TARGET_ARCH_ARM + +#if !V8_TARGET_ARCH_ARM +void InstructionSelector::VisitI16x8AddHoriz(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_ARM +#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM void InstructionSelector::VisitI16x8Sub(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI16x8SubSaturateS(Node* node) { @@ -2355,7 +2377,9 @@ void InstructionSelector::VisitI8x16Add(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI8x16AddSaturateS(Node* node) { UNIMPLEMENTED(); } +#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM +#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM void InstructionSelector::VisitI8x16Sub(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitI8x16SubSaturateS(Node* node) { diff --git a/src/compiler/machine-operator.cc b/src/compiler/machine-operator.cc index 95363be7face..23f8515c35ed 100644 --- a/src/compiler/machine-operator.cc +++ b/src/compiler/machine-operator.cc @@ -232,6 +232,7 @@ MachineType AtomicOpRepresentationOf(Operator const* op) { V(F32x4RecipApprox, Operator::kNoProperties, 1, 0, 1) \ V(F32x4RecipSqrtApprox, Operator::kNoProperties, 1, 0, 1) \ V(F32x4Add, Operator::kCommutative, 2, 0, 1) \ + V(F32x4AddHoriz, Operator::kNoProperties, 2, 0, 1) \ V(F32x4Sub, Operator::kNoProperties, 2, 0, 1) \ V(F32x4Mul, Operator::kCommutative, 2, 0, 1) \ V(F32x4Min, Operator::kCommutative, 2, 0, 1) \ @@ -246,6 +247,7 @@ MachineType AtomicOpRepresentationOf(Operator const* op) { V(I32x4SConvertI16x8High, Operator::kNoProperties, 1, 0, 1) \ V(I32x4Neg, Operator::kNoProperties, 1, 0, 1) \ V(I32x4Add, Operator::kCommutative, 2, 0, 1) \ + V(I32x4AddHoriz, Operator::kNoProperties, 2, 0, 1) \ V(I32x4Sub, Operator::kNoProperties, 2, 0, 1) \ V(I32x4Mul, Operator::kCommutative, 2, 0, 1) \ V(I32x4MinS, Operator::kCommutative, 2, 0, 1) \ @@ -268,6 +270,7 @@ MachineType AtomicOpRepresentationOf(Operator const* op) { V(I16x8SConvertI32x4, Operator::kNoProperties, 2, 0, 1) \ V(I16x8Add, 
Operator::kCommutative, 2, 0, 1) \ V(I16x8AddSaturateS, Operator::kCommutative, 2, 0, 1) \ + V(I16x8AddHoriz, Operator::kNoProperties, 2, 0, 1) \ V(I16x8Sub, Operator::kNoProperties, 2, 0, 1) \ V(I16x8SubSaturateS, Operator::kNoProperties, 2, 0, 1) \ V(I16x8Mul, Operator::kCommutative, 2, 0, 1) \ diff --git a/src/compiler/machine-operator.h b/src/compiler/machine-operator.h index bca8d85f07ab..0f846ac2329a 100644 --- a/src/compiler/machine-operator.h +++ b/src/compiler/machine-operator.h @@ -444,6 +444,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* F32x4RecipApprox(); const Operator* F32x4RecipSqrtApprox(); const Operator* F32x4Add(); + const Operator* F32x4AddHoriz(); const Operator* F32x4Sub(); const Operator* F32x4Mul(); const Operator* F32x4Div(); @@ -464,6 +465,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* I32x4Shl(int32_t); const Operator* I32x4ShrS(int32_t); const Operator* I32x4Add(); + const Operator* I32x4AddHoriz(); const Operator* I32x4Sub(); const Operator* I32x4Mul(); const Operator* I32x4MinS(); @@ -493,6 +495,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final const Operator* I16x8SConvertI32x4(); const Operator* I16x8Add(); const Operator* I16x8AddSaturateS(); + const Operator* I16x8AddHoriz(); const Operator* I16x8Sub(); const Operator* I16x8SubSaturateS(); const Operator* I16x8Mul(); diff --git a/src/compiler/opcodes.h b/src/compiler/opcodes.h index 3d61172c1a9b..85db2a0f49e3 100644 --- a/src/compiler/opcodes.h +++ b/src/compiler/opcodes.h @@ -578,6 +578,7 @@ V(F32x4RecipApprox) \ V(F32x4RecipSqrtApprox) \ V(F32x4Add) \ + V(F32x4AddHoriz) \ V(F32x4Sub) \ V(F32x4Mul) \ V(F32x4Min) \ @@ -598,6 +599,7 @@ V(I32x4Shl) \ V(I32x4ShrS) \ V(I32x4Add) \ + V(I32x4AddHoriz) \ V(I32x4Sub) \ V(I32x4Mul) \ V(I32x4MinS) \ @@ -629,6 +631,7 @@ V(I16x8SConvertI32x4) \ V(I16x8Add) \ V(I16x8AddSaturateS) \ + V(I16x8AddHoriz) \ V(I16x8Sub) \ V(I16x8SubSaturateS) \ V(I16x8Mul) \ diff --git 
a/src/compiler/wasm-compiler.cc b/src/compiler/wasm-compiler.cc index 7272f6286ea9..158c70fc5c03 100644 --- a/src/compiler/wasm-compiler.cc +++ b/src/compiler/wasm-compiler.cc @@ -3197,6 +3197,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, case wasm::kExprF32x4Add: return graph()->NewNode(jsgraph()->machine()->F32x4Add(), inputs[0], inputs[1]); + case wasm::kExprF32x4AddHoriz: + return graph()->NewNode(jsgraph()->machine()->F32x4AddHoriz(), inputs[0], + inputs[1]); case wasm::kExprF32x4Sub: return graph()->NewNode(jsgraph()->machine()->F32x4Sub(), inputs[0], inputs[1]); @@ -3246,6 +3249,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, case wasm::kExprI32x4Add: return graph()->NewNode(jsgraph()->machine()->I32x4Add(), inputs[0], inputs[1]); + case wasm::kExprI32x4AddHoriz: + return graph()->NewNode(jsgraph()->machine()->I32x4AddHoriz(), inputs[0], + inputs[1]); case wasm::kExprI32x4Sub: return graph()->NewNode(jsgraph()->machine()->I32x4Sub(), inputs[0], inputs[1]); @@ -3319,6 +3325,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, case wasm::kExprI16x8AddSaturateS: return graph()->NewNode(jsgraph()->machine()->I16x8AddSaturateS(), inputs[0], inputs[1]); + case wasm::kExprI16x8AddHoriz: + return graph()->NewNode(jsgraph()->machine()->I16x8AddHoriz(), inputs[0], + inputs[1]); case wasm::kExprI16x8Sub: return graph()->NewNode(jsgraph()->machine()->I16x8Sub(), inputs[0], inputs[1]); diff --git a/src/wasm/wasm-opcodes.cc b/src/wasm/wasm-opcodes.cc index c279bc3ca8f6..2bafaa0aa89e 100644 --- a/src/wasm/wasm-opcodes.cc +++ b/src/wasm/wasm-opcodes.cc @@ -175,6 +175,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) { CASE_SIMD_OP(Sub, "sub") CASE_SIMD_OP(Mul, "mul") CASE_F32x4_OP(Abs, "abs") + CASE_F32x4_OP(AddHoriz, "add_horizontal") CASE_F32x4_OP(RecipApprox, "recip_approx") CASE_F32x4_OP(RecipSqrtApprox, "recip_sqrt_approx") CASE_F32x4_OP(Min, "min") @@ -203,6 +204,8 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) 
{ CASE_SIGN_OP(SIMDI, Ge, "ge") CASE_SIGN_OP(SIMDI, Shr, "shr") CASE_SIMDI_OP(Shl, "shl") + CASE_I32x4_OP(AddHoriz, "add_horizontal") + CASE_I16x8_OP(AddHoriz, "add_horizontal") CASE_SIGN_OP(I16x8, AddSaturate, "add_saturate") CASE_SIGN_OP(I8x16, AddSaturate, "add_saturate") CASE_SIGN_OP(I16x8, SubSaturate, "sub_saturate") diff --git a/src/wasm/wasm-opcodes.h b/src/wasm/wasm-opcodes.h index 6aba971802fa..b6248f27057e 100644 --- a/src/wasm/wasm-opcodes.h +++ b/src/wasm/wasm-opcodes.h @@ -289,6 +289,7 @@ constexpr WasmCodePosition kNoCodePosition = -1; V(F32x4RecipApprox, 0xe506, s_s) \ V(F32x4RecipSqrtApprox, 0xe507, s_s) \ V(F32x4Add, 0xe508, s_ss) \ + V(F32x4AddHoriz, 0xe5b9, s_ss) \ V(F32x4Sub, 0xe509, s_ss) \ V(F32x4Mul, 0xe50a, s_ss) \ V(F32x4Min, 0xe50c, s_ss) \ @@ -304,6 +305,7 @@ constexpr WasmCodePosition kNoCodePosition = -1; V(I32x4Splat, 0xe51b, s_i) \ V(I32x4Neg, 0xe51e, s_s) \ V(I32x4Add, 0xe51f, s_ss) \ + V(I32x4AddHoriz, 0xe5ba, s_ss) \ V(I32x4Sub, 0xe520, s_ss) \ V(I32x4Mul, 0xe521, s_ss) \ V(I32x4MinS, 0xe522, s_ss) \ @@ -330,6 +332,7 @@ constexpr WasmCodePosition kNoCodePosition = -1; V(I16x8Neg, 0xe53b, s_s) \ V(I16x8Add, 0xe53c, s_ss) \ V(I16x8AddSaturateS, 0xe53d, s_ss) \ + V(I16x8AddHoriz, 0xe5bb, s_ss) \ V(I16x8Sub, 0xe53e, s_ss) \ V(I16x8SubSaturateS, 0xe53f, s_ss) \ V(I16x8Mul, 0xe540, s_ss) \ diff --git a/test/cctest/test-assembler-arm.cc b/test/cctest/test-assembler-arm.cc index 056dd9ffa505..ea1d0c2a2cc5 100644 --- a/test/cctest/test-assembler-arm.cc +++ b/test/cctest/test-assembler-arm.cc @@ -1297,9 +1297,10 @@ TEST(15) { uint32_t vabs_s8[4], vabs_s16[4], vabs_s32[4]; uint32_t vneg_s8[4], vneg_s16[4], vneg_s32[4]; uint32_t veor[4], vand[4], vorr[4]; - float vdupf[4], vaddf[4], vsubf[4], vmulf[4]; + float vdupf[4], vaddf[4], vpaddf[2], vsubf[4], vmulf[4]; uint32_t vmin_s8[4], vmin_u16[4], vmin_s32[4]; uint32_t vmax_s8[4], vmax_u16[4], vmax_s32[4]; + uint32_t vpadd_i8[2], vpadd_i16[2], vpadd_i32[2]; uint32_t vpmin_s8[2], vpmin_u16[2], 
vpmin_s32[2]; uint32_t vpmax_s8[2], vpmax_u16[2], vpmax_s32[2]; uint32_t vadd8[4], vadd16[4], vadd32[4]; @@ -1545,6 +1546,13 @@ TEST(15) { __ vadd(q1, q1, q0); __ add(r4, r0, Operand(static_cast(offsetof(T, vaddf)))); __ vst1(Neon8, NeonListOperand(q1), NeonMemOperand(r4)); + // vpadd (float). + __ vmov(s0, 1.0); + __ vmov(s1, 2.0); + __ vmov(s2, 3.0); + __ vmov(s3, 4.0); + __ vpadd(d2, d0, d1); + __ vstr(d2, r0, offsetof(T, vpaddf)); // vsub (float). __ vmov(s4, 2.0); __ vdup(q0, s4); @@ -1637,6 +1645,17 @@ TEST(15) { __ add(r4, r0, Operand(static_cast(offsetof(T, vmax_s32)))); __ vst1(Neon8, NeonListOperand(q2), NeonMemOperand(r4)); + // vpadd integer. + __ mov(r4, Operand(0x03)); + __ vdup(Neon16, q0, r4); + __ vdup(Neon8, q1, r4); + __ vpadd(Neon8, d0, d0, d2); + __ vstr(d0, r0, offsetof(T, vpadd_i8)); + __ vpadd(Neon16, d0, d0, d2); + __ vstr(d0, r0, offsetof(T, vpadd_i16)); + __ vpadd(Neon32, d0, d0, d2); + __ vstr(d0, r0, offsetof(T, vpadd_i32)); + // vpmin/vpmax integer. __ mov(r4, Operand(0x03)); __ vdup(Neon16, q0, r4); @@ -2115,6 +2134,7 @@ TEST(15) { CHECK_EQ_SPLAT(vand, 0x00fe00feu); CHECK_EQ_SPLAT(vorr, 0x00ff00ffu); CHECK_EQ_SPLAT(vaddf, 2.0); + CHECK_EQ_32X2(vpaddf, 3.0, 7.0); CHECK_EQ_SPLAT(vminf, 1.0); CHECK_EQ_SPLAT(vmaxf, 2.0); CHECK_EQ_SPLAT(vsubf, -1.0); @@ -2137,6 +2157,9 @@ TEST(15) { CHECK_EQ_SPLAT(vmin_s32, 0xffffffffu); CHECK_EQ_SPLAT(vmax_s32, 0xffu); // [0, 3, 0, 3, ...] and [3, 3, 3, 3, ...] 
+ CHECK_EQ_32X2(vpadd_i8, 0x03030303u, 0x06060606u); + CHECK_EQ_32X2(vpadd_i16, 0x0c0c0606u, 0x06060606u); + CHECK_EQ_32X2(vpadd_i32, 0x12120c0cu, 0x06060606u); CHECK_EQ_32X2(vpmin_s8, 0x00000000u, 0x03030303u); CHECK_EQ_32X2(vpmax_s8, 0x03030303u, 0x03030303u); // [0, ffff, 0, ffff] and [ffff, ffff] diff --git a/test/cctest/test-disasm-arm.cc b/test/cctest/test-disasm-arm.cc index 450986d3d296..3df476500a18 100644 --- a/test/cctest/test-disasm-arm.cc +++ b/test/cctest/test-disasm-arm.cc @@ -1044,6 +1044,14 @@ TEST(Neon) { "f3142670 vmin.u16 q1, q2, q8"); COMPARE(vmax(NeonS32, q15, q0, q8), "f260e660 vmax.s32 q15, q0, q8"); + COMPARE(vpadd(d0, d1, d2), + "f3010d02 vpadd.f32 d0, d1, d2"); + COMPARE(vpadd(Neon8, d0, d1, d2), + "f2010b12 vpadd.i8 d0, d1, d2"); + COMPARE(vpadd(Neon16, d0, d1, d2), + "f2110b12 vpadd.i16 d0, d1, d2"); + COMPARE(vpadd(Neon32, d0, d1, d2), + "f2210b12 vpadd.i32 d0, d1, d2"); COMPARE(vpmax(NeonS8, d0, d1, d2), "f2010a02 vpmax.s8 d0, d1, d2"); COMPARE(vpmin(NeonU16, d1, d2, d8), diff --git a/test/cctest/wasm/test-run-wasm-simd.cc b/test/cctest/wasm/test-run-wasm-simd.cc index bd0133a3b72b..770add6b3549 100644 --- a/test/cctest/wasm/test-run-wasm-simd.cc +++ b/test/cctest/wasm/test-run-wasm-simd.cc @@ -1569,8 +1569,9 @@ WASM_SIMD_SELECT_TEST(8x16) #endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_X64 #if V8_TARGET_ARCH_ARM +// Test unary ops with a lane test pattern, all lanes distinct. 
template -void RunUnaryPermuteOpTest( +void RunUnaryLaneOpTest( WasmOpcode simd_op, const std::array& expected) { FLAG_wasm_simd_prototype = true; @@ -1591,35 +1592,35 @@ void RunUnaryPermuteOpTest( } WASM_EXEC_COMPILED_TEST(S32x2Reverse) { - RunUnaryPermuteOpTest(kExprS32x2Reverse, {{1, 0, 3, 2}}); + RunUnaryLaneOpTest(kExprS32x2Reverse, {{1, 0, 3, 2}}); } WASM_EXEC_COMPILED_TEST(S16x4Reverse) { - RunUnaryPermuteOpTest(kExprS16x4Reverse, {{3, 2, 1, 0, 7, 6, 5, 4}}); + RunUnaryLaneOpTest(kExprS16x4Reverse, {{3, 2, 1, 0, 7, 6, 5, 4}}); } WASM_EXEC_COMPILED_TEST(S16x2Reverse) { - RunUnaryPermuteOpTest(kExprS16x2Reverse, {{1, 0, 3, 2, 5, 4, 7, 6}}); + RunUnaryLaneOpTest(kExprS16x2Reverse, {{1, 0, 3, 2, 5, 4, 7, 6}}); } WASM_EXEC_COMPILED_TEST(S8x8Reverse) { - RunUnaryPermuteOpTest(kExprS8x8Reverse, {{7, 6, 5, 4, 3, 2, 1, 0, 15, - 14, 13, 12, 11, 10, 9, 8}}); + RunUnaryLaneOpTest(kExprS8x8Reverse, {{7, 6, 5, 4, 3, 2, 1, 0, 15, 14, + 13, 12, 11, 10, 9, 8}}); } WASM_EXEC_COMPILED_TEST(S8x4Reverse) { - RunUnaryPermuteOpTest(kExprS8x4Reverse, {{3, 2, 1, 0, 7, 6, 5, 4, 11, - 10, 9, 8, 15, 14, 13, 12}}); + RunUnaryLaneOpTest(kExprS8x4Reverse, {{3, 2, 1, 0, 7, 6, 5, 4, 11, 10, + 9, 8, 15, 14, 13, 12}}); } WASM_EXEC_COMPILED_TEST(S8x2Reverse) { - RunUnaryPermuteOpTest( - kExprS8x2Reverse, - {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}}); + RunUnaryLaneOpTest(kExprS8x2Reverse, {{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, + 11, 10, 13, 12, 15, 14}}); } +// Test binary ops with two lane test patterns, all lanes distinct. 
template -void RunBinaryPermuteOpTest( +void RunBinaryLaneOpTest( WasmOpcode simd_op, const std::array& expected) { FLAG_wasm_simd_prototype = true; @@ -1643,92 +1644,104 @@ void RunBinaryPermuteOpTest( } } +WASM_EXEC_COMPILED_TEST(F32x4AddHoriz) { + RunBinaryLaneOpTest(kExprF32x4AddHoriz, {{1.0f, 5.0f, 9.0f, 13.0f}}); +} + +WASM_EXEC_COMPILED_TEST(I32x4AddHoriz) { + RunBinaryLaneOpTest(kExprI32x4AddHoriz, {{1, 5, 9, 13}}); +} + +WASM_EXEC_COMPILED_TEST(I16x8AddHoriz) { + RunBinaryLaneOpTest(kExprI16x8AddHoriz, + {{1, 5, 9, 13, 17, 21, 25, 29}}); +} + WASM_EXEC_COMPILED_TEST(S32x4ZipLeft) { - RunBinaryPermuteOpTest(kExprS32x4ZipLeft, {{0, 4, 1, 5}}); + RunBinaryLaneOpTest(kExprS32x4ZipLeft, {{0, 4, 1, 5}}); } WASM_EXEC_COMPILED_TEST(S32x4ZipRight) { - RunBinaryPermuteOpTest(kExprS32x4ZipRight, {{2, 6, 3, 7}}); + RunBinaryLaneOpTest(kExprS32x4ZipRight, {{2, 6, 3, 7}}); } WASM_EXEC_COMPILED_TEST(S32x4UnzipLeft) { - RunBinaryPermuteOpTest(kExprS32x4UnzipLeft, {{0, 2, 4, 6}}); + RunBinaryLaneOpTest(kExprS32x4UnzipLeft, {{0, 2, 4, 6}}); } WASM_EXEC_COMPILED_TEST(S32x4UnzipRight) { - RunBinaryPermuteOpTest(kExprS32x4UnzipRight, {{1, 3, 5, 7}}); + RunBinaryLaneOpTest(kExprS32x4UnzipRight, {{1, 3, 5, 7}}); } WASM_EXEC_COMPILED_TEST(S32x4TransposeLeft) { - RunBinaryPermuteOpTest(kExprS32x4TransposeLeft, {{0, 4, 2, 6}}); + RunBinaryLaneOpTest(kExprS32x4TransposeLeft, {{0, 4, 2, 6}}); } WASM_EXEC_COMPILED_TEST(S32x4TransposeRight) { - RunBinaryPermuteOpTest(kExprS32x4TransposeRight, {{1, 5, 3, 7}}); + RunBinaryLaneOpTest(kExprS32x4TransposeRight, {{1, 5, 3, 7}}); } WASM_EXEC_COMPILED_TEST(S16x8ZipLeft) { - RunBinaryPermuteOpTest(kExprS16x8ZipLeft, - {{0, 8, 1, 9, 2, 10, 3, 11}}); + RunBinaryLaneOpTest(kExprS16x8ZipLeft, {{0, 8, 1, 9, 2, 10, 3, 11}}); } WASM_EXEC_COMPILED_TEST(S16x8ZipRight) { - RunBinaryPermuteOpTest(kExprS16x8ZipRight, - {{4, 12, 5, 13, 6, 14, 7, 15}}); + RunBinaryLaneOpTest(kExprS16x8ZipRight, + {{4, 12, 5, 13, 6, 14, 7, 15}}); } 
WASM_EXEC_COMPILED_TEST(S16x8UnzipLeft) { - RunBinaryPermuteOpTest(kExprS16x8UnzipLeft, - {{0, 2, 4, 6, 8, 10, 12, 14}}); + RunBinaryLaneOpTest(kExprS16x8UnzipLeft, + {{0, 2, 4, 6, 8, 10, 12, 14}}); } WASM_EXEC_COMPILED_TEST(S16x8UnzipRight) { - RunBinaryPermuteOpTest(kExprS16x8UnzipRight, - {{1, 3, 5, 7, 9, 11, 13, 15}}); + RunBinaryLaneOpTest(kExprS16x8UnzipRight, + {{1, 3, 5, 7, 9, 11, 13, 15}}); } WASM_EXEC_COMPILED_TEST(S16x8TransposeLeft) { - RunBinaryPermuteOpTest(kExprS16x8TransposeLeft, - {{0, 8, 2, 10, 4, 12, 6, 14}}); + RunBinaryLaneOpTest(kExprS16x8TransposeLeft, + {{0, 8, 2, 10, 4, 12, 6, 14}}); } WASM_EXEC_COMPILED_TEST(S16x8TransposeRight) { - RunBinaryPermuteOpTest(kExprS16x8TransposeRight, - {{1, 9, 3, 11, 5, 13, 7, 15}}); + RunBinaryLaneOpTest(kExprS16x8TransposeRight, + {{1, 9, 3, 11, 5, 13, 7, 15}}); } WASM_EXEC_COMPILED_TEST(S8x16ZipLeft) { - RunBinaryPermuteOpTest( + RunBinaryLaneOpTest( kExprS8x16ZipLeft, {{0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}}); } WASM_EXEC_COMPILED_TEST(S8x16ZipRight) { - RunBinaryPermuteOpTest( + RunBinaryLaneOpTest( kExprS8x16ZipRight, {{8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}}); } WASM_EXEC_COMPILED_TEST(S8x16UnzipLeft) { - RunBinaryPermuteOpTest( + RunBinaryLaneOpTest( kExprS8x16UnzipLeft, {{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}}); } WASM_EXEC_COMPILED_TEST(S8x16UnzipRight) { - RunBinaryPermuteOpTest( + RunBinaryLaneOpTest( kExprS8x16UnzipRight, {{1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}}); } WASM_EXEC_COMPILED_TEST(S8x16TransposeLeft) { - RunBinaryPermuteOpTest( + RunBinaryLaneOpTest( kExprS8x16TransposeLeft, {{0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}}); } WASM_EXEC_COMPILED_TEST(S8x16TransposeRight) { - RunBinaryPermuteOpTest( + RunBinaryLaneOpTest( kExprS8x16TransposeRight, {{1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}}); }