Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FMA3 #29

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft

FMA3 #29

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions build_cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ set(CodeGenTest_SRC
../tests/ExternJumpTest.h
../tests/FpIntMixTest.cpp
../tests/FpuTest.cpp
../tests/FloatFusedMultiAddTest.cpp
../tests/DoubleFusedMultiAddTest.cpp
../tests/HugeJumpTest.cpp
../tests/HugeJumpTestLiteral.cpp
../tests/HugeJumpTestLiteral.h
Expand Down
5 changes: 5 additions & 0 deletions include/Jitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ namespace Jitter
void FP_Max();
void FP_Min();
void FP_Mul();
void FP_MulAdd();
void FP_MulSub();
void FP_Div();
void FP_Cmp(CONDITION);
void FP_Neg();
Expand Down Expand Up @@ -228,6 +230,8 @@ namespace Jitter
void MD_UnpackUpperHW();
void MD_UnpackUpperWD();
void MD_Xor();
void MD_MulAdd();
void MD_MulSub();

CCodeGen* GetCodeGen();

Expand Down Expand Up @@ -285,6 +289,7 @@ namespace Jitter

void InsertUnaryStatement(Jitter::OPERATION);
void InsertBinaryStatement(Jitter::OPERATION);
void InsertTernaryStatement(Jitter::OPERATION);
void InsertUnaryMdStatement(Jitter::OPERATION);
void InsertBinaryMdStatement(Jitter::OPERATION);

Expand Down
22 changes: 22 additions & 0 deletions include/Jitter_CodeGen_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ namespace Jitter
static OpEdAvxType OpEdAvx() { return &CX86Assembler::VmulssEd; }
};

//Scalar single-precision fused multiply-add, FMA3 "213" operand order.
//Only an AVX emitter is provided here (no OpEd variant is defined);
//it resolves to CX86Assembler::Vfmadd213ssVo.
struct FPUOP_MULADD213 : public FPUOP_BASE
{
	static OpEdAvxType OpEdAvx()
	{
		return &CX86Assembler::Vfmadd213ssVo;
	}
};

//Scalar single-precision fused negated multiply-add, FMA3 "213" operand order.
//Only an AVX emitter is provided here (no OpEd variant is defined);
//it resolves to CX86Assembler::Vfnmadd213ssVo, i.e. the product is negated
//before the addition (per the Intel description of VFNMADD213SS).
struct FPUOP_MULSUB213 : public FPUOP_BASE
{
	static OpEdAvxType OpEdAvx()
	{
		return &CX86Assembler::Vfnmadd213ssVo;
	}
};

struct FPUOP_DIV : public FPUOP_BASE
{
static OpEdType OpEd() { return &CX86Assembler::DivssEd; }
Expand Down Expand Up @@ -390,6 +400,16 @@ namespace Jitter
static OpVoType OpVoAvx() { return &CX86Assembler::Vcvtdq2psVo; }
};

//Packed single-precision fused multiply-add, FMA3 "213" operand order.
//Only an AVX emitter is provided here; it resolves to
//CX86Assembler::Vfmadd213psVo.
struct MDOP_MULADD213 : public MDOP_BASE
{
	static OpVoAvxType OpVoAvx()
	{
		return &CX86Assembler::Vfmadd213psVo;
	}
};

//Packed single-precision fused negated multiply-add, FMA3 "213" operand order.
//Only an AVX emitter is provided here; it resolves to
//CX86Assembler::Vfnmadd213psVo, i.e. the product is negated before the
//addition (per the Intel description of VFNMADD213PS).
struct MDOP_MULSUB213 : public MDOP_BASE
{
	static OpVoAvxType OpVoAvx()
	{
		return &CX86Assembler::Vfnmadd213psVo;
	}
};

//MDOP SHIFT -----------------------------------------------------
struct MDOP_SHIFT_BASE
{
Expand Down Expand Up @@ -722,6 +742,7 @@ namespace Jitter
//FPUOP AVX
template <typename> void Emit_Fpu_Avx_MemMem(const STATEMENT&);
template <typename> void Emit_Fpu_Avx_MemMemMem(const STATEMENT&);
template <typename> void Emit_Fpu_Avx_MemMemMemMem(const STATEMENT&);

void Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT&);
void Emit_Fp_Avx_Rsqrt_MemMem(const STATEMENT&);
Expand All @@ -733,6 +754,7 @@ namespace Jitter
template <typename> void Emit_Md_Avx_VarVar(const STATEMENT&);
template <typename> void Emit_Md_Avx_VarVarVar(const STATEMENT&);
template <typename> void Emit_Md_Avx_VarVarVarRev(const STATEMENT&);
template <typename> void Emit_Md_Avx_VarVarVarVar(const STATEMENT&);
template <typename, uint8> void
Emit_Md_Avx_Shift_VarVarCst(const STATEMENT&);
void Emit_Md_Avx_Mov_RegVar(const STATEMENT&);
Expand Down
5 changes: 5 additions & 0 deletions include/Jitter_Statement.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,14 @@ namespace Jitter
OP_MD_CMPLT_S,
OP_MD_CMPGT_S,

OP_MD_MULADD,
OP_MD_MULSUB,

OP_FP_ADD,
OP_FP_SUB,
OP_FP_MUL,
OP_FP_MULADD,
OP_FP_MULSUB,
OP_FP_DIV,
OP_FP_SQRT,
OP_FP_RSQRT,
Expand Down
5 changes: 5 additions & 0 deletions include/X86Assembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,11 @@ class CX86Assembler
void VcmppsVo(XMMREGISTER, XMMREGISTER, const CAddress&, SSE_CMP_TYPE);

void VblendpsVo(XMMREGISTER, XMMREGISTER, const CAddress&, uint8);
void Vfmadd213ssVo(XMMREGISTER, XMMREGISTER, const CAddress&);
void Vfnmadd213ssVo(XMMREGISTER, XMMREGISTER, const CAddress&);
void Vfmadd213psVo(XMMREGISTER, XMMREGISTER, const CAddress&);
void Vfnmadd213psVo(XMMREGISTER, XMMREGISTER, const CAddress&);

void VshufpsVo(XMMREGISTER, XMMREGISTER, const CAddress&, uint8);

private:
Expand Down
75 changes: 75 additions & 0 deletions src/Jitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,36 @@ void CJitter::FP_Mul()
m_shadow.Push(tempSym);
}

void CJitter::FP_MulAdd()
{
SymbolPtr tempSym = MakeSymbol(SYM_FP_TMP_SINGLE, m_nextTemporary++);

STATEMENT statement;
statement.op = OP_FP_MULADD;
statement.src1 = MakeSymbolRef(m_shadow.Pull());
statement.src2 = MakeSymbolRef(m_shadow.Pull());
statement.src3 = MakeSymbolRef(m_shadow.Pull());
statement.dst = MakeSymbolRef(tempSym);
InsertStatement(statement);

m_shadow.Push(tempSym);
}

void CJitter::FP_MulSub()
{
SymbolPtr tempSym = MakeSymbol(SYM_FP_TMP_SINGLE, m_nextTemporary++);

STATEMENT statement;
statement.op = OP_FP_MULSUB;
statement.src1 = MakeSymbolRef(m_shadow.Pull());
statement.src2 = MakeSymbolRef(m_shadow.Pull());
statement.src3 = MakeSymbolRef(m_shadow.Pull());
statement.dst = MakeSymbolRef(tempSym);
InsertStatement(statement);

m_shadow.Push(tempSym);
}

void CJitter::FP_Div()
{
SymbolPtr tempSym = MakeSymbol(SYM_FP_TMP_SINGLE, m_nextTemporary++);
Expand Down Expand Up @@ -1585,6 +1615,36 @@ void CJitter::MD_ToSingle()
InsertUnaryMdStatement(OP_MD_TOSINGLE);
}

void CJitter::MD_MulAdd()
{
SymbolPtr tempSym = MakeSymbol(SYM_TEMPORARY128, m_nextTemporary++);

STATEMENT statement;
statement.op = OP_MD_MULADD;
statement.src1 = MakeSymbolRef(m_shadow.Pull());
statement.src2 = MakeSymbolRef(m_shadow.Pull());
statement.src3 = MakeSymbolRef(m_shadow.Pull());
statement.dst = MakeSymbolRef(tempSym);
InsertStatement(statement);

m_shadow.Push(tempSym);
}

void CJitter::MD_MulSub()
{
SymbolPtr tempSym = MakeSymbol(SYM_TEMPORARY128, m_nextTemporary++);

STATEMENT statement;
statement.op = OP_MD_MULSUB;
statement.src1 = MakeSymbolRef(m_shadow.Pull());
statement.src2 = MakeSymbolRef(m_shadow.Pull());
statement.src3 = MakeSymbolRef(m_shadow.Pull());
statement.dst = MakeSymbolRef(tempSym);
InsertStatement(statement);

m_shadow.Push(tempSym);
}

//Generic Statement Inserters
//------------------------------------------------

Expand Down Expand Up @@ -1615,6 +1675,21 @@ void CJitter::InsertBinaryStatement(Jitter::OPERATION operation)
m_shadow.Push(tempSym);
}

//Generic inserter for a three-source statement of the given operation.
//Operands are pulled from the shadow stack in reverse: the first pull
//becomes src3, the second src2 and the third src1. The result goes to a
//fresh SYM_TEMPORARY symbol, which is pushed back on the shadow stack.
//NOTE(review): FP_MulAdd/FP_MulSub/MD_MulAdd/MD_MulSub pull their operands
//in the opposite order (src1 first) and use FP/128-bit temporaries, so this
//helper is not a drop-in replacement for them - confirm intended usage.
void CJitter::InsertTernaryStatement(Jitter::OPERATION operation)
{
	auto resultSym = MakeSymbol(SYM_TEMPORARY, m_nextTemporary++);

	STATEMENT ternary;
	ternary.src3 = MakeSymbolRef(m_shadow.Pull());
	ternary.src2 = MakeSymbolRef(m_shadow.Pull());
	ternary.src1 = MakeSymbolRef(m_shadow.Pull());
	ternary.dst = MakeSymbolRef(resultSym);
	ternary.op = operation;
	InsertStatement(ternary);

	m_shadow.Push(resultSym);
}

void CJitter::InsertUnaryMdStatement(Jitter::OPERATION operation)
{
auto tempSym = MakeSymbol(SYM_TEMPORARY128, m_nextTemporary++);
Expand Down
19 changes: 19 additions & 0 deletions src/Jitter_CodeGen_x86_Fpu_Avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,23 @@ void CCodeGen_x86::Emit_Fpu_Avx_MemMemMem(const STATEMENT& statement)
m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(dst), dstRegister);
}

//Emits a three-source scalar FP operation where dst and all sources are
//single-precision values in memory.
//src1 is loaded into xMM0 (which the operation also uses as destination),
//src2 is loaded into xMM1, src3 is passed as a memory operand, and xMM0 is
//finally stored to dst.
template <typename FPUOP>
void CCodeGen_x86::Emit_Fpu_Avx_MemMemMemMem(const STATEMENT& statement)
{
	const auto resultRegister = CX86Assembler::xMM0;
	const auto secondRegister = CX86Assembler::xMM1;
	const auto emitOp = FPUOP::OpEdAvx();

	//Bring the first two sources into registers
	m_assembler.VmovssEd(resultRegister, MakeMemoryFpSingleSymbolAddress(statement.src1->GetSymbol().get()));
	m_assembler.VmovssEd(secondRegister, MakeMemoryFpSingleSymbolAddress(statement.src2->GetSymbol().get()));

	//The third source stays in memory
	(m_assembler.*emitOp)(resultRegister, secondRegister, MakeMemoryFpSingleSymbolAddress(statement.src3->GetSymbol().get()));

	//Write the result back
	m_assembler.VmovssEd(MakeMemoryFpSingleSymbolAddress(statement.dst->GetSymbol().get()), resultRegister);
}

void CCodeGen_x86::Emit_Fp_Avx_Cmp_VarMemMem(const STATEMENT& statement)
{
auto dst = statement.dst->GetSymbol().get();
Expand Down Expand Up @@ -101,6 +118,8 @@ void CCodeGen_x86::Emit_Fp_Avx_ToIntTrunc_RelRel(const STATEMENT& statement)

CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_fpuAvxConstMatchers[] =
{
{ OP_FP_MULADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMemMem<FPUOP_MULADD213> },
{ OP_FP_MULSUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMemMem<FPUOP_MULSUB213> },
{ OP_FP_ADD, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem<FPUOP_ADD> },
{ OP_FP_SUB, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem<FPUOP_SUB> },
{ OP_FP_MUL, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_MEMORY_FP_SINGLE, MATCH_NIL, &CCodeGen_x86::Emit_Fpu_Avx_MemMemMem<FPUOP_MUL> },
Expand Down
24 changes: 24 additions & 0 deletions src/Jitter_CodeGen_x86_Md_Avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,27 @@ void CCodeGen_x86::Emit_Md_Avx_VarVarVarRev(const STATEMENT& statement)
CommitSymbolRegisterMdAvx(dst, dstRegister);
}

//Emits a three-source 128-bit AVX operation (dst, src1, src2, src3).
//The operation's first register operand is treated as both an input and the
//output: src1 is copied into the destination register before the operation
//is issued with src2 (register) and src3 (address) as the other operands.
template <typename MDOP>
void CCodeGen_x86::Emit_Md_Avx_VarVarVarVar(const STATEMENT& statement)
{
auto dst = statement.dst->GetSymbol().get();
auto src1 = statement.src1->GetSymbol().get();
auto src2 = statement.src2->GetSymbol().get();
auto src3 = statement.src3->GetSymbol().get();

//xMM0/xMM1/xMM2 are passed as the preferred registers for dst/src1/src2
auto dstRegister = PrepareSymbolRegisterDefMd(dst, CX86Assembler::xMM0);
auto src1Register = PrepareSymbolRegisterUseMdAvx(src1, CX86Assembler::xMM1);
auto src2Register = PrepareSymbolRegisterUseMdAvx(src2, CX86Assembler::xMM2);

//Seed the destination with src1 unless it already lives there
//NOTE(review): if dstRegister can be the same register as src2Register (or
//the register backing src3), this copy overwrites that operand before the
//operation reads it - confirm whether the register allocator can produce
//such aliasing and handle it if so.
if(dstRegister != src1Register)
{
m_assembler.VmovapsVo(dstRegister, CX86Assembler::MakeXmmRegisterAddress(src1Register));
}

//Issue the operation, then commit the destination register to dst
((m_assembler).*(MDOP::OpVoAvx()))(dstRegister, src2Register, MakeVariable128SymbolAddress(src3));
CommitSymbolRegisterMdAvx(dst, dstRegister);
}

template <typename MDOPSHIFT, uint8 SAMASK>
void CCodeGen_x86::Emit_Md_Avx_Shift_VarVarCst(const STATEMENT& statement)
{
Expand Down Expand Up @@ -620,6 +641,9 @@ CCodeGen_x86::CONSTMATCHER CCodeGen_x86::g_mdAvxConstMatchers[] =
{ OP_MD_TOWORD_TRUNCATE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_VarVar<MDOP_TOWORD_TRUNCATE> },
{ OP_MD_TOSINGLE, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_VarVar<MDOP_TOSINGLE> },

{OP_MD_MULADD, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_Avx_VarVarVarVar<MDOP_MULADD213>},
{OP_MD_MULSUB, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, MATCH_VARIABLE128, &CCodeGen_x86::Emit_Md_Avx_VarVarVarVar<MDOP_MULSUB213>},

{ OP_MD_EXPAND, MATCH_VARIABLE128, MATCH_VARIABLE, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_Expand_VarVar },
{ OP_MD_EXPAND, MATCH_VARIABLE128, MATCH_CONSTANT, MATCH_NIL, MATCH_NIL, &CCodeGen_x86::Emit_Md_Avx_Expand_VarCst },

Expand Down
20 changes: 20 additions & 0 deletions src/X86Assembler_Avx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,26 @@ void CX86Assembler::VblendpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress
WriteByte(mask);
}

//VFMADD213SS (VEX.66.0F38 A9): scalar single-precision fused multiply-add.
//Per the Intel description: dst = (src1 * dst) + src2.
void CX86Assembler::Vfmadd213ssVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2)
{
	const auto opcode = 0xA9;
	WriteVexVoOp(VEX_OPCODE_MAP_66_38, opcode, dst, src1, src2);
}

//VFNMADD213SS (VEX.66.0F38 AD): scalar single-precision fused negated multiply-add.
//Per the Intel description: dst = -(src1 * dst) + src2.
void CX86Assembler::Vfnmadd213ssVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2)
{
	const auto opcode = 0xAD;
	WriteVexVoOp(VEX_OPCODE_MAP_66_38, opcode, dst, src1, src2);
}

//VFMADD213PS (VEX.66.0F38 A8): packed single-precision fused multiply-add.
//Per the Intel description: dst = (src1 * dst) + src2, element-wise.
void CX86Assembler::Vfmadd213psVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2)
{
	const auto opcode = 0xA8;
	WriteVexVoOp(VEX_OPCODE_MAP_66_38, opcode, dst, src1, src2);
}

//VFNMADD213PS (VEX.66.0F38 AC): packed single-precision fused negated multiply-add.
//Per the Intel description: dst = -(src1 * dst) + src2, element-wise.
void CX86Assembler::Vfnmadd213psVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2)
{
	const auto opcode = 0xAC;
	WriteVexVoOp(VEX_OPCODE_MAP_66_38, opcode, dst, src1, src2);
}

void CX86Assembler::VshufpsVo(XMMREGISTER dst, XMMREGISTER src1, const CAddress& src2, uint8 shuffleByte)
{
WriteVexVoOp(VEX_OPCODE_MAP_NONE, 0xC6, dst, src1, src2);
Expand Down
81 changes: 81 additions & 0 deletions tests/DoubleFusedMultiAddTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#include "DoubleFusedMultiAddTest.h"
#include "MemStream.h"

//Nothing to set up at construction time; the test state is prepared in Run()
CDoubleFusedMultiAddTest::CDoubleFusedMultiAddTest() = default;

//No explicit cleanup is performed by this test
CDoubleFusedMultiAddTest::~CDoubleFusedMultiAddTest() = default;

//Builds the test function.
//Three operand triples are evaluated twice: once through MD_MulAdd
//(results in res1..res3) and once through MD_MulS followed by MD_AddS
//(results in res4..res6), so Run() can compare both paths.
void CDoubleFusedMultiAddTest::Compile(Jitter::CJitter& jitter)
{
	Framework::CMemStream codeStream;
	jitter.SetStream(&codeStream);

	//Pushes the three operands in the given order (the last one is pushed last)
	const auto pushOperands = [&jitter](size_t first, size_t second, size_t third) {
		jitter.MD_PushRel(first);
		jitter.MD_PushRel(second);
		jitter.MD_PushRel(third);
	};

	//Fused path: a single MD_MulAdd consumes the three operands
	const auto emitFused = [&](size_t first, size_t second, size_t third, size_t result) {
		pushOperands(first, second, third);
		jitter.MD_MulAdd();
		jitter.MD_PullRel(result);
	};

	//Reference path: separate multiply then add on the same operands
	const auto emitReference = [&](size_t first, size_t second, size_t third, size_t result) {
		pushOperands(first, second, third);
		jitter.MD_MulS();
		jitter.MD_AddS();
		jitter.MD_PullRel(result);
	};

	jitter.Begin();
	{
		emitFused(offsetof(CONTEXT, number3), offsetof(CONTEXT, number2), offsetof(CONTEXT, number1), offsetof(CONTEXT, res1));
		emitFused(offsetof(CONTEXT, number4), offsetof(CONTEXT, number2), offsetof(CONTEXT, number2), offsetof(CONTEXT, res2));
		emitFused(offsetof(CONTEXT, number3), offsetof(CONTEXT, number4), offsetof(CONTEXT, number2), offsetof(CONTEXT, res3));

		emitReference(offsetof(CONTEXT, number3), offsetof(CONTEXT, number2), offsetof(CONTEXT, number1), offsetof(CONTEXT, res4));
		emitReference(offsetof(CONTEXT, number4), offsetof(CONTEXT, number2), offsetof(CONTEXT, number2), offsetof(CONTEXT, res5));
		emitReference(offsetof(CONTEXT, number3), offsetof(CONTEXT, number4), offsetof(CONTEXT, number2), offsetof(CONTEXT, res6));
	}
	jitter.End();

	m_function = CMemoryFunction(codeStream.GetBuffer(), codeStream.GetSize());
}

//Runs the compiled function and checks that the fused results (res1..res3)
//match the multiply-then-add results (res4..res6) element by element.
void CDoubleFusedMultiAddTest::Run()
{
	memset(&m_context, 0, sizeof(CONTEXT));

	//Only elements 1 and 2 receive non-zero inputs; the rest stay zeroed
	for(int lane = 1; lane <= 2; ++lane)
	{
		m_context.number1[lane] = 1;
		m_context.number2[lane] = 2;
		m_context.number3[lane] = 4;
		m_context.number4[lane] = 16;
	}

	m_function(&m_context);

	for(int lane = 0; lane < 4; ++lane)
	{
		TEST_VERIFY(m_context.res1[lane] == m_context.res4[lane]);
		TEST_VERIFY(m_context.res2[lane] == m_context.res5[lane]);
		TEST_VERIFY(m_context.res3[lane] == m_context.res6[lane]);
	}
}
Loading