-
It should be simple enough to just change the check:

```cpp
void drcbe_arm64::call_arm_addr(a64::Assembler &a, const void *offs) const
{
	// Displacement of the target from the current code position
	const uint64_t codeoffs = a.code()->baseAddress() + a.offset();
	const int64_t reloffs = (int64_t)offs - codeoffs;
	if (is_valid_immediate_signed(reloffs, 26 + 2))
	{
		// Near enough for a direct BL
		a.bl(offs);
	}
	else
	{
		// Out of range: materialise the address and call through a register
		get_imm_relative(a, SCRATCH_REG1, uintptr_t(offs));
		a.blr(SCRATCH_REG1);
	}
}
```
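(BL encodes a 26-bit signed word offset, which is why the byte displacement is checked against 26 + 2 bits; that gives direct calls a reach of ±128 MiB from the call site.)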
-
```cpp
template <unsigned Shift>
void drcbe_arm64::emit_ldr_str_base_mem(a64::Assembler &a, a64::Inst::Id opcode, const a64::Reg &reg, const void *ptr) const
{
	// If it can fit as a constant offset from the base register
	const int64_t diff = (int64_t)ptr - (int64_t)m_baseptr;
	const int64_t size = int64_t(1) << Shift; // access size in bytes implied by Shift
	if (is_valid_immediate_signed(diff / size, 9 + Shift))
	{
		a.emit(opcode, reg, arm::Mem(BASE_REG, diff));
		return;
	}

	// If it can fit as an offset relative to PC
	const uint64_t codeoffs = a.code()->baseAddress() + a.offset();
	const int64_t reloffs = (int64_t)ptr - codeoffs;
	if (is_valid_immediate_signed(reloffs, 21))
	{
		a.adr(MEM_SCRATCH_REG, ptr);
		a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
		return;
	}

	// If the displacement fits in a 16-bit register offset
	if (diff > 0 && is_valid_immediate(diff, 16))
	{
		a.mov(MEM_SCRATCH_REG, diff);
		a.emit(opcode, reg, arm::Mem(BASE_REG, MEM_SCRATCH_REG));
		return;
	}

	// Try to materialise base plus/minus displacement in a couple of instructions
	if (diff > 0 && emit_add_optimized(a, MEM_SCRATCH_REG, BASE_REG, diff))
	{
		a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
		return;
	}
	else if (diff < 0 && emit_sub_optimized(a, MEM_SCRATCH_REG, BASE_REG, diff))
	{
		a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
		return;
	}

	// Try a scaled register offset: LDR/STR reg, [BASE_REG, Xm, LSL #shift]
	if (diff >= 0)
	{
		int shift = 0;
		int max_shift = 0;

		// Maximum scale the addressing mode allows for this access size
		if (opcode == a64::Inst::kIdLdrb || opcode == a64::Inst::kIdLdrsb)
			max_shift = 0;
		else if (opcode == a64::Inst::kIdLdrh || opcode == a64::Inst::kIdLdrsh)
			max_shift = 1;
		else if (opcode == a64::Inst::kIdLdrsw)
			max_shift = 2;
		else
			max_shift = (reg.isGpW() || reg.isVecS()) ? 2 : 3;

		// Find the lowest set bit of the target address to see how far the
		// displacement can be shifted down
		for (int i = 0; i < 64 && max_shift > 0; i++)
		{
			if ((uint64_t)ptr & ((uint64_t)(1) << i))
			{
				shift = i;
				break;
			}
		}

		if (shift > max_shift)
			shift = max_shift;

		if (is_valid_immediate(diff >> shift, 32))
		{
			a.mov(MEM_SCRATCH_REG, diff >> shift);
			if (shift)
				a.emit(opcode, reg, arm::Mem(BASE_REG, MEM_SCRATCH_REG, arm::Shift(arm::ShiftOp::kLSL, shift)));
			else
				a.emit(opcode, reg, arm::Mem(BASE_REG, MEM_SCRATCH_REG));
			return;
		}
	}

	// Try ADRP to the target's page plus a page offset
	const uint64_t pagebase = codeoffs & ~util::make_bitmask<uint64_t>(12);
	const int64_t pagerel = (int64_t)ptr - pagebase;
	if (is_valid_immediate_signed(pagerel, 33))
	{
		const uint64_t targetpage = (uint64_t)ptr & ~util::make_bitmask<uint64_t>(12);
		const uint64_t pageoffs = (uint64_t)ptr & util::make_bitmask<uint64_t>(12);

		a.adrp(MEM_SCRATCH_REG, targetpage);
		if (is_valid_immediate_signed(pageoffs, 9 + Shift))
		{
			a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG, pageoffs));
		}
		else
		{
			a.add(MEM_SCRATCH_REG, MEM_SCRATCH_REG, pageoffs);
			a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
		}
		return;
	}

	// Can't optimize it at all; most likely becomes four MOV instructions
	a.mov(MEM_SCRATCH_REG, ptr);
	a.emit(opcode, reg, arm::Mem(MEM_SCRATCH_REG));
}
```

Then update the
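For reference, both snippets rely on small range-check helpers along these lines (a minimal sketch; the actual helpers in MAME may differ in detail):

```cpp
#include <cstdint>

// Does value fit in a signed immediate field of the given width?
inline bool is_valid_immediate_signed(int64_t value, unsigned bits)
{
	return value >= -(int64_t(1) << (bits - 1)) && value < (int64_t(1) << (bits - 1));
}

// Does value fit in an unsigned immediate field of the given width?
inline bool is_valid_immediate(uint64_t value, unsigned bits)
{
	return value < (uint64_t(1) << bits);
}
```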
-
These kinds of masks can be generated with PowerPC.
-
It should be possible to implement the equivalent of memory_access_specific directly in the generated code (it's a mask, a shift, a lookup of an object pointer in an array, and a call to a virtual method of that object). The gain should be significant.
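Written out in C++, the sequence being described looks roughly like this (a hypothetical sketch; the names and types only illustrate the shape of the access, not MAME's exact memory_access_specific API):

```cpp
#include <cstdint>

// Stand-in for MAME's handler objects: one virtual call performs the access.
struct handler_entry
{
	virtual ~handler_entry() = default;
	virtual uint16_t read(uint32_t address, uint16_t mem_mask) = 0;
};

struct specific_access
{
	handler_entry *const *dispatch; // table indexed by the high address bits
	uint32_t mask;                  // selects the address bits used for the lookup
	int shift;                      // scales the masked address down to a table index

	uint16_t read_word(uint32_t address, uint16_t mem_mask = 0xffff) const
	{
		// Mask, shift, table lookup, virtual call: the four steps the DRC
		// back-end would emit inline instead of calling out to a C++ helper.
		return dispatch[(address & mask) >> shift]->read(address, mem_mask);
	}
};
```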
-
We don’t want to mess with the DRC back-ends too much before release now that they seem to be working. This discussion is to keep track of optimisations to look at later.