priv/host_generic_simd256.c \
priv/host_generic_maddf.c \
priv/host_generic_reg_alloc2.c \
+ priv/host_generic_reg_alloc3.c \
priv/host_x86_defs.c \
priv/host_x86_isel.c \
priv/host_amd64_defs.c \
381272 ppc64 doesn't compile test_isa_2_06_partx.c without VSX support
381289 epoll_pwait can have a NULL sigmask
381274 powerpc too chatty even with --sigill-diagnostics=no
+381553 VEX register allocator v3
381769 Use ucontext_t instead of struct ucontext
381805 arm32 needs ld.so index hardwire for new glibc security fixes
382256 gz compiler flag test doesn't work for gold
/* Add the registers. The initial segment of this array must be
those available for allocation by reg-alloc, and those that
follow are not available for allocation. */
+ ru->allocable_start[HRcInt64] = ru->size;
ru->regs[ru->size++] = hregAMD64_RSI();
ru->regs[ru->size++] = hregAMD64_RDI();
ru->regs[ru->size++] = hregAMD64_R8();
ru->regs[ru->size++] = hregAMD64_R14();
ru->regs[ru->size++] = hregAMD64_R15();
ru->regs[ru->size++] = hregAMD64_RBX();
+ ru->regs[ru->size++] = hregAMD64_R10();
+ ru->allocable_end[HRcInt64] = ru->size - 1;
+
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregAMD64_XMM3();
ru->regs[ru->size++] = hregAMD64_XMM4();
ru->regs[ru->size++] = hregAMD64_XMM5();
ru->regs[ru->size++] = hregAMD64_XMM10();
ru->regs[ru->size++] = hregAMD64_XMM11();
ru->regs[ru->size++] = hregAMD64_XMM12();
- ru->regs[ru->size++] = hregAMD64_R10();
+ ru->allocable_end[HRcVec128] = ru->size - 1;
ru->allocable = ru->size;
+
/* And other regs, not available to the allocator. */
ru->regs[ru->size++] = hregAMD64_RAX();
ru->regs[ru->size++] = hregAMD64_RCX();
}
-void ppHRegAMD64 ( HReg reg )
+UInt ppHRegAMD64 ( HReg reg )
{
Int r;
static const HChar* ireg64_names[16]
"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("%s", ireg64_names[r]);
- return;
+ return vex_printf("%s", ireg64_names[r]);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("%%xmm%d", r);
- return;
+ return vex_printf("%%xmm%d", r);
default:
vpanic("ppHRegAMD64");
}
}
-static void ppHRegAMD64_lo32 ( HReg reg )
+static UInt ppHRegAMD64_lo32 ( HReg reg )
{
Int r;
static const HChar* ireg32_names[16]
"%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- vex_printf("d");
- return;
+ UInt written = ppHReg(reg);
+ written += vex_printf("d");
+ return written;
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
default:
vpanic("ppHRegAMD64_lo32: invalid regclass");
}
}
}
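+/* Generate a move between two real registers of the same class. Illustrative
+ note: the register allocator invokes this (via the genMove hook in
+ RegAllocControl) when it has to vacate a register, for example one that is
+ about to enter a hard live range. */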
+AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt64:
+ return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to);
+ case HRcVec128:
+ return AMD64Instr_SseReRg(Asse_MOV, from, to);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_AMD64: unimplemented regclass");
+ }
+}
+
AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
{
vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 6); }
ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 7); }
ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 8); }
-
-ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 9); }
-ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 10); }
-ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 11); }
-ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 12); }
-ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 13); }
-ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 14); }
-ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 15); }
-ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 16); }
-ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 17); }
-ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 18); }
-
-ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 19); }
+ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 9); }
+
+ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 10); }
+ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 11); }
+ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 12); }
+ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 13); }
+ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 14); }
+ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 15); }
+ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 16); }
+ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 17); }
+ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 18); }
+ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 19); }
ST_IN HReg hregAMD64_RAX ( void ) { return mkHReg(False, HRcInt64, 0, 20); }
ST_IN HReg hregAMD64_RCX ( void ) { return mkHReg(False, HRcInt64, 1, 21); }
ST_IN HReg hregAMD64_XMM1 ( void ) { return mkHReg(False, HRcVec128, 1, 27); }
#undef ST_IN
-extern void ppHRegAMD64 ( HReg );
+extern UInt ppHRegAMD64 ( HReg );
/* --------- Condition codes, AMD encoding. --------- */
HReg rreg, Int offset, Bool );
extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
-
+extern AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool);
extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i,
HReg vreg, Short spill_off );
/* Add the registers. The initial segment of this array must be
those available for allocation by reg-alloc, and those that
follow are not available for allocation. */
-
+ ru->allocable_start[HRcInt64] = ru->size;
ru->regs[ru->size++] = hregARM64_X22();
ru->regs[ru->size++] = hregARM64_X23();
ru->regs[ru->size++] = hregARM64_X24();
ru->regs[ru->size++] = hregARM64_X5();
ru->regs[ru->size++] = hregARM64_X6();
ru->regs[ru->size++] = hregARM64_X7();
+ ru->allocable_end[HRcInt64] = ru->size - 1;
// X8 is used as a ProfInc temporary, not available to regalloc.
// X9 is a chaining/spill temporary, not available to regalloc.
// X21 is the guest state pointer, not available to regalloc.
// vector regs. Unfortunately not callee-saved.
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregARM64_Q16();
ru->regs[ru->size++] = hregARM64_Q17();
ru->regs[ru->size++] = hregARM64_Q18();
ru->regs[ru->size++] = hregARM64_Q19();
ru->regs[ru->size++] = hregARM64_Q20();
+ ru->allocable_end[HRcVec128] = ru->size - 1;
// F64 regs, all of which are callee-saved
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregARM64_D8();
ru->regs[ru->size++] = hregARM64_D9();
ru->regs[ru->size++] = hregARM64_D10();
ru->regs[ru->size++] = hregARM64_D11();
ru->regs[ru->size++] = hregARM64_D12();
ru->regs[ru->size++] = hregARM64_D13();
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
ru->allocable = ru->size;
/* And other regs, not available to the allocator. */
}
-void ppHRegARM64 ( HReg reg ) {
+UInt ppHRegARM64 ( HReg reg ) {
Int r;
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 31);
- vex_printf("x%d", r);
- return;
+ return vex_printf("x%d", r);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("d%d", r);
- return;
+ return vex_printf("d%d", r);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("q%d", r);
- return;
+ return vex_printf("q%d", r);
default:
vpanic("ppHRegARM64");
}
}
-static void ppHRegARM64asSreg ( HReg reg ) {
- ppHRegARM64(reg);
- vex_printf("(S-reg)");
+static UInt ppHRegARM64asSreg ( HReg reg ) {
+ UInt written = ppHRegARM64(reg);
+ written += vex_printf("(S-reg)");
+ return written;
}
-static void ppHRegARM64asHreg ( HReg reg ) {
- ppHRegARM64(reg);
- vex_printf("(H-reg)");
+static UInt ppHRegARM64asHreg ( HReg reg ) {
+ UInt written = ppHRegARM64(reg);
+ written += vex_printf("(H-reg)");
+ return written;
}
ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
return;
case ARM64in_VFCSel: {
- void (*ppHRegARM64fp)(HReg)
+ UInt (*ppHRegARM64fp)(HReg)
= (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
vex_printf("fcsel ");
ppHRegARM64fp(i->ARM64in.VFCSel.dst);
}
}
+ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt64:
+ return ARM64Instr_MovI(to, from);
+ case HRcFlt64:
+ return ARM64Instr_VMov(8, to, from);
+ case HRcVec128:
+ return ARM64Instr_VMov(16, to, from);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_ARM64: unimplemented regclass");
+ }
+}
+
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
ST_IN HReg hregARM64_X21 ( void ) { return mkHReg(False, HRcInt64, 21, 28); }
#undef ST_IN
-extern void ppHRegARM64 ( HReg );
+extern UInt ppHRegARM64 ( HReg );
/* Number of registers used arg passing in function calls */
#define ARM64_N_ARGREGS 8 /* x0 .. x7 */
HReg rreg, Int offset, Bool );
extern void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
+extern ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool);
extern const RRegUniverse* getRRegUniverse_ARM64 ( void );
/* Callee saves ones are listed first, since we prefer them
if they're available. */
+ ru->allocable_start[HRcInt32] = ru->size;
ru->regs[ru->size++] = hregARM_R4();
ru->regs[ru->size++] = hregARM_R5();
ru->regs[ru->size++] = hregARM_R6();
ru->regs[ru->size++] = hregARM_R2();
ru->regs[ru->size++] = hregARM_R3();
ru->regs[ru->size++] = hregARM_R9();
+ ru->allocable_end[HRcInt32] = ru->size - 1;
+
/* FP registers. Note: these are all callee-save. Yay! Hence we
don't need to mention them as trashed in getHRegUsage for
ARMInstr_Call. */
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregARM_D8();
ru->regs[ru->size++] = hregARM_D9();
ru->regs[ru->size++] = hregARM_D10();
ru->regs[ru->size++] = hregARM_D11();
ru->regs[ru->size++] = hregARM_D12();
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
+
+ ru->allocable_start[HRcFlt32] = ru->size;
ru->regs[ru->size++] = hregARM_S26();
ru->regs[ru->size++] = hregARM_S27();
ru->regs[ru->size++] = hregARM_S28();
ru->regs[ru->size++] = hregARM_S29();
ru->regs[ru->size++] = hregARM_S30();
+ ru->allocable_end[HRcFlt32] = ru->size - 1;
+
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregARM_Q8();
ru->regs[ru->size++] = hregARM_Q9();
ru->regs[ru->size++] = hregARM_Q10();
ru->regs[ru->size++] = hregARM_Q11();
ru->regs[ru->size++] = hregARM_Q12();
+ ru->allocable_end[HRcVec128] = ru->size - 1;
ru->allocable = ru->size;
/* And other regs, not available to the allocator. */
}
-void ppHRegARM ( HReg reg ) {
+UInt ppHRegARM ( HReg reg ) {
Int r;
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("r%d", r);
- return;
+ return vex_printf("r%d", r);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("d%d", r);
- return;
+ return vex_printf("d%d", r);
case HRcFlt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("s%d", r);
- return;
+ return vex_printf("s%d", r);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("q%d", r);
- return;
+ return vex_printf("q%d", r);
default:
vpanic("ppHRegARM");
}
}
}
+ARMInstr* genMove_ARM(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt32:
+ return ARMInstr_Mov(to, ARMRI84_R(from));
+ case HRcFlt32:
+ return ARMInstr_VUnaryS(ARMvfpu_COPY, to, from);
+ case HRcFlt64:
+ return ARMInstr_VUnaryD(ARMvfpu_COPY, to, from);
+ case HRcVec128:
+ return ARMInstr_NUnary(ARMneon_COPY, to, from, 4, False);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_ARM: unimplemented regclass");
+ }
+}
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
ST_IN HReg hregARM_Q15 ( void ) { return mkHReg(False, HRcVec128, 15, 33); }
#undef ST_IN
-extern void ppHRegARM ( HReg );
+extern UInt ppHRegARM ( HReg );
/* Number of registers used arg passing in function calls */
#define ARM_N_ARGREGS 4 /* r0, r1, r2, r3 */
HReg rreg, Int offset, Bool );
extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
+extern ARMInstr* genMove_ARM(HReg from, HReg to, Bool);
extern const RRegUniverse* getRRegUniverse_ARM ( void );
}
-/* Vectorised memset, copied from Valgrind's m_libcbase.c. */
-static void* local_memset ( void *destV, Int c, SizeT sz )
-{
-# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))
-
- UInt c4;
- UChar* d = destV;
- UChar uc = c;
-
- while ((!IS_4_ALIGNED(d)) && sz >= 1) {
- d[0] = uc;
- d++;
- sz--;
- }
- if (sz == 0)
- return destV;
- c4 = uc;
- c4 |= (c4 << 8);
- c4 |= (c4 << 16);
- while (sz >= 16) {
- ((UInt*)d)[0] = c4;
- ((UInt*)d)[1] = c4;
- ((UInt*)d)[2] = c4;
- ((UInt*)d)[3] = c4;
- d += 16;
- sz -= 16;
- }
- while (sz >= 4) {
- ((UInt*)d)[0] = c4;
- d += 4;
- sz -= 4;
- }
- while (sz >= 1) {
- d[0] = c;
- d++;
- sz--;
- }
- return destV;
-
-# undef IS_4_ALIGNED
-}
-
-
/* A target-independent register allocator. Requires various
functions which it uses to deal abstractly with instructions and
registers, since it cannot have any target-specific knowledge.
Takes an expandable array of pointers to unallocated insns.
Returns an expandable array of pointers to allocated insns.
*/
-HInstrArray* doRegisterAllocation (
+HInstrArray* doRegisterAllocation_v2 (
/* Incoming virtual-registerised code. */
HInstrArray* instrs_in,
- /* The real-register universe to use. This contains facts about
- real registers, one of which is the set of registers available
- for allocation. */
- const RRegUniverse* univ,
-
- /* Return True iff the given insn is a reg-reg move, in which
- case also return the src and dst regs. */
- Bool (*isMove) ( const HInstr*, HReg*, HReg* ),
-
- /* Get info about register usage in this insn. */
- void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ),
-
- /* Apply a reg-reg mapping to an insn. */
- void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
-
- /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
- real reg to a spill slot byte offset. The two leading HInstr**
- args are out parameters, through which the generated insns are
- returned. Also (optionally) a 'directReload' function, which
- attempts to replace a given instruction by one which reads
- directly from a specified spill slot. May be NULL, in which
- case the optimisation is not attempted. */
- void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
- void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
- HInstr* (*directReload) ( HInstr*, HReg, Short ),
- Int guest_sizeB,
-
- /* For debug printing only. */
- void (*ppInstr) ( const HInstr*, Bool ),
- void (*ppReg) ( HReg ),
-
- /* 32/64bit mode */
- Bool mode64
+ /* Register allocator controls to use. */
+ const RegAllocControl* con
)
{
# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
not at each insn processed. */
Bool do_sanity_check;
- vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
+ vassert(0 == (con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
vassert(0 == (N_SPILL64S % 2));
HInstr* _tmp = (_instr); \
if (DEBUG_REGALLOC) { \
vex_printf("** "); \
- (*ppInstr)(_tmp, mode64); \
+ con->ppInstr(_tmp, con->mode64); \
vex_printf("\n\n"); \
} \
addHInstr ( instrs_out, _tmp ); \
Int z, q; \
for (z = 0; z < n_rregs; z++) { \
vex_printf(" rreg_state[%2d] = ", z); \
- (*ppReg)(univ->regs[z]); \
+ con->ppReg(con->univ->regs[z]); \
vex_printf(" \t"); \
switch (rreg_state[z].disp) { \
case Free: vex_printf("Free\n"); break; \
case Unavail: vex_printf("Unavail\n"); break; \
case Bound: vex_printf("BoundTo "); \
- (*ppReg)(rreg_state[z].vreg); \
+ con->ppReg(rreg_state[z].vreg); \
vex_printf("\n"); break; \
} \
} \
/* ... and initialise running state. */
/* n_rregs is no more than a short name for n_available_real_regs. */
- n_rregs = univ->allocable;
+ n_rregs = con->univ->allocable;
n_vregs = instrs_in->n_vregs;
/* If this is not so, vreg_state entries will overflow. */
for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
- (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 );
+ con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], con->mode64);
if (0) {
vex_printf("\n%d stage1: ", ii);
- (*ppInstr)(instrs_in->arr[ii], mode64);
+ con->ppInstr(instrs_in->arr[ii], con->mode64);
vex_printf("\n");
- ppHRegUsage(univ, ®_usage_arr[ii]);
+ ppHRegUsage(con->univ, ®_usage_arr[ii]);
}
/* ------ start of DEAL WITH VREG LIVE RANGES ------ */
Int k = hregIndex(vreg);
if (k < 0 || k >= n_vregs) {
vex_printf("\n");
- (*ppInstr)(instrs_in->arr[ii], mode64);
+ con->ppInstr(instrs_in->arr[ii], con->mode64);
vex_printf("\n");
vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
vpanic("doRegisterAllocation: out-of-range vreg");
} else if (!isW && isR) {
if (rreg_live_after[j] == INVALID_INSTRNO) {
vex_printf("\nOFFENDING RREG = ");
- (*ppReg)(univ->regs[j]);
+ con->ppReg(con->univ->regs[j]);
vex_printf("\n");
vex_printf("\nOFFENDING instr = ");
- (*ppInstr)(instrs_in->arr[ii], mode64);
+ con->ppInstr(instrs_in->arr[ii], con->mode64);
vex_printf("\n");
vpanic("doRegisterAllocation: "
"first event for rreg is Read");
vassert(isR && isW);
if (rreg_live_after[j] == INVALID_INSTRNO) {
vex_printf("\nOFFENDING RREG = ");
- (*ppReg)(univ->regs[j]);
+ con->ppReg(con->univ->regs[j]);
vex_printf("\n");
vex_printf("\nOFFENDING instr = ");
- (*ppInstr)(instrs_in->arr[ii], mode64);
+ con->ppInstr(instrs_in->arr[ii], con->mode64);
vex_printf("\n");
vpanic("doRegisterAllocation: "
"first event for rreg is Modify");
ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
if (0)
vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
- rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j];
+ rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j];
rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la);
rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
rreg_lrs_used++;
if (0)
vex_printf("FLUSH 2 (%d,%d)\n",
rreg_live_after[j], rreg_dead_before[j]);
- rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j];
+ rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j];
rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]);
rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
rreg_lrs_used++;
for (Int j = 0; j < n_rregs; j++) {
if (!rreg_state[j].has_hlrs)
continue;
- ppReg(univ->regs[j]);
+ con->ppReg(con->univ->regs[j]);
vex_printf(" hinted\n");
}
}
vex_printf("RRegLRs by LA:\n");
for (Int j = 0; j < rreg_lrs_used; j++) {
vex_printf(" ");
- (*ppReg)(rreg_lrs_la[j].rreg);
+ con->ppReg(rreg_lrs_la[j].rreg);
vex_printf(" la = %d, db = %d\n",
rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
}
vex_printf("RRegLRs by DB:\n");
for (Int j = 0; j < rreg_lrs_used; j++) {
vex_printf(" ");
- (*ppReg)(rreg_lrs_db[j].rreg);
+ con->ppReg(rreg_lrs_db[j].rreg);
vex_printf(" la = %d, db = %d\n",
rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
}
*/
/* Int max_ss_no = -1; */
- local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before));
+ vex_bzero(ss_busy_until_before, sizeof(ss_busy_until_before));
for (Int j = 0; j < n_vregs; j++) {
/* This reflects LibVEX's hard-wired knowledge of the baseBlock
layout: the guest state, then two equal sized areas following
it for two sets of shadow state, and then the spill area. */
- vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8);
+ vreg_lrs[j].spill_offset = toShort(con->guest_sizeB * 3 + ss_no * 8);
/* Independent check that we've made a sane choice of slot */
sanity_check_spill_offset( &vreg_lrs[j] );
if (DEBUG_REGALLOC) {
vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
vex_printf("---- ");
- (*ppInstr)(instrs_in->arr[ii], mode64);
+ con->ppInstr(instrs_in->arr[ii], con->mode64);
vex_printf("\n\nInitial state:\n");
PRINT_STATE;
vex_printf("\n");
vex_printf("considering la %d .. db %d reg = ",
rreg_lrs_la[j].live_after,
rreg_lrs_la[j].dead_before);
- (*ppReg)(reg);
+ con->ppReg(reg);
vex_printf("\n");
}
vassert(rreg_state[j].eq_spill_slot == False);
continue;
}
- vassert(hregClass(univ->regs[j])
+ vassert(hregClass(con->univ->regs[j])
== hregClass(rreg_state[j].vreg));
vassert( hregIsVirtual(rreg_state[j].vreg));
}
the dst to the src's rreg, and that's all. */
HReg vregS = INVALID_HREG;
HReg vregD = INVALID_HREG;
- if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
+ if ( con->isMove(instrs_in->arr[ii], &vregS, &vregD) ) {
if (!hregIsVirtual(vregS)) goto cannot_coalesce;
if (!hregIsVirtual(vregD)) goto cannot_coalesce;
/* Check that *isMove is not telling us a bunch of lies ... */
if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
if (DEBUG_REGALLOC) {
vex_printf("COALESCE ");
- (*ppReg)(vregS);
+ con->ppReg(vregS);
vex_printf(" -> ");
- (*ppReg)(vregD);
+ con->ppReg(vregD);
vex_printf("\n\n");
}
/* Find the state entry for vregS. */
vreg_state[m] = INVALID_RREG_NO;
if (DEBUG_REGALLOC) {
vex_printf("free up ");
- (*ppReg)(univ->regs[j]);
+ con->ppReg(con->univ->regs[j]);
vex_printf("\n");
}
}
than before it. */
if (DEBUG_REGALLOC) {
vex_printf("need to free up rreg: ");
- (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
+ con->ppReg(rreg_lrs_la[rreg_lrs_la_next].rreg);
vex_printf("\n\n");
}
Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);
if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
HInstr* spill1 = NULL;
HInstr* spill2 = NULL;
- (*genSpill)( &spill1, &spill2, univ->regs[k],
- vreg_lrs[m].spill_offset, mode64 );
+ con->genSpill(&spill1, &spill2, con->univ->regs[k],
+ vreg_lrs[m].spill_offset, con->mode64);
vassert(spill1 || spill2); /* can't both be NULL */
if (spill1)
EMIT_INSTR(spill1);
that the change is invisible to the standard-case handling
that follows. */
- if (directReload && reg_usage_arr[ii].n_vRegs <= 2) {
+ if (con->directReload != NULL && reg_usage_arr[ii].n_vRegs <= 2) {
Bool debug_direct_reload = False;
HReg cand = INVALID_HREG;
Bool nreads = 0;
vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
reg_usage_arr[ii].vRegs[1]));
- reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
+ reloaded = con->directReload(instrs_in->arr[ii], cand, spilloff);
if (debug_direct_reload && !reloaded) {
vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
- ppInstr(instrs_in->arr[ii], mode64);
+ con->ppInstr(instrs_in->arr[ii], con->mode64);
}
if (reloaded) {
/* Update info about the insn, so it looks as if it had
been in this form all along. */
instrs_in->arr[ii] = reloaded;
- (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 );
+ con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii],
+ con->mode64);
if (debug_direct_reload && !reloaded) {
vex_printf(" --> ");
- ppInstr(reloaded, mode64);
+ con->ppInstr(reloaded, con->mode64);
}
}
vassert(hregIsVirtual(vreg));
if (0) {
- vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
+ vex_printf("considering "); con->ppReg(vreg); vex_printf("\n");
}
/* Now we're trying to find a rreg for "vreg". First of all,
Int n = vreg_state[m];
if (IS_VALID_RREGNO(n)) {
vassert(rreg_state[n].disp == Bound);
- addToHRegRemap(&remap, vreg, univ->regs[n]);
+ addToHRegRemap(&remap, vreg, con->univ->regs[n]);
/* If this rreg is written or modified, mark it as different
from any spill slot value. */
if (reg_usage_arr[ii].vMode[j] != HRmRead)
Int k;
for (k = 0; k < n_rregs; k++) {
if (rreg_state[k].disp != Free
- || hregClass(univ->regs[k]) != hregClass(vreg))
+ || hregClass(con->univ->regs[k]) != hregClass(vreg))
continue;
if (rreg_state[k].has_hlrs) {
/* Well, at least we can use k_suboptimal if we really
Int p = hregIndex(vreg);
vassert(IS_VALID_VREGNO(p));
vreg_state[p] = toShort(k);
- addToHRegRemap(&remap, vreg, univ->regs[k]);
+ addToHRegRemap(&remap, vreg, con->univ->regs[k]);
/* Generate a reload if needed. This only creates needed
reloads because the live range builder for vregs will
guarantee that the first event for a vreg is a write.
vassert(vreg_lrs[p].reg_class != HRcINVALID);
HInstr* reload1 = NULL;
HInstr* reload2 = NULL;
- (*genReload)( &reload1, &reload2, univ->regs[k],
- vreg_lrs[p].spill_offset, mode64 );
+ con->genReload(&reload1, &reload2, con->univ->regs[k],
+ vreg_lrs[p].spill_offset, con->mode64);
vassert(reload1 || reload2); /* can't both be NULL */
if (reload1)
EMIT_INSTR(reload1);
rreg_state[k].is_spill_cand = False;
if (rreg_state[k].disp != Bound)
continue;
- if (hregClass(univ->regs[k]) != hregClass(vreg))
+ if (hregClass(con->univ->regs[k]) != hregClass(vreg))
continue;
rreg_state[k].is_spill_cand = True;
/* Note, the following loop visits only the virtual regs
vassert(IS_VALID_RREGNO(spillee));
vassert(rreg_state[spillee].disp == Bound);
/* check it's the right class */
- vassert(hregClass(univ->regs[spillee]) == hregClass(vreg));
+ vassert(hregClass(con->univ->regs[spillee]) == hregClass(vreg));
/* check we're not ejecting the vreg for which we are trying
to free up a register. */
vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
HInstr* spill1 = NULL;
HInstr* spill2 = NULL;
- (*genSpill)( &spill1, &spill2, univ->regs[spillee],
- vreg_lrs[m].spill_offset, mode64 );
+ con->genSpill(&spill1, &spill2, con->univ->regs[spillee],
+ vreg_lrs[m].spill_offset, con->mode64);
vassert(spill1 || spill2); /* can't both be NULL */
if (spill1)
EMIT_INSTR(spill1);
vassert(vreg_lrs[m].reg_class != HRcINVALID);
HInstr* reload1 = NULL;
HInstr* reload2 = NULL;
- (*genReload)( &reload1, &reload2, univ->regs[spillee],
- vreg_lrs[m].spill_offset, mode64 );
+ con->genReload(&reload1, &reload2, con->univ->regs[spillee],
+ vreg_lrs[m].spill_offset, con->mode64);
vassert(reload1 || reload2); /* can't both be NULL */
if (reload1)
EMIT_INSTR(reload1);
/* So after much twisting and turning, we have vreg mapped to
rreg_state[spillee].rreg. Note that in the map. */
- addToHRegRemap(&remap, vreg, univ->regs[spillee]);
+ addToHRegRemap(&remap, vreg, con->univ->regs[spillee]);
} /* iterate over virtual registers in this instruction. */
*/
/* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
- (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
+ con->mapRegs(&remap, instrs_in->arr[ii], con->mode64);
EMIT_INSTR( instrs_in->arr[ii] );
if (DEBUG_REGALLOC) {
--- /dev/null
+/*----------------------------------------------------------------------------*/
+/*--- begin host_generic_reg_alloc3.c ---*/
+/*----------------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation framework.
+
+ Copyright (C) 2017-2017 Ivo Raisr
+ ivosh@ivosh.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "host_generic_regs.h"
+
+/* Set to 1 for lots of debugging output. */
+#define DEBUG_REGALLOC 0
+
+/* Set to 1 for sanity checking at every instruction.
+ Set to 0 for sanity checking only every 17th one and the last one. */
+#define SANITY_CHECKS_EVERY_INSTR 0
+
+
+#define INVALID_INSTRNO (-2)
+
+/* Register allocator state is kept in an array of VRegState's.
+ There is an element for every virtual register (vreg).
+ Elements are indexed [0 .. n_vregs-1].
+ Records information about vreg live range and its state. */
+typedef
+ struct {
+ /* Live range, register class and spill offset are computed during the
+ first register allocator pass and remain unchanged after that. */
+
+ /* This vreg becomes live with this instruction (inclusive). Contains
+ either an instruction number or INVALID_INSTRNO. */
+ Short live_after;
+ /* This vreg becomes dead before this instruction (exclusive). Contains
+ either an instruction number or INVALID_INSTRNO. */
+ Short dead_before;
+ /* What kind of register this is. */
+ HRegClass reg_class;
+
+ /* What is its current disposition? */
+ enum { Unallocated, /* Neither spilled nor assigned to a real reg. */
+ Assigned, /* Assigned to a real register, viz rreg. */
+ Spilled /* Spilled to the spill slot. */
+ } disp;
+
+ /* If .disp == Assigned, what rreg is it bound to? */
+ HReg rreg;
+
+ /* The "home" spill slot. The offset is relative to the beginning of
+ the guest state. */
+ UShort spill_offset;
+ }
+ VRegState;
+
+/* The allocator also maintains a redundant mapping (rreg_state) from rreg
+ numbers back to vregs. It is redundant because whenever rreg_state[r].vreg
+ refers to vreg v, we also have hregIndex(vreg_state[v].rreg) == r -- that is,
+ the two entries point at each other. The purpose of this is to speed up
+ activities which involve looking for a particular rreg: there is no need to
+ scan the vreg_state looking for it, just index directly into rreg_state.
+ The FAQ "does this rreg already have an associated vreg" is the main
+ beneficiary.
+ The identity of the real register is not recorded here, because the index
+ of this structure in |rreg_state| is the index number of the register, and
+ the register itself can be extracted from the RRegUniverse (univ). */
+typedef
+ struct {
+ /* What is its current disposition? */
+ enum { Free, /* Not bound to any vreg. */
+ Bound, /* Bound to a vreg, viz vreg. */
+ Reserved /* Reserved for an instruction. */
+ } disp;
+
+ /* If .disp == Bound, what vreg is it bound to? */
+ HReg vreg;
+ }
+ RRegState;
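+/* A small example of the mutual-pointing invariant (hypothetical indices): if
+ rreg_state[5].disp == Bound and rreg_state[5].vreg is vreg 18, then
+ vreg_state[18].disp == Assigned and hregIndex(vreg_state[18].rreg) == 5.
+ The per-instruction sanity checks in stage 3 below verify this in both
+ directions. */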
+
+/* Records information on a real-register live range, associated with
+ a particular real register. Computed once; does not change. */
+typedef
+ struct {
+ /* This rreg becomes live with this instruction (inclusive). Contains
+ either an instruction number or INVALID_INSTRNO. */
+ Short live_after;
+ /* This rreg becomes dead before this instruction (exclusive). Contains
+ either an instruction number or INVALID_INSTRNO. */
+ Short dead_before;
+ }
+ RRegLR;
+
+/* All live ranges for a single rreg, plus a pointer to the one currently in effect.
+ Live ranges are computed during the first register allocator pass and remain
+ unchanged after that.
+ The identity of the real register is not recorded here, because the index
+ of this structure in |rreg_lr_state| is the index number of the register, and
+ the register itself can be extracted from the RRegUniverse (univ). */
+typedef
+ struct {
+ RRegLR* lrs;
+ UInt lrs_size;
+ UInt lrs_used;
+
+ /* Live range corresponding to the currently processed instruction.
+ Points into |lrs| array. */
+ RRegLR *lr_current;
+ UInt lr_current_idx;
+ }
+ RRegLRState;
+
+#define IS_VALID_VREGNO(v) ((v) >= 0 && (v) < n_vregs)
+#define IS_VALID_RREGNO(r) ((r) >= 0 && (r) < n_rregs)
+
+/* Compute the index of the highest and lowest 1 in a ULong, respectively.
+ Results are undefined if the argument is zero. Don't pass it zero :) */
+static inline UInt ULong__maxIndex ( ULong w64 ) {
+ return 63 - __builtin_clzll(w64);
+}
+
+static inline UInt ULong__minIndex ( ULong w64 ) {
+ return __builtin_ctzll(w64);
+}
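+/* For example, ULong__minIndex(0x28) == 3 and ULong__maxIndex(0x28) == 5,
+ since 0x28 is 101000 in binary. */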
+
+static inline void enlarge_rreg_lrs(RRegLRState* rreg_lrs)
+{
+ vassert(rreg_lrs->lrs_used == rreg_lrs->lrs_size);
+
+ RRegLR* lr2 = LibVEX_Alloc_inline(2 * rreg_lrs->lrs_used * sizeof(RRegLR));
+ for (UInt l = 0; l < rreg_lrs->lrs_used; l++) {
+ lr2[l] = rreg_lrs->lrs[l];
+ }
+
+ rreg_lrs->lrs = lr2;
+ rreg_lrs->lrs_size = 2 * rreg_lrs->lrs_used;
+}
+
+static inline void print_state(
+ const RegAllocControl* con,
+ const VRegState* vreg_state, UInt n_vregs,
+ const RRegState* rreg_state, UInt n_rregs,
+ const RRegLRState* rreg_lr_state,
+ UShort current_ii)
+{
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ const VRegState* vreg = &vreg_state[v_idx];
+
+ if (vreg->live_after == INVALID_INSTRNO) {
+ continue; /* This is a dead vreg; it never becomes live. */
+ }
+ vex_printf("vreg_state[%3u] \t", v_idx);
+
+ UInt written;
+ switch (vreg->disp) {
+ case Unallocated:
+ written = vex_printf("unallocated");
+ break;
+ case Assigned:
+ written = vex_printf("assigned to ");
+ written += con->ppReg(vreg->rreg);
+ break;
+ case Spilled:
+ written = vex_printf("spilled at offset %u", vreg->spill_offset);
+ break;
+ default:
+ vassert(0);
+ }
+
+ for (Int w = 30 - written; w > 0; w--) {
+ vex_printf(" ");
+ }
+
+ if (vreg->live_after > (Short) current_ii) {
+ vex_printf("[not live yet]\n");
+ } else if ((Short) current_ii >= vreg->dead_before) {
+ vex_printf("[now dead]\n");
+ } else {
+ vex_printf("[live]\n");
+ }
+ }
+
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ const RRegState* rreg = &rreg_state[r_idx];
+ const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+ vex_printf("rreg_state[%2u] = ", r_idx);
+ UInt written = con->ppReg(con->univ->regs[r_idx]);
+ for (Int w = 10 - written; w > 0; w--) {
+ vex_printf(" ");
+ }
+
+ switch (rreg->disp) {
+ case Free:
+ vex_printf("free\n");
+ break;
+ case Bound:
+ vex_printf("bound for ");
+ con->ppReg(rreg->vreg);
+ vex_printf("\n");
+ break;
+ case Reserved:
+ vex_printf("reserved - live range [%d, %d)\n",
+ rreg_lrs->lr_current->live_after,
+ rreg_lrs->lr_current->dead_before);
+ break;
+ }
+ }
+}
+
+static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out,
+ const RegAllocControl* con, const HChar* why)
+{
+ if (DEBUG_REGALLOC) {
+ vex_printf("** ");
+ con->ppInstr(instr, con->mode64);
+ if (why != NULL) {
+ vex_printf(" (%s)", why);
+ }
+ vex_printf("\n\n");
+ }
+
+ addHInstr(instrs_out, instr);
+}
+
+/* Spills a vreg assigned to some rreg.
+ The vreg is spilled and the rreg is freed.
+ Returns rreg's index. */
+static inline UInt spill_vreg(
+ HReg vreg, UInt v_idx, UInt current_ii, VRegState* vreg_state, UInt n_vregs,
+ RRegState* rreg_state, UInt n_rregs, HInstrArray* instrs_out,
+ const RegAllocControl* con)
+{
+ /* Check some invariants first. */
+ vassert(IS_VALID_VREGNO((v_idx)));
+ vassert(vreg_state[v_idx].disp == Assigned);
+ HReg rreg = vreg_state[v_idx].rreg;
+ UInt r_idx = hregIndex(rreg);
+ vassert(IS_VALID_RREGNO(r_idx));
+ vassert(hregClass(con->univ->regs[r_idx]) == hregClass(vreg));
+ vassert(vreg_state[v_idx].dead_before > (Short) current_ii);
+ vassert(vreg_state[v_idx].reg_class != HRcINVALID);
+
+ /* Generate spill. */
+ HInstr* spill1 = NULL;
+ HInstr* spill2 = NULL;
+ con->genSpill(&spill1, &spill2, rreg, vreg_state[v_idx].spill_offset,
+ con->mode64);
+ vassert(spill1 != NULL || spill2 != NULL); /* cannot be both NULL */
+ if (spill1 != NULL) {
+ emit_instr(spill1, instrs_out, con, "spill1");
+ }
+ if (spill2 != NULL) {
+ emit_instr(spill2, instrs_out, con, "spill2");
+ }
+
+ /* Update register allocator state. */
+ vreg_state[v_idx].disp = Spilled;
+ vreg_state[v_idx].rreg = INVALID_HREG;
+ rreg_state[r_idx].disp = Free;
+ rreg_state[r_idx].vreg = INVALID_HREG;
+
+ return r_idx;
+}
+
+/* Chooses a vreg to be spilled based on various criteria.
+ The vreg must not be from the instruction being processed, that is, it must
+ not be listed in instr_regusage->vRegs. */
+static inline HReg find_vreg_to_spill(
+ VRegState* vreg_state, UInt n_vregs,
+ RRegState* rreg_state, UInt n_rregs,
+ const HRegUsage* instr_regusage, HRegClass target_hregclass,
+ const HRegUsage* reg_usage, UInt scan_forward_from, UInt scan_forward_max,
+ const RegAllocControl* con)
+{
+ /* Scan forwards a few instructions to find the most distant mentioned
+ use of a vreg. We can scan in the range of (inclusive):
+ - reg_usage[scan_forward_from]
+ - reg_usage[scan_forward_end], where scan_forward_end
+ = MIN(scan_forward_max, scan_forward_from + FEW_INSTRUCTIONS). */
+# define FEW_INSTRUCTIONS 5
+ UInt scan_forward_end
+ = (scan_forward_max <= scan_forward_from + FEW_INSTRUCTIONS) ?
+ scan_forward_max : scan_forward_from + FEW_INSTRUCTIONS;
+# undef FEW_INSTRUCTIONS
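+ /* For example, with scan_forward_from == 10 and scan_forward_max == 100,
+ scan_forward_end is 15; with scan_forward_max == 12, it is 12. */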
+
+ HReg vreg_found = INVALID_HREG;
+ UInt distance_so_far = 0;
+
+ for (UInt r_idx = con->univ->allocable_start[target_hregclass];
+ r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) {
+ if (rreg_state[r_idx].disp == Bound) {
+ HReg vreg = rreg_state[r_idx].vreg;
+ if (! HRegUsage__contains(instr_regusage, vreg)) {
+ UInt ii = scan_forward_from;
+ for ( ; ii <= scan_forward_end; ii++) {
+ if (HRegUsage__contains(®_usage[ii], vreg)) {
+ break;
+ }
+ }
+
+ if (ii - scan_forward_from > distance_so_far) {
+ distance_so_far = ii - scan_forward_from;
+ vreg_found = vreg;
+ if (ii > scan_forward_end) {
+ break; /* Not used anywhere in the window. Nothing could be better. */
+ }
+ }
+ }
+ }
+ }
+
+ if (hregIsInvalid(vreg_found)) {
+ vex_printf("doRegisterAllocation_v3: cannot find a register in class: ");
+ ppHRegClass(target_hregclass);
+ vex_printf("\n");
+ vpanic("doRegisterAllocation_v3: cannot find a register.");
+ }
+
+ return vreg_found;
+}
+
+/* Find a free rreg of the correct class.
+ Tries to find an rreg whose live range (if any) is as far ahead in the
+ incoming instruction stream as possible. An ideal rreg candidate is
+ a callee-save register because it won't be used for parameter passing
+ around helper function calls. */
+static Bool find_free_rreg(
+ VRegState* vreg_state, UInt n_vregs,
+ RRegState* rreg_state, UInt n_rregs,
+ const RRegLRState* rreg_lr_state,
+ UInt current_ii, HRegClass target_hregclass,
+ Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found)
+{
+ Bool found = False;
+ UInt distance_so_far = 0; /* running max for |live_after - current_ii| */
+
+ for (UInt r_idx = con->univ->allocable_start[target_hregclass];
+ r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) {
+ const RRegState* rreg = &rreg_state[r_idx];
+ const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+ if (rreg->disp == Free) {
+ if (rreg_lrs->lrs_used == 0) {
+ found = True;
+ *r_idx_found = r_idx;
+ break; /* There could be nothing better, so break now. */
+ } else {
+ const RRegLR* lr = rreg_lrs->lr_current;
+ if (lr->live_after > (Short) current_ii) {
+ /* Not live, yet. */
+ if ((lr->live_after - (Short) current_ii) > distance_so_far) {
+ distance_so_far = lr->live_after - (Short) current_ii;
+ found = True;
+ *r_idx_found = r_idx;
+ }
+ } else if ((Short) current_ii >= lr->dead_before) {
+ /* Now dead. Effectively as if there is no LR now. */
+ found = True;
+ *r_idx_found = r_idx;
+ break; /* There could be nothing better, so break now. */
+ } else {
+ /* Going live for this instruction. This could happen only when
+ rregs are being reserved en masse, for example before
+ a helper call. */
+ vassert(reserve_phase);
+ }
+ }
+ }
+ }
+
+ return found;
+}
+
+/* A target-independent register allocator (v3). Requires various functions
+ which it uses to deal abstractly with instructions and registers, since it
+ cannot have any target-specific knowledge.
+
+ Returns a new list of instructions, which, as a result of the behaviour of
+ mapRegs, will be in-place modifications of the original instructions.
+
+ Requires that the incoming code has been generated using vreg numbers
+ 0, 1 .. n_vregs-1. Appearance of a vreg outside that range is a checked
+ run-time error.
+
+ Takes unallocated instructions and returns allocated instructions.
+*/
+HInstrArray* doRegisterAllocation_v3(
+ /* Incoming virtual-registerised code. */
+ HInstrArray* instrs_in,
+
+ /* Register allocator controls to use. */
+ const RegAllocControl* con
+)
+{
+ vassert((con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN) == 0);
+
+ /* The main register allocator state. */
+ UInt n_vregs = instrs_in->n_vregs;
+ VRegState* vreg_state = NULL;
+ if (n_vregs > 0) {
+ vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(VRegState));
+ }
+
+ /* If this is not so, the universe we have is nonsensical. */
+ UInt n_rregs = con->univ->allocable;
+ vassert(n_rregs > 0);
+ STATIC_ASSERT(N_RREGUNIVERSE_REGS == 64);
+
+ /* Redundant rreg -> vreg state. */
+ RRegState* rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
+
+ /* Info on rreg live ranges. */
+ RRegLRState* rreg_lr_state
+ = LibVEX_Alloc_inline(n_rregs * sizeof(RRegLRState));
+
+ /* Info on register usage in the incoming instruction array. Computed once
+ and remains unchanged, more or less; updated sometimes by the
+ direct-reload optimisation. */
+ HRegUsage* reg_usage
+ = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
+
+ /* The live range numbers are signed shorts, and so limiting the
+ number of instructions to 15000 comfortably guards against them
+ overflowing 32k. */
+ vassert(instrs_in->arr_used <= 15000);
+
+ /* The output array of instructions. */
+ HInstrArray* instrs_out = newHInstrArray();
+
+
+# define OFFENDING_VREG(_v_idx, _instr, _mode) \
+ do { \
+ vex_printf("\n\nOffending vreg = %u\n", (_v_idx)); \
+ vex_printf("\nOffending instruction = "); \
+ con->ppInstr((_instr), con->mode64); \
+ vex_printf("\n"); \
+ vpanic("doRegisterAllocation_v3: first event for vreg is "#_mode \
+ " (should be Write)"); \
+ } while (0)
+
+# define OFFENDING_RREG(_r_idx, _instr, _mode) \
+ do { \
+ vex_printf("\n\nOffending rreg = "); \
+ con->ppReg(con->univ->regs[(_r_idx)]); \
+ vex_printf("\nOffending instruction = "); \
+ con->ppInstr((_instr), con->mode64); \
+ vex_printf("\n"); \
+ vpanic("doRegisterAllocation_v3: first event for rreg is "#_mode \
+ " (should be Write)"); \
+ } while (0)
+
+
+/* Finds an rreg of the correct class.
+ If a free rreg is not found, then spills a vreg not used by the current
+ instruction and makes free the corresponding rreg. */
+# define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase) \
+ ({ \
+ UInt _r_free_idx = -1; \
+ Bool free_rreg_found = find_free_rreg( \
+ vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \
+ (_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx); \
+ if (!free_rreg_found) { \
+ HReg vreg_to_spill = find_vreg_to_spill( \
+ vreg_state, n_vregs, rreg_state, n_rregs, \
+ ®_usage[(_ii)], (_reg_class), \
+ reg_usage, (_ii) + 1, \
+ instrs_in->arr_used - 1, con); \
+ _r_free_idx = spill_vreg(vreg_to_spill, hregIndex(vreg_to_spill), \
+ (_ii), vreg_state, n_vregs, \
+ rreg_state, n_rregs, \
+ instrs_out, con); \
+ } \
+ \
+ vassert(IS_VALID_RREGNO(_r_free_idx)); \
+ \
+ _r_free_idx; \
+ })
+
+
+ /* --- Stage 0. Initialize the state. --- */
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ vreg_state[v_idx].live_after = INVALID_INSTRNO;
+ vreg_state[v_idx].dead_before = INVALID_INSTRNO;
+ vreg_state[v_idx].reg_class = HRcINVALID;
+ vreg_state[v_idx].disp = Unallocated;
+ vreg_state[v_idx].rreg = INVALID_HREG;
+ vreg_state[v_idx].spill_offset = 0;
+ }
+
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ rreg_state[r_idx].disp = Free;
+ rreg_state[r_idx].vreg = INVALID_HREG;
+ }
+
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+ rreg_lrs->lrs_size = 4;
+ rreg_lrs->lrs = LibVEX_Alloc_inline(rreg_lrs->lrs_size
+ * sizeof(RRegLR));
+ rreg_lrs->lrs_used = 0;
+ rreg_lrs->lr_current = &rreg_lrs->lrs[0];
+ rreg_lrs->lr_current_idx = 0;
+ }
+
+ /* --- Stage 1. Scan the incoming instructions. --- */
+ for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
+ const HInstr* instr = instrs_in->arr[ii];
+
+ con->getRegUsage(®_usage[ii], instr, con->mode64);
+
+ if (0) {
+ vex_printf("\n%u stage 1: ", ii);
+ con->ppInstr(instr, con->mode64);
+ vex_printf("\n");
+ ppHRegUsage(con->univ, ®_usage[ii]);
+ }
+
+ /* Process virtual registers mentioned in the instruction. */
+ for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
+ HReg vreg = reg_usage[ii].vRegs[j];
+ vassert(hregIsVirtual(vreg));
+
+ UInt v_idx = hregIndex(vreg);
+ if (!IS_VALID_VREGNO(v_idx)) {
+ vex_printf("\n");
+ con->ppInstr(instr, con->mode64);
+ vex_printf("\n");
+ vex_printf("vreg %u (n_vregs %u)\n", v_idx, n_vregs);
+ vpanic("doRegisterAllocation_v3: out-of-range vreg");
+ }
+
+ /* Note the register class. */
+ if (vreg_state[v_idx].reg_class == HRcINVALID) {
+ /* First mention of this vreg. */
+ vreg_state[v_idx].reg_class = hregClass(vreg);
+ } else {
+ /* Seen it before, so check for consistency. */
+ vassert(vreg_state[v_idx].reg_class == hregClass(vreg));
+ }
+
+ /* Consider live ranges. */
+ switch (reg_usage[ii].vMode[j]) {
+ case HRmRead:
+ if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+ OFFENDING_VREG(v_idx, instr, "Read");
+ }
+ vreg_state[v_idx].dead_before = toShort(ii + 1);
+ break;
+ case HRmWrite:
+ if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+ vreg_state[v_idx].live_after = toShort(ii);
+ }
+ vreg_state[v_idx].dead_before = toShort(ii + 1);
+ break;
+ case HRmModify:
+ if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+ OFFENDING_VREG(v_idx, instr, "Modify");
+ }
+ vreg_state[v_idx].dead_before = toShort(ii + 1);
+ break;
+ default:
+ vassert(0);
+ }
+ }
+
+ /* Process real registers mentioned in the instruction. */
+ const ULong rRead = reg_usage[ii].rRead;
+ const ULong rWritten = reg_usage[ii].rWritten;
+ const ULong rMentioned = rRead | rWritten;
+
+ if (rMentioned != 0) {
+ UInt rReg_minIndex = ULong__minIndex(rMentioned);
+ UInt rReg_maxIndex = ULong__maxIndex(rMentioned);
+ /* Don't bother to look at registers which are not available
+ to the allocator such as the stack or guest state pointers. These
+ are unavailable to the register allocator and so we never visit
+ them. We asserted above that n_rregs > 0, so (n_rregs - 1) is
+ safe. */
+ if (rReg_maxIndex >= n_rregs) {
+ rReg_maxIndex = n_rregs - 1;
+ }
+
+ for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) {
+ const ULong jMask = 1ULL << r_idx;
+
+ if (LIKELY((rMentioned & jMask) == 0)) {
+ continue;
+ }
+
+ RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+ const Bool isR = (rRead & jMask) != 0;
+ const Bool isW = (rWritten & jMask) != 0;
+
+ if (isW && !isR) {
+ if (rreg_lrs->lrs_used == rreg_lrs->lrs_size) {
+ enlarge_rreg_lrs(rreg_lrs);
+ }
+
+ rreg_lrs->lrs[rreg_lrs->lrs_used].live_after = toShort(ii);
+ rreg_lrs->lrs[rreg_lrs->lrs_used].dead_before = toShort(ii + 1);
+ rreg_lrs->lrs_used += 1;
+ } else if (!isW && isR) {
+ if ((rreg_lrs->lrs_used == 0)
+ || (rreg_lrs->lrs[rreg_lrs->lrs_used - 1].live_after
+ == INVALID_INSTRNO)) {
+ OFFENDING_RREG(r_idx, instr, "Read");
+ }
+ rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before
+ = toShort(ii + 1);
+ } else {
+ vassert(isR && isW);
+ if ((rreg_lrs->lrs_used == 0)
+ || (rreg_lrs->lrs[rreg_lrs->lrs_used - 1].live_after
+ == INVALID_INSTRNO)) {
+ OFFENDING_RREG(r_idx, instr, "Modify");
+ }
+ rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before
+ = toShort(ii + 1);
+ }
+ }
+ }
+ }
+
+ if (DEBUG_REGALLOC) {
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ vex_printf("vreg %3u: [%3d, %3d)\n",
+ v_idx, vreg_state[v_idx].live_after,
+ vreg_state[v_idx].dead_before);
+ }
+
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ vex_printf("rreg %2u (", r_idx);
+ UInt written = con->ppReg(con->univ->regs[r_idx]);
+ vex_printf("):");
+ for (Int t = 15 - written; t > 0; t--) {
+ vex_printf(" ");
+ }
+
+ const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+ for (UInt l = 0; l < rreg_lrs->lrs_used; l++) {
+ vex_printf("[%3d, %3d) ",
+ rreg_lrs->lrs[l].live_after, rreg_lrs->lrs[l].dead_before);
+ }
+ vex_printf("\n");
+ }
+ }
+
+ /* --- Stage 2. Allocate spill slots. --- */
+
+ /* Each spill slot is 8 bytes long. For vregs which take more than 64 bits
+ to spill (for example classes Flt64 and Vec128), we have to allocate two
+ consecutive spill slots. For 256 bit registers (class Vec256), we have to
+ allocate four consecutive spill slots.
+
+ For Vec128-class on PowerPC, the spill slot's actual address must be
+ 16-byte aligned. Since the spill slot's address is computed as an offset
+ from the guest state pointer, and since the user of the generated code
+ must set that pointer to a 32-byte aligned value, we have the residual
+ obligation here of choosing a 16-byte aligned spill slot offset for
+ Vec128-class values. Since each spill slot is 8 bytes long, that means for
+ Vec128-class values we must allocate a spill slot number which is
+ zero mod 2.
+
+ Similarly, for Vec256 class on amd64, find a spill slot number which is
+ zero mod 4. This guarantees it will be 32-byte aligned, which isn't
+ actually necessary on amd64 (we use movUpd etc to spill), but seems like
+ a good practice.
+
+ Do a rank-based allocation of vregs to spill slot numbers. We put as few
+ values as possible in spill slots, but nevertheless need to have a spill
+ slot available for all vregs, just in case. */
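+ /* As a concrete illustration: a Vec128-class vreg assigned the even slot
+ pair (6,7) gets spill_offset = guest_sizeB * 3 + 6 * 8; the vassert further
+ below checks that such offsets are 16-aligned. */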
+
+# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
+ STATIC_ASSERT((N_SPILL64S % 2) == 0);
+ STATIC_ASSERT((LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN) == 0);
+
+ Short ss_busy_until_before[N_SPILL64S];
+ vex_bzero(&ss_busy_until_before, sizeof(ss_busy_until_before));
+
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ /* True iff this vreg is unused, in which case we also expect that its
+ reg_class field has not been set. */
+ if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+ vassert(vreg_state[v_idx].reg_class == HRcINVALID);
+ continue;
+ }
+
+ /* The spill slots are 64 bits in size. As per the comment on definition
+ of HRegClass in host_generic_regs.h, that means, to spill a vreg of
+ class Flt64 or Vec128, we'll need to find two adjacent spill slots to
+ use. For Vec256, we'll need to find four adjacent slots to use. Note,
+ this logic needs to be kept in sync with the size info on the
+ definition of HRegClass. */
+ UInt ss_no;
+ switch (vreg_state[v_idx].reg_class) {
+ case HRcFlt64:
+ case HRcVec128:
+ /* Find two adjacent free slots which provide up to 128 bits to
+ spill the vreg. Since we are trying to find an even:odd pair,
+ move along in steps of 2 (slots). */
+ for (ss_no = 0; ss_no < N_SPILL64S - 1; ss_no += 2)
+ if (ss_busy_until_before[ss_no + 0] <= vreg_state[v_idx].live_after
+ && ss_busy_until_before[ss_no + 1] <= vreg_state[v_idx].live_after)
+ break;
+ if (ss_no >= N_SPILL64S - 1) {
+ vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
+ }
+ ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before;
+ ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before;
+ break;
+ default:
+ /* The ordinary case -- just find a single lowest-numbered spill
+ slot which is available at the start point of this interval,
+ and assign the interval to it. */
+ for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) {
+ if (ss_busy_until_before[ss_no] <= vreg_state[v_idx].live_after)
+ break;
+ }
+ if (ss_no == N_SPILL64S) {
+ vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
+ }
+ ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before;
+ break;
+ }
+
+ /* This reflects VEX's hard-wired knowledge of the guest state layout:
+ the guest state itself, then two equal sized areas following it for two
+ sets of shadow state, and then the spill area. */
+ vreg_state[v_idx].spill_offset
+ = toShort(con->guest_sizeB * 3 + ss_no * 8);
+
+ /* Independent check that we've made a sane choice of the slot. */
+ switch (vreg_state[v_idx].reg_class) {
+ case HRcVec128: case HRcFlt64:
+ vassert((vreg_state[v_idx].spill_offset % 16) == 0);
+ break;
+ default:
+ vassert((vreg_state[v_idx].spill_offset % 8) == 0);
+ break;
+ }
+ }
+
+ if (0) {
+ vex_printf("\n\n");
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++)
+ vex_printf("vreg %3u --> spill offset %u\n",
+ v_idx, vreg_state[v_idx].spill_offset);
+ }
+
+
+ /* --- Stage 3. Process instructions. --- */
+ for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
+ HInstr* instr = instrs_in->arr[ii];
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("\n====----====---- Instr %d ----====----====\n", ii);
+ vex_printf("---- ");
+ con->ppInstr(instrs_in->arr[ii], con->mode64);
+ vex_printf("\n\nInitial state:\n");
+ print_state(con, vreg_state, n_vregs, rreg_state, n_rregs,
+ rreg_lr_state, ii);
+ vex_printf("\n");
+ }
+
+ /* ------------ Sanity checks ------------ */
+
+ /* Sanity checks are relatively expensive. So they are done only once
+ every 17 instructions, and just before the last instruction. */
+ Bool do_sanity_check
+ = toBool(
+ SANITY_CHECKS_EVERY_INSTR
+ || ii == instrs_in->arr_used - 1
+ || (ii > 0 && (ii % 17) == 0)
+ );
+
+ if (do_sanity_check) {
+ /* Sanity check: the vreg_state and rreg_state mutually-redundant
+ mappings are consistent. If vreg_state[v].rreg points at some
+ rreg_state entry then that rreg_state entry should point back at
+ vreg_state[v]. */
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ if (vreg_state[v_idx].disp == Assigned) {
+ vassert(!hregIsVirtual(vreg_state[v_idx].rreg));
+
+ UInt r_idx = hregIndex(vreg_state[v_idx].rreg);
+ vassert(IS_VALID_RREGNO(r_idx));
+ vassert(rreg_state[r_idx].disp == Bound);
+ vassert(hregIndex(rreg_state[r_idx].vreg) == v_idx);
+
+ vassert(hregClass(vreg_state[v_idx].rreg)
+ == hregClass(con->univ->regs[r_idx]));
+ }
+ }
+
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ if (rreg_state[r_idx].disp == Bound) {
+ vassert(hregIsVirtual(rreg_state[r_idx].vreg));
+
+ UInt v_idx = hregIndex(rreg_state[r_idx].vreg);
+ vassert(IS_VALID_VREGNO(v_idx));
+ vassert(vreg_state[v_idx].disp == Assigned);
+ vassert(hregIndex(vreg_state[v_idx].rreg) == r_idx);
+ }
+ }
+
+ /* Sanity check: if rreg has been marked as Reserved, there must be
+ a corresponding hard live range for it. */
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ if (rreg_state[r_idx].disp == Reserved) {
+ const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+ vassert(rreg_lrs->lrs_used > 0);
+ vassert(rreg_lrs->lr_current_idx < rreg_lrs->lrs_used);
+ vassert(rreg_lrs->lr_current->live_after <= (Short) ii);
+ vassert((Short) ii < rreg_lrs->lr_current->dead_before);
+ }
+ }
+ }
+
+
+ /* --- MOV coalescing --- */
+ /* Optimise register coalescing:
+ MOV v <-> v coalescing (done here).
+ MOV v <-> r coalescing (TODO: not yet). */
+ /* If doing a reg-reg move between two vregs, and the src's live
+ range ends here and the dst's live range starts here, bind the dst
+ to the src's rreg, and that's all. */
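+ /* For instance (hypothetical vreg numbers), given a "mov v5 -> v9" where
+ v5 dies at this instruction, v9 becomes live here and v5 is currently
+ assigned to some rreg, then v9 simply inherits v5's rreg and no instruction
+ is emitted for the MOV at all. */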
+ HReg vregS = INVALID_HREG;
+ HReg vregD = INVALID_HREG;
+ if (con->isMove(instr, &vregS, &vregD)) {
+ if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) {
+ /* Check that |isMove| is not telling us a bunch of lies ... */
+ vassert(hregClass(vregS) == hregClass(vregD));
+ UInt vs_idx = hregIndex(vregS);
+ UInt vd_idx = hregIndex(vregD);
+ vassert(IS_VALID_VREGNO(vs_idx));
+ vassert(IS_VALID_VREGNO(vd_idx));
+
+ if ((vreg_state[vs_idx].dead_before == ii + 1)
+ && (vreg_state[vd_idx].live_after == ii)
+ && (vreg_state[vs_idx].disp == Assigned)) {
+
+ /* Live ranges are adjacent and source vreg is bound.
+ Finally we can do the coalescing. */
+ HReg rreg = vreg_state[vs_idx].rreg;
+ vreg_state[vd_idx].disp = Assigned;
+ vreg_state[vd_idx].rreg = rreg;
+ vreg_state[vs_idx].disp = Unallocated;
+ vreg_state[vs_idx].rreg = INVALID_HREG;
+
+ UInt r_idx = hregIndex(rreg);
+ vassert(rreg_state[r_idx].disp == Bound);
+ rreg_state[r_idx].vreg = vregD;
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("coalesced: ");
+ con->ppReg(vregS);
+ vex_printf(" -> ");
+ con->ppReg(vregD);
+ vex_printf("\n\n");
+ }
+
+ /* In rare cases it can happen that vregD's live range ends
+ here. Check for that and, if so, free the vreg and rreg.
+ This effectively means that either the translated program
+ contained dead code (but VEX iropt passes are pretty good
+ at eliminating it) or the VEX backend generated dead code. */
+ if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
+ vreg_state[vd_idx].disp = Unallocated;
+ vreg_state[vd_idx].rreg = INVALID_HREG;
+ rreg_state[r_idx].disp = Free;
+ rreg_state[r_idx].vreg = INVALID_HREG;
+ }
+
+ /* Move on to the next instruction. We skip the post-instruction
+ stuff because all required house-keeping was done here. */
+ continue;
+ }
+ }
+ }
+
+
+ /* --- Reserve and free rregs if needed. --- */
+ /* If the rreg enters its hard live range and is not free:
+ 1. If the corresponding vreg is not used by the instruction, spill it.
+ 2. If the corresponding vreg is used by the instruction, then:
+ 2a. If there are no free rregs, spill a vreg not used by this
+ instruction.
+ 2b. Move the corresponding vreg to a free rreg. This is better than
+ spilling it and immediately reloading it.
+ */
+ const ULong rRead = reg_usage[ii].rRead;
+ const ULong rWritten = reg_usage[ii].rWritten;
+ const ULong rMentioned = rRead | rWritten;
+
+ if (rMentioned != 0) {
+ UInt rReg_minIndex = ULong__minIndex(rMentioned);
+ UInt rReg_maxIndex = ULong__maxIndex(rMentioned);
+ if (rReg_maxIndex >= n_rregs) {
+ rReg_maxIndex = n_rregs - 1;
+ }
+
+ for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) {
+ const ULong jMask = 1ULL << r_idx;
+
+ if (LIKELY((rMentioned & jMask) == 0)) {
+ continue;
+ }
+
+ RRegState* rreg = &rreg_state[r_idx];
+ const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+ if (LIKELY(rreg_lrs->lrs_used == 0)) {
+ continue;
+ }
+ if (rreg->disp == Reserved) {
+ continue;
+ }
+
+ if ((rreg_lrs->lr_current->live_after <= (Short) ii)
+ && ((Short) ii < rreg_lrs->lr_current->dead_before)) {
+
+ if (rreg->disp == Bound) {
+ /* Yes, there is an associated vreg. We need to deal with
+ it now somehow. */
+ HReg vreg = rreg->vreg;
+ UInt v_idx = hregIndex(vreg);
+
+ if (! HRegUsage__contains(&reg_usage[ii], vreg)) {
+ /* Spill the vreg. It is not used by this instruction. */
+ spill_vreg(vreg, v_idx, ii, vreg_state, n_vregs,
+ rreg_state, n_rregs, instrs_out, con);
+ } else {
+ /* Find or make a free rreg to move this vreg to. */
+ UInt r_free_idx = FIND_OR_MAKE_FREE_RREG(
+ ii, v_idx, vreg_state[v_idx].reg_class, True);
+
+ /* Generate "move" between real registers. */
+ HInstr* move = con->genMove(con->univ->regs[r_idx],
+ con->univ->regs[r_free_idx], con->mode64);
+ vassert(move != NULL);
+ emit_instr(move, instrs_out, con, "move");
+
+ /* Update the register allocator state. */
+ vassert(vreg_state[v_idx].disp == Assigned);
+ vreg_state[v_idx].rreg = con->univ->regs[r_free_idx];
+ rreg_state[r_free_idx].disp = Bound;
+ rreg_state[r_free_idx].vreg = vreg;
+ rreg->disp = Free;
+ rreg->vreg = INVALID_HREG;
+ }
+ }
+
+ /* Finally claim the rreg as reserved. */
+ rreg->disp = Reserved;
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("rreg has been reserved: ");
+ con->ppReg(con->univ->regs[r_idx]);
+ vex_printf("\n\n");
+ }
+ }
+ }
+ }
+
+
+ /* --- Direct reload optimisation. --- */
+ /* If the instruction reads exactly one vreg which is currently spilled,
+ and this is the last use of that vreg, see if we can convert
+ the instruction into one that reads directly from the spill slot.
+ This is clearly only possible for x86 and amd64 targets, since ppc and
+ arm are load-store architectures. If successful, replace
+ instrs_in->arr[ii] with this new instruction, and recompute
+ its reg_usage, so that the change is invisible to the standard-case
+ handling that follows. */
+ if ((con->directReload != NULL) && (reg_usage[ii].n_vRegs <= 2)) {
+ Bool debug_direct_reload = False;
+ UInt nreads = 0;
+ HReg vreg_found = INVALID_HREG;
+ Short spill_offset = 0;
+
+ for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
+ HReg vreg = reg_usage[ii].vRegs[j];
+ vassert(hregIsVirtual(vreg));
+
+ if (reg_usage[ii].vMode[j] == HRmRead) {
+ nreads++;
+ UInt v_idx = hregIndex(vreg);
+ vassert(IS_VALID_VREGNO(v_idx));
+ if (vreg_state[v_idx].disp == Spilled) {
+ /* Is this its last use? */
+ vassert(vreg_state[v_idx].dead_before >= (Short) (ii + 1));
+ if ((vreg_state[v_idx].dead_before == (Short) (ii + 1))
+ && hregIsInvalid(vreg_found)) {
+ vreg_found = vreg;
+ spill_offset = vreg_state[v_idx].spill_offset;
+ }
+ }
+ }
+ }
+
+ if (!hregIsInvalid(vreg_found) && (nreads == 1)) {
+ if (reg_usage[ii].n_vRegs == 2) {
+ vassert(! sameHReg(reg_usage[ii].vRegs[0],
+ reg_usage[ii].vRegs[1]));
+ }
+
+ HInstr* reloaded = con->directReload(instrs_in->arr[ii],
+ vreg_found, spill_offset);
+ if (debug_direct_reload && (reloaded != NULL)) {
+ vex_printf("[%3d] ", spill_offset);
+ ppHReg(vreg_found);
+ vex_printf(": ");
+ con->ppInstr(instr, con->mode64);
+ }
+ if (reloaded != NULL) {
+ /* Update info about the instruction, so it looks as if it had
+ been in this form all along. */
+ instr = reloaded;
+ instrs_in->arr[ii] = reloaded;
+ con->getRegUsage(&reg_usage[ii], instr, con->mode64);
+ if (debug_direct_reload) {
+ vex_printf(" --> ");
+ con->ppInstr(reloaded, con->mode64);
+ }
+ }
+
+ if (debug_direct_reload && (reloaded != NULL)) {
+ vex_printf("\n");
+ }
+ }
+ }
+
+
+ /* The vreg -> rreg map constructed and then applied to each
+ instruction. */
+ HRegRemap remap;
+ initHRegRemap(&remap);
+
+ /* --- Allocate vregs used by the instruction. --- */
+ /* Vregs used by the instruction can be in the following states:
+ - Unallocated: vreg is entering its live range. Find a free rreg.
+ - Assigned: we do nothing; rreg has been allocated previously.
+ - Spilled: Find a free rreg and reload vreg into it.
+ Naturally, finding a free rreg may involve spilling a vreg not used by
+ the instruction. */
+ for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
+ HReg vreg = reg_usage[ii].vRegs[j];
+ vassert(hregIsVirtual(vreg));
+
+ if (0) {
+ vex_printf("considering "); con->ppReg(vreg); vex_printf("\n");
+ }
+
+ UInt v_idx = hregIndex(vreg);
+ vassert(IS_VALID_VREGNO(v_idx));
+ HReg rreg = vreg_state[v_idx].rreg;
+ if (vreg_state[v_idx].disp == Assigned) {
+ UInt r_idx = hregIndex(rreg);
+ vassert(rreg_state[r_idx].disp == Bound);
+ addToHRegRemap(&remap, vreg, rreg);
+ } else {
+ vassert(hregIsInvalid(rreg));
+
+ /* Find or make a free rreg of the correct class. */
+ UInt r_idx = FIND_OR_MAKE_FREE_RREG(
+ ii, v_idx, vreg_state[v_idx].reg_class, False);
+ rreg = con->univ->regs[r_idx];
+
+ /* Generate a reload only if the vreg is spilled and is about to be
+ read or modified. If it is merely written then reloading it first
+ would be pointless. */
+ if ((vreg_state[v_idx].disp == Spilled)
+ && (reg_usage[ii].vMode[j] != HRmWrite)) {
+
+ HInstr* reload1 = NULL;
+ HInstr* reload2 = NULL;
+ con->genReload(&reload1, &reload2, rreg,
+ vreg_state[v_idx].spill_offset, con->mode64);
+ vassert(reload1 != NULL || reload2 != NULL);
+ if (reload1 != NULL) {
+ emit_instr(reload1, instrs_out, con, "reload1");
+ }
+ if (reload2 != NULL) {
+ emit_instr(reload2, instrs_out, con, "reload2");
+ }
+ }
+
+ rreg_state[r_idx].disp = Bound;
+ rreg_state[r_idx].vreg = vreg;
+ vreg_state[v_idx].disp = Assigned;
+ vreg_state[v_idx].rreg = rreg;
+ addToHRegRemap(&remap, vreg, rreg);
+ }
+ }
+
+ con->mapRegs(&remap, instr, con->mode64);
+ emit_instr(instr, instrs_out, con, NULL);
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("After dealing with current instruction:\n");
+ print_state(con, vreg_state, n_vregs, rreg_state, n_rregs,
+ rreg_lr_state, ii);
+ vex_printf("\n");
+ }
+
+ /* ------ Post-instruction actions. ------ */
+ /* Free rregs which:
+ - Have been reserved and whose hard live range ended.
+ - Have been bound to vregs whose live range ended. */
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ RRegState* rreg = &rreg_state[r_idx];
+ RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+ switch (rreg->disp) {
+ case Free:
+ break;
+ case Reserved:
+ if (rreg_lrs->lrs_used > 0) {
+ /* Consider "dead before" the next instruction. */
+ if (rreg_lrs->lr_current->dead_before <= (Short) ii + 1) {
+ rreg_state[r_idx].disp = Free;
+ rreg_state[r_idx].vreg = INVALID_HREG;
+ if (rreg_lrs->lr_current_idx < rreg_lrs->lrs_used - 1) {
+ rreg_lrs->lr_current_idx += 1;
+ rreg_lrs->lr_current
+ = &rreg_lrs->lrs[rreg_lrs->lr_current_idx];
+ }
+ }
+ }
+ break;
+ case Bound: {
+ UInt v_idx = hregIndex(rreg->vreg);
+ /* Consider "dead before" the next instruction. */
+ if (vreg_state[v_idx].dead_before <= (Short) ii + 1) {
+ vreg_state[v_idx].disp = Unallocated;
+ vreg_state[v_idx].rreg = INVALID_HREG;
+ rreg_state[r_idx].disp = Free;
+ rreg_state[r_idx].vreg = INVALID_HREG;
+ }
+ break;
+ }
+ default:
+ vassert(0);
+ }
+ }
+ }
+
+ return instrs_out;
+}
+
+/*----------------------------------------------------------------------------*/
+/*--- host_generic_reg_alloc3.c ---*/
+/*----------------------------------------------------------------------------*/
}
/* Generic printing for registers. */
-void ppHReg ( HReg r )
+UInt ppHReg ( HReg r )
{
if (hregIsInvalid(r)) {
- vex_printf("HReg_INVALID");
- return;
+ return vex_printf("HReg_INVALID");
}
const Bool isV = hregIsVirtual(r);
const HChar* maybe_v = isV ? "v" : "";
always zero for virtual registers, so that's pointless -- hence
show the index number instead. */
switch (hregClass(r)) {
- case HRcInt32: vex_printf("%%%sr%u", maybe_v, regNN); return;
- case HRcInt64: vex_printf("%%%sR%u", maybe_v, regNN); return;
- case HRcFlt32: vex_printf("%%%sF%u", maybe_v, regNN); return;
- case HRcFlt64: vex_printf("%%%sD%u", maybe_v, regNN); return;
- case HRcVec64: vex_printf("%%%sv%u", maybe_v, regNN); return;
- case HRcVec128: vex_printf("%%%sV%u", maybe_v, regNN); return;
+ case HRcInt32: return vex_printf("%%%sr%u", maybe_v, regNN);
+ case HRcInt64: return vex_printf("%%%sR%u", maybe_v, regNN);
+ case HRcFlt32: return vex_printf("%%%sF%u", maybe_v, regNN);
+ case HRcFlt64: return vex_printf("%%%sD%u", maybe_v, regNN);
+ case HRcVec64: return vex_printf("%%%sv%u", maybe_v, regNN);
+ case HRcVec128: return vex_printf("%%%sV%u", maybe_v, regNN);
default: vpanic("ppHReg");
}
}
for (UInt i = 0; i < N_RREGUNIVERSE_REGS; i++) {
univ->regs[i] = INVALID_HREG;
}
+
+ for (UInt i = 0; i <= HrcLAST; i++) {
+ univ->allocable_start[i] = N_RREGUNIVERSE_REGS;
+ univ->allocable_end[i] = N_RREGUNIVERSE_REGS;
+ }
}
void RRegUniverse__check_is_sane ( const RRegUniverse* univ )
HReg reg = univ->regs[i];
vassert(hregIsInvalid(reg));
}
+
+ /* Determine the register classes used and whether they form contiguous ranges. */
+ Bool regclass_used[HrcLAST + 1];
+ for (UInt i = 0; i <= HrcLAST; i++) {
+ regclass_used[i] = False;
+ }
+
+ for (UInt i = 0; i < univ->allocable; i++) {
+ HReg reg = univ->regs[i];
+ HRegClass regclass = hregClass(reg);
+ if (!regclass_used[regclass]) {
+ regclass_used[regclass] = True;
+ }
+ }
+
+ UInt regs_visited = 0;
+ for (UInt i = 0; i <= HrcLAST; i++) {
+ if (regclass_used[i]) {
+ for (UInt j = univ->allocable_start[i];
+ j <= univ->allocable_end[i]; j++) {
+ vassert(hregClass(univ->regs[j]) == i);
+ regs_visited += 1;
+ }
+ }
+ }
+
+ vassert(regs_visited == univ->allocable);
}
available on any specific host. For example on x86, the available
classes are: Int32, Flt64, Vec128 only.
- IMPORTANT NOTE: host_generic_reg_alloc2.c needs how much space is
+ IMPORTANT NOTE: host_generic_reg_alloc*.c needs to know how much space is
needed to spill each class of register. It allocates the following
amount of space:
HRcVec128 128 bits
If you add another regclass, you must remember to update
- host_generic_reg_alloc2.c accordingly.
+ host_generic_reg_alloc*.c and RRegUniverse accordingly.
When adding entries to enum HRegClass, do not use any value > 14 or < 1.
*/
HRcFlt32=5, /* 32-bit float */
HRcFlt64=6, /* 64-bit float */
HRcVec64=7, /* 64-bit SIMD */
- HRcVec128=8 /* 128-bit SIMD */
+ HRcVec128=8, /* 128-bit SIMD */
+ HrcLAST=HRcVec128
}
HRegClass;
extern void ppHRegClass ( HRegClass );
-/* Print an HReg in a generic (non-target-specific) way. */
-extern void ppHReg ( HReg );
+/* Print an HReg in a generic (non-target-specific) way.
+ Returns number of HChar's written. */
+extern UInt ppHReg ( HReg );
/* Construct. The goal here is that compiler can fold this down to a
constant in the case where the four arguments are constants, which
static inline HRegClass hregClass ( HReg r )
{
HRegClass rc = (HRegClass)((r.u32 >> 27) & 0xF);
- vassert(rc >= HRcInt32 && rc <= HRcVec128);
+ vassert(rc >= HRcInt32 && rc <= HrcLAST);
return rc;
}
index here, since this is the only place where we map index
numbers to actual registers. */
HReg regs[N_RREGUNIVERSE_REGS];
+
+ /* Ranges for groups of allocable registers. Used to quickly address only
+ a group of allocable registers belonging to the same register class.
+ Indexes into |allocable_{start,end}| are HRegClass entries, such as
+ HRcInt64. Values in |allocable_{start,end}| give a valid range into
+ |regs| where registers corresponding to the given register class are
+ found.
+
+ For example, let's say allocable_start[HRcInt64] == 10 and
+ allocable_end[HRcInt64] == 14. Then regs[10], regs[11], regs[12],
+ regs[13], and regs[14] give all registers of register class HRcInt64.
+
+ If a register class is not present, then values of the corresponding
+ |allocable_{start,end}| elements are equal to N_RREGUNIVERSE_REGS.
+
+ Naturally registers in |regs| must form contiguous groups. This is
+ checked by RRegUniverse__check_is_sane(). */
+ UInt allocable_start[HrcLAST + 1];
+ UInt allocable_end[HrcLAST + 1];
}
RRegUniverse;
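/* Editorial illustration, not part of the patch: a minimal sketch of how a
   client of the new |allocable_start|/|allocable_end| ranges can enumerate
   the allocable registers of one class. |univ| is assumed to be a fully
   populated RRegUniverse; the helper name is hypothetical. A class that is
   absent has both bounds equal to N_RREGUNIVERSE_REGS, so it must be
   skipped before indexing |regs|. */
static void enumerate_allocable_Int64 ( const RRegUniverse* univ )
{
   if (univ->allocable_start[HRcInt64] == N_RREGUNIVERSE_REGS)
      return;   /* HRcInt64 not present in this universe */
   for (UInt i = univ->allocable_start[HRcInt64];
        i <= univ->allocable_end[HRcInt64]; i++) {
      HReg r = univ->regs[i];
      vassert(hregClass(r) == HRcInt64);
      ppHReg(r);           /* e.g. print each allocation candidate */
      vex_printf("\n");
   }
}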
/*---------------------------------------------------------*/
/* Note that such maps can only map virtual regs to real regs.
- addToHRegRenap will barf if given a pair not of that form. As a
+ addToHRegRemap will barf if given a pair not of that form. As a
result, no valid HRegRemap will bind a real reg to anything, and so
if lookupHRegMap is given a real reg, it returns it unchanged.
This is precisely the behaviour that the register allocator needs
/*--- Reg alloc: TODO: move somewhere else ---*/
/*---------------------------------------------------------*/
-extern
-HInstrArray* doRegisterAllocation (
+/* Control of the VEX register allocator. */
+typedef
+ struct {
+ /* The real-register universe to use. This contains facts about real
+ registers, one of which is the set of registers available for
+ allocation. */
+ const RRegUniverse* univ;
+
+ /* Return True iff the given insn is a reg-reg move, in which case also
+ return the src and dst regs. */
+ Bool (*isMove)(const HInstr*, HReg*, HReg*);
+
+ /* Get info about register usage in this insn. */
+ void (*getRegUsage)(HRegUsage*, const HInstr*, Bool);
+
+ /* Apply a reg-reg mapping to an insn. */
+ void (*mapRegs)(HRegRemap*, HInstr*, Bool);
+
+ /* Return insn(s) to spill/restore a real register to a spill slot offset.
+ Also a function to move between registers.
+ And optionally a function to do direct reloads. */
+ void (*genSpill)(HInstr**, HInstr**, HReg, Int, Bool);
+ void (*genReload)(HInstr**, HInstr**, HReg, Int, Bool);
+ HInstr* (*genMove)(HReg from, HReg to, Bool);
+ HInstr* (*directReload)(HInstr*, HReg, Short);
+ UInt guest_sizeB;
+
+ /* For debug printing only. */
+ void (*ppInstr)(const HInstr*, Bool);
+ UInt (*ppReg)(HReg);
+
+ /* 32/64bit mode */
+ Bool mode64;
+ }
+ RegAllocControl;
- /* Incoming virtual-registerised code. */
+extern HInstrArray* doRegisterAllocation_v2(
HInstrArray* instrs_in,
-
- /* The real-register universe to use. This contains facts about
- real registers, one of which is the set of registers available
- for allocation. */
- const RRegUniverse* univ,
-
- /* Return True iff the given insn is a reg-reg move, in which
- case also return the src and dst regs. */
- Bool (*isMove) (const HInstr*, HReg*, HReg*),
-
- /* Get info about register usage in this insn. */
- void (*getRegUsage) (HRegUsage*, const HInstr*, Bool),
-
- /* Apply a reg-reg mapping to an insn. */
- void (*mapRegs) (HRegRemap*, HInstr*, Bool),
-
- /* Return insn(s) to spill/restore a real reg to a spill slot
- offset. And optionally a function to do direct reloads. */
- void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
- void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
- HInstr* (*directReload) ( HInstr*, HReg, Short ),
- Int guest_sizeB,
-
- /* For debug printing only. */
- void (*ppInstr) ( const HInstr*, Bool ),
- void (*ppReg) ( HReg ),
-
- /* 32/64bit mode */
- Bool mode64
+ const RegAllocControl* con
+);
+extern HInstrArray* doRegisterAllocation_v3(
+ HInstrArray* instrs_in,
+ const RegAllocControl* con
);
/* Add the registers. The initial segment of this array must be
those available for allocation by reg-alloc, and those that
follow are not available for allocation. */
+ ru->allocable_start[(mode64) ? HRcInt64 : HRcInt32] = ru->size;
ru->regs[ru->size++] = hregMIPS_GPR16(mode64);
ru->regs[ru->size++] = hregMIPS_GPR17(mode64);
ru->regs[ru->size++] = hregMIPS_GPR18(mode64);
ru->regs[ru->size++] = hregMIPS_GPR14(mode64);
ru->regs[ru->size++] = hregMIPS_GPR15(mode64);
ru->regs[ru->size++] = hregMIPS_GPR24(mode64);
+ ru->allocable_end[(mode64) ? HRcInt64 : HRcInt32] = ru->size - 1;
+
/* s7 (=guest_state) */
+ ru->allocable_start[(mode64) ? HRcFlt64 : HRcFlt32] = ru->size;
ru->regs[ru->size++] = hregMIPS_F16(mode64);
ru->regs[ru->size++] = hregMIPS_F18(mode64);
ru->regs[ru->size++] = hregMIPS_F20(mode64);
ru->regs[ru->size++] = hregMIPS_F26(mode64);
ru->regs[ru->size++] = hregMIPS_F28(mode64);
ru->regs[ru->size++] = hregMIPS_F30(mode64);
+ ru->allocable_end[(mode64) ? HRcFlt64 : HRcFlt32] = ru->size - 1;
+
if (!mode64) {
/* Fake double floating point */
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregMIPS_D0(mode64);
ru->regs[ru->size++] = hregMIPS_D1(mode64);
ru->regs[ru->size++] = hregMIPS_D2(mode64);
ru->regs[ru->size++] = hregMIPS_D5(mode64);
ru->regs[ru->size++] = hregMIPS_D6(mode64);
ru->regs[ru->size++] = hregMIPS_D7(mode64);
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
}
ru->allocable = ru->size;
}
-void ppHRegMIPS(HReg reg, Bool mode64)
+UInt ppHRegMIPS(HReg reg, Bool mode64)
{
Int r;
static const HChar *ireg32_names[35]
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
case HRcInt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
case HRcInt64:
r = hregEncoding (reg);
vassert (r >= 0 && r < 32);
- vex_printf ("%s", ireg32_names[r]);
- return;
+ return vex_printf ("%s", ireg32_names[r]);
case HRcFlt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", freg32_names[r]);
- return;
+ return vex_printf("%s", freg32_names[r]);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", freg64_names[r]);
- return;
+ return vex_printf("%s", freg64_names[r]);
default:
vpanic("ppHRegMIPS");
break;
}
-
- return;
}
}
}
+MIPSInstr* genMove_MIPS(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt32:
+ case HRcInt64:
+ return MIPSInstr_Alu(Malu_OR, to, from, MIPSRH_Reg(from));
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_MIPS: unimplemented regclass");
+ }
+}
+
/* --------- The mips assembler --------- */
inline static UInt iregNo(HReg r, Bool mode64)
# define MIPS_N_REGPARMS 8
#endif
-extern void ppHRegMIPS ( HReg, Bool );
+extern UInt ppHRegMIPS ( HReg, Bool );
/* --------- Condition codes, Intel encoding. --------- */
HReg rreg, Int offset, Bool);
extern void genReload_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2,
HReg rreg, Int offset, Bool);
+extern MIPSInstr* genMove_MIPS(HReg from, HReg to, Bool mode64);
extern const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 );
// GPR0 = scratch reg where poss. - some ops interpret as value zero
// GPR1 = stack pointer
// GPR2 = TOC pointer
+ ru->allocable_start[(mode64) ? HRcInt64 : HRcInt32] = ru->size;
ru->regs[ru->size++] = hregPPC_GPR3(mode64);
ru->regs[ru->size++] = hregPPC_GPR4(mode64);
ru->regs[ru->size++] = hregPPC_GPR5(mode64);
ru->regs[ru->size++] = hregPPC_GPR26(mode64);
ru->regs[ru->size++] = hregPPC_GPR27(mode64);
ru->regs[ru->size++] = hregPPC_GPR28(mode64);
+ ru->allocable_end[(mode64) ? HRcInt64 : HRcInt32] = ru->size - 1;
// GPR29 is reserved for the dispatcher
// GPR30 is reserved as AltiVec spill reg temporary
// GPR31 is reserved for the GuestStatePtr
the occasional extra spill instead. */
/* For both ppc32-linux and ppc64-linux, f14-f31 are callee save.
So use them. */
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregPPC_FPR14(mode64);
ru->regs[ru->size++] = hregPPC_FPR15(mode64);
ru->regs[ru->size++] = hregPPC_FPR16(mode64);
ru->regs[ru->size++] = hregPPC_FPR19(mode64);
ru->regs[ru->size++] = hregPPC_FPR20(mode64);
ru->regs[ru->size++] = hregPPC_FPR21(mode64);
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
/* Same deal re Altivec */
/* For both ppc32-linux and ppc64-linux, v20-v31 are callee save.
So use them. */
/* NB, vr29 is used as a scratch temporary -- do not allocate */
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregPPC_VR20(mode64);
ru->regs[ru->size++] = hregPPC_VR21(mode64);
ru->regs[ru->size++] = hregPPC_VR22(mode64);
ru->regs[ru->size++] = hregPPC_VR25(mode64);
ru->regs[ru->size++] = hregPPC_VR26(mode64);
ru->regs[ru->size++] = hregPPC_VR27(mode64);
+ ru->allocable_end[HRcVec128] = ru->size - 1;
ru->allocable = ru->size;
/* And other regs, not available to the allocator. */
}
-void ppHRegPPC ( HReg reg )
+UInt ppHRegPPC ( HReg reg )
{
Int r;
static const HChar* ireg32_names[32]
"%r28", "%r29", "%r30", "%r31" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
case HRcInt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%%fr%d", r);
- return;
+ return vex_printf("%%fr%d", r);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%%v%d", r);
- return;
+ return vex_printf("%%v%d", r);
default:
vpanic("ppHRegPPC");
}
}
}
+PPCInstr* genMove_PPC(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt32:
+ case HRcInt64:
+ return PPCInstr_Alu(Palu_OR, to, from, PPCRH_Reg(from));
+ case HRcFlt64:
+ return PPCInstr_FpUnary(Pfp_MOV, to, from);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_PPC: unimplemented regclass");
+ }
+}
+
/* --------- The ppc assembler (bleh.) --------- */
/* Num registers used for function calls */
#define PPC_N_REGPARMS 8
-extern void ppHRegPPC ( HReg );
+extern UInt ppHRegPPC ( HReg );
/* --------- Condition codes --------- */
HReg rreg, Int offsetB, Bool mode64 );
extern void genReload_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offsetB, Bool mode64 );
+extern PPCInstr* genMove_PPC(HReg from, HReg to, Bool mode64);
extern const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 );
vex_printf("%s", s390_insn_as_string(insn));
}
-void
+UInt
ppHRegS390(HReg reg)
{
- vex_printf("%s", s390_hreg_as_string(reg));
+ return vex_printf("%s", s390_hreg_as_string(reg));
}
/*------------------------------------------------------------*/
FPR12 - FPR15 are also used as register pairs for 128-bit
floating point operations
*/
- UInt regno;
- for (regno = 1; regno <= 11; ++regno) {
+ ru->allocable_start[HRcInt64] = ru->size;
+ for (UInt regno = 1; regno <= 11; ++regno) {
gpr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_gpr(regno);
}
- for (regno = 0; regno <= 15; ++regno) {
+ ru->allocable_end[HRcInt64] = ru->size - 1;
+
+ ru->allocable_start[HRcFlt64] = ru->size;
+ for (UInt regno = 0; regno <= 15; ++regno) {
fpr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_fpr(regno);
}
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
ru->allocable = ru->size;
/* Add the registers that are not available for allocation.
}
}
+s390_insn* genMove_S390(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt64:
+ return s390_insn_move(sizeofIRType(Ity_I64), to, from);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_S390: unimplemented regclass");
+ }
+}
+
/* Helper function for s390_insn_get_reg_usage */
static void
s390_opnd_RMI_get_reg_usage(HRegUsage *u, s390_opnd_RMI op)
void ppS390AMode(const s390_amode *);
void ppS390Instr(const s390_insn *, Bool mode64);
-void ppHRegS390(HReg);
+UInt ppHRegS390(HReg);
/* Some functions that insulate the register allocator from details
of the underlying instruction set. */
const RRegUniverse *getRRegUniverse_S390( void );
void genSpill_S390 ( HInstr **, HInstr **, HReg , Int , Bool );
void genReload_S390 ( HInstr **, HInstr **, HReg , Int , Bool );
+extern s390_insn* genMove_S390(HReg from, HReg to, Bool mode64);
HInstrArray *iselSB_S390 ( const IRSB *, VexArch, const VexArchInfo *,
const VexAbiInfo *, Int, Int, Bool, Bool, Addr);
/* Add the registers. The initial segment of this array must be
those available for allocation by reg-alloc, and those that
follow are not available for allocation. */
+ ru->allocable_start[HRcInt32] = ru->size;
ru->regs[ru->size++] = hregX86_EAX();
ru->regs[ru->size++] = hregX86_EBX();
ru->regs[ru->size++] = hregX86_ECX();
ru->regs[ru->size++] = hregX86_EDX();
ru->regs[ru->size++] = hregX86_ESI();
ru->regs[ru->size++] = hregX86_EDI();
+ ru->allocable_end[HRcInt32] = ru->size - 1;
+
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregX86_FAKE0();
ru->regs[ru->size++] = hregX86_FAKE1();
ru->regs[ru->size++] = hregX86_FAKE2();
ru->regs[ru->size++] = hregX86_FAKE3();
ru->regs[ru->size++] = hregX86_FAKE4();
ru->regs[ru->size++] = hregX86_FAKE5();
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
+
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregX86_XMM0();
ru->regs[ru->size++] = hregX86_XMM1();
ru->regs[ru->size++] = hregX86_XMM2();
ru->regs[ru->size++] = hregX86_XMM5();
ru->regs[ru->size++] = hregX86_XMM6();
ru->regs[ru->size++] = hregX86_XMM7();
+ ru->allocable_end[HRcVec128] = ru->size - 1;
ru->allocable = ru->size;
+
/* And other regs, not available to the allocator. */
ru->regs[ru->size++] = hregX86_ESP();
ru->regs[ru->size++] = hregX86_EBP();
}
-void ppHRegX86 ( HReg reg )
+UInt ppHRegX86 ( HReg reg )
{
Int r;
static const HChar* ireg32_names[8]
= { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 8);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 6);
- vex_printf("%%fake%d", r);
- return;
+ return vex_printf("%%fake%d", r);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 8);
- vex_printf("%%xmm%d", r);
- return;
+ return vex_printf("%%xmm%d", r);
default:
vpanic("ppHRegX86");
}
}
}
+X86Instr* genMove_X86(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt32:
+ return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(from), to);
+ case HRcVec128:
+ return X86Instr_SseReRg(Xsse_MOV, from, to);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_X86: unimplemented regclass");
+ }
+}
+
/* The given instruction reads the specified vreg exactly once, and
that vreg is currently located at the given spill offset. If
possible, return a variant of the instruction to one which instead
ST_IN HReg hregX86_EBP ( void ) { return mkHReg(False, HRcInt32, 5, 21); }
#undef ST_IN
-extern void ppHRegX86 ( HReg );
+extern UInt ppHRegX86 ( HReg );
/* --------- Condition codes, Intel encoding. --------- */
HReg rreg, Int offset, Bool );
extern void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
-
+extern X86Instr* genMove_X86(HReg from, HReg to, Bool);
extern X86Instr* directReload_X86 ( X86Instr* i, HReg vreg, Short spill_off );
extern const RRegUniverse* getRRegUniverse_X86 ( void );
vcon->guest_max_insns = 60;
vcon->guest_chase_thresh = 10;
vcon->guest_chase_cond = False;
+ vcon->regalloc_version = 3;
}
vassert(vcon->guest_chase_thresh < vcon->guest_max_insns);
vassert(vcon->guest_chase_cond == True
|| vcon->guest_chase_cond == False);
+ vassert(vcon->regalloc_version == 2 || vcon->regalloc_version == 3);
/* Check that Vex has been built with sizes of basic types as
stated in priv/libvex_basictypes.h. Failure of any of these is
void (*mapRegs) ( HRegRemap*, HInstr*, Bool );
void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool );
void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool );
+ HInstr* (*genMove) ( HReg, HReg, Bool );
HInstr* (*directReload) ( HInstr*, HReg, Short );
void (*ppInstr) ( const HInstr*, Bool );
- void (*ppReg) ( HReg );
+ UInt (*ppReg) ( HReg );
HInstrArray* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*,
const VexAbiInfo*, Int, Int, Bool, Bool,
Addr );
mapRegs = NULL;
genSpill = NULL;
genReload = NULL;
+ genMove = NULL;
directReload = NULL;
ppInstr = NULL;
ppReg = NULL;
mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr);
genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86);
genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86);
+ genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86);
directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86);
ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr);
ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86);
mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr);
genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64);
genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64);
+ genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64);
directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64);
ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr);
ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64);
mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr);
genSpill = CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC);
genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC);
+ genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC);
ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr);
ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC);
iselSB = PPC32FN(iselSB_PPC);
mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr);
genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC);
genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC);
+ genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC);
ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr);
ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC);
iselSB = PPC64FN(iselSB_PPC);
mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr);
genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390);
genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390);
+ genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390);
// fixs390: consider implementing directReload_S390
ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr);
ppReg = CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390);
mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr);
genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM);
genReload = CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM);
+ genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM);
ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr);
ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM);
iselSB = ARMFN(iselSB_ARM);
mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr);
genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64);
genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64);
+ genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64);
ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr);
ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64);
iselSB = ARM64FN(iselSB_ARM64);
mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr);
genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS);
genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS);
+ genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS);
ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr);
ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS);
iselSB = MIPS32FN(iselSB_MIPS);
mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr);
genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS);
genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS);
+ genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS);
ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr);
ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS);
iselSB = MIPS64FN(iselSB_MIPS);
}
/* Register allocate. */
- rcode = doRegisterAllocation ( vcode, rRegUniv,
- isMove, getRegUsage, mapRegs,
- genSpill, genReload, directReload,
- guest_sizeB,
- ppInstr, ppReg, mode64 );
+ RegAllocControl con = {
+ .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage,
+ .mapRegs = mapRegs, .genSpill = genSpill, .genReload = genReload,
+ .genMove = genMove, .directReload = directReload,
+ .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppReg = ppReg,
+ .mode64 = mode64};
+ switch (vex_control.regalloc_version) {
+ case 2:
+ rcode = doRegisterAllocation_v2(vcode, &con);
+ break;
+ case 3:
+ rcode = doRegisterAllocation_v3(vcode, &con);
+ break;
+ default:
+ vassert(0);
+ }
vexAllocSanityCheck();
}
}
+/* Vectorised memset, copied from Valgrind's m_libcbase.c. */
void vex_bzero ( void* sV, SizeT n )
{
- SizeT i;
- UChar* s = (UChar*)sV;
- /* No laughing, please. Just don't call this too often. Thank you
- for your attention. */
- for (i = 0; i < n; i++) s[i] = 0;
+# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))
+
+ UChar* d = sV;
+
+ while ((!IS_4_ALIGNED(d)) && n >= 1) {
+ d[0] = 0;
+ d++;
+ n--;
+ }
+ if (n == 0)
+ return;
+ while (n >= 16) {
+ ((UInt*)d)[0] = 0;
+ ((UInt*)d)[1] = 0;
+ ((UInt*)d)[2] = 0;
+ ((UInt*)d)[3] = 0;
+ d += 16;
+ n -= 16;
+ }
+ while (n >= 4) {
+ ((UInt*)d)[0] = 0;
+ d += 4;
+ n -= 4;
+ }
+ while (n >= 1) {
+ d[0] = 0;
+ d++;
+ n--;
+ }
+ return;
+# undef IS_4_ALIGNED
}
/* EXPERIMENTAL: chase across conditional branches? Not all
front ends honour this. Default: NO. */
Bool guest_chase_cond;
+ /* Register allocator version. Allowed values are:
+ - '2': the previous implementation; slower, but well proven.
+ - '3': the current, faster implementation; it may occasionally make
+ slightly worse spilling decisions. */
+ UInt regalloc_version;
}
VexControl;
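/* Editorial illustration, not part of the patch: a minimal sketch of how a
   libVEX client could fall back to the old allocator. It assumes the usual
   set-up sequence in which LibVEX_default_VexControl() fills in the defaults
   (now including regalloc_version = 3) before the structure is handed to
   LibVEX_Init(); the function name below is hypothetical. Under Valgrind the
   equivalent choice is made with --vex-regalloc-version=2. */
static void select_v2_allocator_example ( void )
{
   VexControl vcon;
   LibVEX_default_VexControl(&vcon);
   vcon.regalloc_version = 2;   /* override the default of 3 */
   /* ... then pass &vcon to LibVEX_Init() as usual ... */
}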
" 0000 0000 show summary profile only\n"
" (Nb: you need --trace-notbelow and/or --trace-notabove\n"
" with --trace-flags for full details)\n"
+" --vex-regalloc-version=2|3 [3]\n"
"\n"
" debugging options for Valgrind tools that report errors\n"
" --dump-error=<number> show translation for basic block associated\n"
VG_(clo_vex_control).iropt_verbosity, 0, 10) {}
else if VG_BINT_CLO(arg, "--vex-iropt-level",
VG_(clo_vex_control).iropt_level, 0, 2) {}
+ else if VG_BINT_CLO(arg, "--vex-regalloc-version",
+ VG_(clo_vex_control).regalloc_version, 2, 3) {}
else if VG_STRINDEX_CLO(arg, "--vex-iropt-register-updates",
pxStrings, ix) {
0000 0000 show summary profile only
(Nb: you need --trace-notbelow and/or --trace-notabove
with --trace-flags for full details)
+ --vex-regalloc-version=2|3 [3]
debugging options for Valgrind tools that report errors
--dump-error=<number> show translation for basic block associated