From: Ivo Raisr
Date: Fri, 25 Aug 2017 22:19:05 +0000 (+0200)
Subject: VEX register allocator version 3.
X-Git-Tag: VALGRIND_3_14_0~276
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=efa1e5ef8d257e3b20facf6f04350d29578ae9e4;p=thirdparty%2Fvalgrind.git

VEX register allocator version 3.

Implements a new version of the VEX register allocator, which keeps its
main state per virtual register, as opposed to per real register in v2.
This results in a cleaner design and a much simpler implementation.
The new allocator has been observed to run 20-30% faster than the previous
one, although it can produce slightly worse spilling decisions. An overall
improvement of a few percent has been observed when running the Valgrind
performance regression test suite.

The new register allocator (v3) is now the default. The old register
allocator (v2) is still kept around and can be activated with the command
line option '--vex-regalloc-version=2'.

Fixes BZ#381553.
---

diff --git a/Makefile.vex.am b/Makefile.vex.am
index 9b9b9b53eb..4ad5ffa67f 100644
--- a/Makefile.vex.am
+++ b/Makefile.vex.am
@@ -143,6 +143,7 @@ LIBVEX_SOURCES_COMMON = \
 	priv/host_generic_simd256.c \
 	priv/host_generic_maddf.c \
 	priv/host_generic_reg_alloc2.c \
+	priv/host_generic_reg_alloc3.c \
 	priv/host_x86_defs.c \
 	priv/host_x86_isel.c \
 	priv/host_amd64_defs.c \
diff --git a/NEWS b/NEWS
index 516c4cc62e..446a7fa2d6 100644
--- a/NEWS
+++ b/NEWS
@@ -40,6 +40,7 @@ where XXXXXX is the bug number as listed below.
 381272 ppc64 doesn't compile test_isa_2_06_partx.c without VSX support
 381289 epoll_pwait can have a NULL sigmask
 381274 powerpc too chatty even with --sigill-diagnostics=no
+381553 VEX register allocator v3
 381769 Use ucontext_t instead of struct ucontext
 381805 arm32 needs ld.so index hardwire for new glibc security fixes
 382256 gz compiler flag test doesn't work for gold
diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index 5e0600ac69..ebe2b0013b 100644
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -63,6 +63,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void )
    /* Add the registers. The initial segment of this array must be
       those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */
+   ru->allocable_start[HRcInt64] = ru->size;
    ru->regs[ru->size++] = hregAMD64_RSI();
    ru->regs[ru->size++] = hregAMD64_RDI();
    ru->regs[ru->size++] = hregAMD64_R8();
@@ -72,6 +73,10 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void )
    ru->regs[ru->size++] = hregAMD64_R14();
    ru->regs[ru->size++] = hregAMD64_R15();
    ru->regs[ru->size++] = hregAMD64_RBX();
+   ru->regs[ru->size++] = hregAMD64_R10();
+   ru->allocable_end[HRcInt64] = ru->size - 1;
+
+   ru->allocable_start[HRcVec128] = ru->size;
    ru->regs[ru->size++] = hregAMD64_XMM3();
    ru->regs[ru->size++] = hregAMD64_XMM4();
    ru->regs[ru->size++] = hregAMD64_XMM5();
@@ -82,8 +87,9 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void )
    ru->regs[ru->size++] = hregAMD64_XMM10();
    ru->regs[ru->size++] = hregAMD64_XMM11();
    ru->regs[ru->size++] = hregAMD64_XMM12();
-   ru->regs[ru->size++] = hregAMD64_R10();
+   ru->allocable_end[HRcVec128] = ru->size - 1;
    ru->allocable = ru->size;
+
    /* And other regs, not available to the allocator.
*/ ru->regs[ru->size++] = hregAMD64_RAX(); ru->regs[ru->size++] = hregAMD64_RCX(); @@ -101,7 +107,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) } -void ppHRegAMD64 ( HReg reg ) +UInt ppHRegAMD64 ( HReg reg ) { Int r; static const HChar* ireg64_names[16] @@ -109,27 +115,24 @@ void ppHRegAMD64 ( HReg reg ) "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg64_names[r]); - return; + return vex_printf("%s", ireg64_names[r]); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%%xmm%d", r); - return; + return vex_printf("%%xmm%d", r); default: vpanic("ppHRegAMD64"); } } -static void ppHRegAMD64_lo32 ( HReg reg ) +static UInt ppHRegAMD64_lo32 ( HReg reg ) { Int r; static const HChar* ireg32_names[16] @@ -137,17 +140,16 @@ static void ppHRegAMD64_lo32 ( HReg reg ) "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - vex_printf("d"); - return; + UInt written = ppHReg(reg); + written += vex_printf("d"); + return written; } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); default: vpanic("ppHRegAMD64_lo32: invalid regclass"); } @@ -1995,6 +1997,19 @@ void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to); + case HRcVec128: + return AMD64Instr_SseReRg(Asse_MOV, from, to); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_AMD64: unimplemented regclass"); + } +} + AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off ) { vassert(spill_off >= 0 && spill_off < 10000); /* let's say */ diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index 39682ef9c4..8a3eea8817 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -56,19 +56,18 @@ ST_IN HReg hregAMD64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 5); } ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 6); } ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 7); } ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 8); } - -ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 9); } -ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 10); } -ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 11); } -ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 12); } -ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 13); } -ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 14); } -ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 15); } -ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 16); } -ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 17); } -ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 18); } - -ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, 
HRcInt64, 10, 19); } +ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 9); } + +ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 10); } +ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 11); } +ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 12); } +ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 13); } +ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 14); } +ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 15); } +ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 16); } +ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 17); } +ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 18); } +ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 19); } ST_IN HReg hregAMD64_RAX ( void ) { return mkHReg(False, HRcInt64, 0, 20); } ST_IN HReg hregAMD64_RCX ( void ) { return mkHReg(False, HRcInt64, 1, 21); } @@ -81,7 +80,7 @@ ST_IN HReg hregAMD64_XMM0 ( void ) { return mkHReg(False, HRcVec128, 0, 26); } ST_IN HReg hregAMD64_XMM1 ( void ) { return mkHReg(False, HRcVec128, 1, 27); } #undef ST_IN -extern void ppHRegAMD64 ( HReg ); +extern UInt ppHRegAMD64 ( HReg ); /* --------- Condition codes, AMD encoding. --------- */ @@ -801,7 +800,7 @@ extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); - +extern AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool); extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i, HReg vreg, Short spill_off ); diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 380a24d6d9..2506512adb 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -64,7 +64,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. */ - + ru->allocable_start[HRcInt64] = ru->size; ru->regs[ru->size++] = hregARM64_X22(); ru->regs[ru->size++] = hregARM64_X23(); ru->regs[ru->size++] = hregARM64_X24(); @@ -81,6 +81,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) ru->regs[ru->size++] = hregARM64_X5(); ru->regs[ru->size++] = hregARM64_X6(); ru->regs[ru->size++] = hregARM64_X7(); + ru->allocable_end[HRcInt64] = ru->size - 1; // X8 is used as a ProfInc temporary, not available to regalloc. // X9 is a chaining/spill temporary, not available to regalloc. @@ -94,19 +95,23 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) // X21 is the guest state pointer, not available to regalloc. // vector regs. Unfortunately not callee-saved. 
+ ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM64_Q16(); ru->regs[ru->size++] = hregARM64_Q17(); ru->regs[ru->size++] = hregARM64_Q18(); ru->regs[ru->size++] = hregARM64_Q19(); ru->regs[ru->size++] = hregARM64_Q20(); + ru->allocable_end[HRcVec128] = ru->size - 1; // F64 regs, all of which are callee-saved + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM64_D8(); ru->regs[ru->size++] = hregARM64_D9(); ru->regs[ru->size++] = hregARM64_D10(); ru->regs[ru->size++] = hregARM64_D11(); ru->regs[ru->size++] = hregARM64_D12(); ru->regs[ru->size++] = hregARM64_D13(); + ru->allocable_end[HRcFlt64] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -142,43 +147,41 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) } -void ppHRegARM64 ( HReg reg ) { +UInt ppHRegARM64 ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 31); - vex_printf("x%d", r); - return; + return vex_printf("x%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM64"); } } -static void ppHRegARM64asSreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(S-reg)"); +static UInt ppHRegARM64asSreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(S-reg)"); + return written; } -static void ppHRegARM64asHreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(H-reg)"); +static UInt ppHRegARM64asHreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(H-reg)"); + return written; } @@ -1745,7 +1748,7 @@ void ppARM64Instr ( const ARM64Instr* i ) { ppHRegARM64asSreg(i->ARM64in.VCmpS.argR); return; case ARM64in_VFCSel: { - void (*ppHRegARM64fp)(HReg) + UInt (*ppHRegARM64fp)(HReg) = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg); vex_printf("fcsel "); ppHRegARM64fp(i->ARM64in.VFCSel.dst); @@ -2616,6 +2619,21 @@ void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return ARM64Instr_MovI(to, from); + case HRcFlt64: + return ARM64Instr_VMov(8, to, from); + case HRcVec128: + return ARM64Instr_VMov(16, to, from); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM64: unimplemented regclass"); + } +} + /* Emit an instruction into buf and return the number of bytes used. Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index 14b2de6a41..e7da4f90fb 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -74,7 +74,7 @@ ST_IN HReg hregARM64_X9 ( void ) { return mkHReg(False, HRcInt64, 9, 27); } ST_IN HReg hregARM64_X21 ( void ) { return mkHReg(False, HRcInt64, 21, 28); } #undef ST_IN -extern void ppHRegARM64 ( HReg ); +extern UInt ppHRegARM64 ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM64_N_ARGREGS 8 /* x0 .. 
x7 */ @@ -1007,6 +1007,7 @@ extern void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM64 ( void ); diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index a986f37877..9bf87cd5c3 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -68,6 +68,7 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) /* Callee saves ones are listed first, since we prefer them if they're available. */ + ru->allocable_start[HRcInt32] = ru->size; ru->regs[ru->size++] = hregARM_R4(); ru->regs[ru->size++] = hregARM_R5(); ru->regs[ru->size++] = hregARM_R6(); @@ -80,24 +81,34 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) ru->regs[ru->size++] = hregARM_R2(); ru->regs[ru->size++] = hregARM_R3(); ru->regs[ru->size++] = hregARM_R9(); + ru->allocable_end[HRcInt32] = ru->size - 1; + /* FP registers. Note: these are all callee-save. Yay! Hence we don't need to mention them as trashed in getHRegUsage for ARMInstr_Call. */ + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM_D8(); ru->regs[ru->size++] = hregARM_D9(); ru->regs[ru->size++] = hregARM_D10(); ru->regs[ru->size++] = hregARM_D11(); ru->regs[ru->size++] = hregARM_D12(); + ru->allocable_end[HRcFlt64] = ru->size - 1; + + ru->allocable_start[HRcFlt32] = ru->size; ru->regs[ru->size++] = hregARM_S26(); ru->regs[ru->size++] = hregARM_S27(); ru->regs[ru->size++] = hregARM_S28(); ru->regs[ru->size++] = hregARM_S29(); ru->regs[ru->size++] = hregARM_S30(); + ru->allocable_end[HRcFlt32] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM_Q8(); ru->regs[ru->size++] = hregARM_Q9(); ru->regs[ru->size++] = hregARM_Q10(); ru->regs[ru->size++] = hregARM_Q11(); ru->regs[ru->size++] = hregARM_Q12(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -140,35 +151,30 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) } -void ppHRegARM ( HReg reg ) { +UInt ppHRegARM ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("r%d", r); - return; + return vex_printf("r%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcFlt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("s%d", r); - return; + return vex_printf("s%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM"); } @@ -2772,6 +2778,22 @@ void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARMInstr* genMove_ARM(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + return ARMInstr_Mov(to, ARMRI84_R(from)); + case HRcFlt32: + return ARMInstr_VUnaryS(ARMvfpu_COPY, to, from); + case HRcFlt64: + return ARMInstr_VUnaryD(ARMvfpu_COPY, to, from); + case HRcVec128: + return ARMInstr_NUnary(ARMneon_COPY, to, from, 4, False); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM: unimplemented regclass"); + } +} /* Emit an instruction into buf and return the number of bytes used. Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index e8a2eb7237..56c4ec5055 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -81,7 +81,7 @@ ST_IN HReg hregARM_Q14 ( void ) { return mkHReg(False, HRcVec128, 14, 32); } ST_IN HReg hregARM_Q15 ( void ) { return mkHReg(False, HRcVec128, 15, 33); } #undef ST_IN -extern void ppHRegARM ( HReg ); +extern UInt ppHRegARM ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM_N_ARGREGS 4 /* r0, r1, r2, r3 */ @@ -1070,6 +1070,7 @@ extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARMInstr* genMove_ARM(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM ( void ); diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c index 2294a9bcc9..ec291d3deb 100644 --- a/VEX/priv/host_generic_reg_alloc2.c +++ b/VEX/priv/host_generic_reg_alloc2.c @@ -294,49 +294,6 @@ static inline UInt ULong__minIndex ( ULong w64 ) { } -/* Vectorised memset, copied from Valgrind's m_libcbase.c. */ -static void* local_memset ( void *destV, Int c, SizeT sz ) -{ -# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3))) - - UInt c4; - UChar* d = destV; - UChar uc = c; - - while ((!IS_4_ALIGNED(d)) && sz >= 1) { - d[0] = uc; - d++; - sz--; - } - if (sz == 0) - return destV; - c4 = uc; - c4 |= (c4 << 8); - c4 |= (c4 << 16); - while (sz >= 16) { - ((UInt*)d)[0] = c4; - ((UInt*)d)[1] = c4; - ((UInt*)d)[2] = c4; - ((UInt*)d)[3] = c4; - d += 16; - sz -= 16; - } - while (sz >= 4) { - ((UInt*)d)[0] = c4; - d += 4; - sz -= 4; - } - while (sz >= 1) { - d[0] = c; - d++; - sz--; - } - return destV; - -# undef IS_4_ALIGNED -} - - /* A target-independent register allocator. Requires various functions which it uses to deal abstractly with instructions and registers, since it cannot have any target-specific knowledge. @@ -352,44 +309,13 @@ static void* local_memset ( void *destV, Int c, SizeT sz ) Takes an expandable array of pointers to unallocated insns. Returns an expandable array of pointers to allocated insns. 
*/ -HInstrArray* doRegisterAllocation ( +HInstrArray* doRegisterAllocation_v2 ( /* Incoming virtual-registerised code. */ HInstrArray* instrs_in, - /* The real-register universe to use. This contains facts about - real registers, one of which is the set of registers available - for allocation. */ - const RRegUniverse* univ, - - /* Return True iff the given insn is a reg-reg move, in which - case also return the src and dst regs. */ - Bool (*isMove) ( const HInstr*, HReg*, HReg* ), - - /* Get info about register usage in this insn. */ - void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ), - - /* Apply a reg-reg mapping to an insn. */ - void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), - - /* Return one, or, if we're unlucky, two insn(s) to spill/restore a - real reg to a spill slot byte offset. The two leading HInstr** - args are out parameters, through which the generated insns are - returned. Also (optionally) a 'directReload' function, which - attempts to replace a given instruction by one which reads - directly from a specified spill slot. May be NULL, in which - case the optimisation is not attempted. */ - void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), - void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), - HInstr* (*directReload) ( HInstr*, HReg, Short ), - Int guest_sizeB, - - /* For debug printing only. */ - void (*ppInstr) ( const HInstr*, Bool ), - void (*ppReg) ( HReg ), - - /* 32/64bit mode */ - Bool mode64 + /* Register allocator controls to use. */ + const RegAllocControl* con ) { # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) @@ -447,7 +373,7 @@ HInstrArray* doRegisterAllocation ( not at each insn processed. */ Bool do_sanity_check; - vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); + vassert(0 == (con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN)); vassert(0 == (N_SPILL64S % 2)); @@ -463,7 +389,7 @@ HInstrArray* doRegisterAllocation ( HInstr* _tmp = (_instr); \ if (DEBUG_REGALLOC) { \ vex_printf("** "); \ - (*ppInstr)(_tmp, mode64); \ + con->ppInstr(_tmp, con->mode64); \ vex_printf("\n\n"); \ } \ addHInstr ( instrs_out, _tmp ); \ @@ -474,13 +400,13 @@ HInstrArray* doRegisterAllocation ( Int z, q; \ for (z = 0; z < n_rregs; z++) { \ vex_printf(" rreg_state[%2d] = ", z); \ - (*ppReg)(univ->regs[z]); \ + con->ppReg(con->univ->regs[z]); \ vex_printf(" \t"); \ switch (rreg_state[z].disp) { \ case Free: vex_printf("Free\n"); break; \ case Unavail: vex_printf("Unavail\n"); break; \ case Bound: vex_printf("BoundTo "); \ - (*ppReg)(rreg_state[z].vreg); \ + con->ppReg(rreg_state[z].vreg); \ vex_printf("\n"); break; \ } \ } \ @@ -505,7 +431,7 @@ HInstrArray* doRegisterAllocation ( /* ... and initialise running state. */ /* n_rregs is no more than a short name for n_available_real_regs. */ - n_rregs = univ->allocable; + n_rregs = con->univ->allocable; n_vregs = instrs_in->n_vregs; /* If this is not so, vreg_state entries will overflow. 
*/ @@ -586,13 +512,13 @@ HInstrArray* doRegisterAllocation ( for (Int ii = 0; ii < instrs_in->arr_used; ii++) { - (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); + con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], con->mode64); if (0) { vex_printf("\n%d stage1: ", ii); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); - ppHRegUsage(univ, ®_usage_arr[ii]); + ppHRegUsage(con->univ, ®_usage_arr[ii]); } /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ @@ -606,7 +532,7 @@ HInstrArray* doRegisterAllocation ( Int k = hregIndex(vreg); if (k < 0 || k >= n_vregs) { vex_printf("\n"); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); vpanic("doRegisterAllocation: out-of-range vreg"); @@ -711,10 +637,10 @@ HInstrArray* doRegisterAllocation ( } else if (!isW && isR) { if (rreg_live_after[j] == INVALID_INSTRNO) { vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf("\n"); vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); vpanic("doRegisterAllocation: " "first event for rreg is Read"); @@ -724,10 +650,10 @@ HInstrArray* doRegisterAllocation ( vassert(isR && isW); if (rreg_live_after[j] == INVALID_INSTRNO) { vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf("\n"); vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n"); vpanic("doRegisterAllocation: " "first event for rreg is Modify"); @@ -741,7 +667,7 @@ HInstrArray* doRegisterAllocation ( ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); if (0) vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); - rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; + rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j]; rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); rreg_lrs_used++; @@ -778,7 +704,7 @@ HInstrArray* doRegisterAllocation ( if (0) vex_printf("FLUSH 2 (%d,%d)\n", rreg_live_after[j], rreg_dead_before[j]); - rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; + rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j]; rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); rreg_lrs_used++; @@ -805,7 +731,7 @@ HInstrArray* doRegisterAllocation ( for (Int j = 0; j < n_rregs; j++) { if (!rreg_state[j].has_hlrs) continue; - ppReg(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf(" hinted\n"); } } @@ -841,14 +767,14 @@ HInstrArray* doRegisterAllocation ( vex_printf("RRegLRs by LA:\n"); for (Int j = 0; j < rreg_lrs_used; j++) { vex_printf(" "); - (*ppReg)(rreg_lrs_la[j].rreg); + con->ppReg(rreg_lrs_la[j].rreg); vex_printf(" la = %d, db = %d\n", rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); } vex_printf("RRegLRs by DB:\n"); for (Int j = 0; j < rreg_lrs_used; j++) { vex_printf(" "); - (*ppReg)(rreg_lrs_db[j].rreg); + con->ppReg(rreg_lrs_db[j].rreg); vex_printf(" la = %d, db = %d\n", rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); } @@ -882,7 +808,7 @@ HInstrArray* doRegisterAllocation ( */ /* Int max_ss_no = -1; */ - local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before)); + 
vex_bzero(ss_busy_until_before, sizeof(ss_busy_until_before)); for (Int j = 0; j < n_vregs; j++) { @@ -940,7 +866,7 @@ HInstrArray* doRegisterAllocation ( /* This reflects LibVEX's hard-wired knowledge of the baseBlock layout: the guest state, then two equal sized areas following it for two sets of shadow state, and then the spill area. */ - vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8); + vreg_lrs[j].spill_offset = toShort(con->guest_sizeB * 3 + ss_no * 8); /* Independent check that we've made a sane choice of slot */ sanity_check_spill_offset( &vreg_lrs[j] ); @@ -983,7 +909,7 @@ HInstrArray* doRegisterAllocation ( if (DEBUG_REGALLOC) { vex_printf("\n====----====---- Insn %d ----====----====\n", ii); vex_printf("---- "); - (*ppInstr)(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); vex_printf("\n\nInitial state:\n"); PRINT_STATE; vex_printf("\n"); @@ -1018,7 +944,7 @@ HInstrArray* doRegisterAllocation ( vex_printf("considering la %d .. db %d reg = ", rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before); - (*ppReg)(reg); + con->ppReg(reg); vex_printf("\n"); } @@ -1059,7 +985,7 @@ HInstrArray* doRegisterAllocation ( vassert(rreg_state[j].eq_spill_slot == False); continue; } - vassert(hregClass(univ->regs[j]) + vassert(hregClass(con->univ->regs[j]) == hregClass(rreg_state[j].vreg)); vassert( hregIsVirtual(rreg_state[j].vreg)); } @@ -1099,7 +1025,7 @@ HInstrArray* doRegisterAllocation ( the dst to the src's rreg, and that's all. */ HReg vregS = INVALID_HREG; HReg vregD = INVALID_HREG; - if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) { + if ( con->isMove(instrs_in->arr[ii], &vregS, &vregD) ) { if (!hregIsVirtual(vregS)) goto cannot_coalesce; if (!hregIsVirtual(vregD)) goto cannot_coalesce; /* Check that *isMove is not telling us a bunch of lies ... */ @@ -1112,9 +1038,9 @@ HInstrArray* doRegisterAllocation ( if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; if (DEBUG_REGALLOC) { vex_printf("COALESCE "); - (*ppReg)(vregS); + con->ppReg(vregS); vex_printf(" -> "); - (*ppReg)(vregD); + con->ppReg(vregD); vex_printf("\n\n"); } /* Find the state entry for vregS. */ @@ -1163,7 +1089,7 @@ HInstrArray* doRegisterAllocation ( vreg_state[m] = INVALID_RREG_NO; if (DEBUG_REGALLOC) { vex_printf("free up "); - (*ppReg)(univ->regs[j]); + con->ppReg(con->univ->regs[j]); vex_printf("\n"); } } @@ -1204,7 +1130,7 @@ HInstrArray* doRegisterAllocation ( than before it. */ if (DEBUG_REGALLOC) { vex_printf("need to free up rreg: "); - (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg); + con->ppReg(rreg_lrs_la[rreg_lrs_la_next].rreg); vex_printf("\n\n"); } Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg); @@ -1223,8 +1149,8 @@ HInstrArray* doRegisterAllocation ( if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { HInstr* spill1 = NULL; HInstr* spill2 = NULL; - (*genSpill)( &spill1, &spill2, univ->regs[k], - vreg_lrs[m].spill_offset, mode64 ); + con->genSpill(&spill1, &spill2, con->univ->regs[k], + vreg_lrs[m].spill_offset, con->mode64); vassert(spill1 || spill2); /* can't both be NULL */ if (spill1) EMIT_INSTR(spill1); @@ -1271,7 +1197,7 @@ HInstrArray* doRegisterAllocation ( that the change is invisible to the standard-case handling that follows. */ - if (directReload && reg_usage_arr[ii].n_vRegs <= 2) { + if (con->directReload != NULL && reg_usage_arr[ii].n_vRegs <= 2) { Bool debug_direct_reload = False; HReg cand = INVALID_HREG; Bool nreads = 0; @@ -1305,19 +1231,20 @@ HInstrArray* doRegisterAllocation ( vassert(! 
sameHReg(reg_usage_arr[ii].vRegs[0], reg_usage_arr[ii].vRegs[1])); - reloaded = directReload ( instrs_in->arr[ii], cand, spilloff ); + reloaded = con->directReload(instrs_in->arr[ii], cand, spilloff); if (debug_direct_reload && !reloaded) { vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); - ppInstr(instrs_in->arr[ii], mode64); + con->ppInstr(instrs_in->arr[ii], con->mode64); } if (reloaded) { /* Update info about the insn, so it looks as if it had been in this form all along. */ instrs_in->arr[ii] = reloaded; - (*getRegUsage)( ®_usage_arr[ii], instrs_in->arr[ii], mode64 ); + con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], + con->mode64); if (debug_direct_reload && !reloaded) { vex_printf(" --> "); - ppInstr(reloaded, mode64); + con->ppInstr(reloaded, con->mode64); } } @@ -1336,7 +1263,7 @@ HInstrArray* doRegisterAllocation ( vassert(hregIsVirtual(vreg)); if (0) { - vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n"); + vex_printf("considering "); con->ppReg(vreg); vex_printf("\n"); } /* Now we're trying to find a rreg for "vreg". First of all, @@ -1347,7 +1274,7 @@ HInstrArray* doRegisterAllocation ( Int n = vreg_state[m]; if (IS_VALID_RREGNO(n)) { vassert(rreg_state[n].disp == Bound); - addToHRegRemap(&remap, vreg, univ->regs[n]); + addToHRegRemap(&remap, vreg, con->univ->regs[n]); /* If this rreg is written or modified, mark it as different from any spill slot value. */ if (reg_usage_arr[ii].vMode[j] != HRmRead) @@ -1366,7 +1293,7 @@ HInstrArray* doRegisterAllocation ( Int k; for (k = 0; k < n_rregs; k++) { if (rreg_state[k].disp != Free - || hregClass(univ->regs[k]) != hregClass(vreg)) + || hregClass(con->univ->regs[k]) != hregClass(vreg)) continue; if (rreg_state[k].has_hlrs) { /* Well, at least we can use k_suboptimal if we really @@ -1387,7 +1314,7 @@ HInstrArray* doRegisterAllocation ( Int p = hregIndex(vreg); vassert(IS_VALID_VREGNO(p)); vreg_state[p] = toShort(k); - addToHRegRemap(&remap, vreg, univ->regs[k]); + addToHRegRemap(&remap, vreg, con->univ->regs[k]); /* Generate a reload if needed. This only creates needed reloads because the live range builder for vregs will guarantee that the first event for a vreg is a write. @@ -1398,8 +1325,8 @@ HInstrArray* doRegisterAllocation ( vassert(vreg_lrs[p].reg_class != HRcINVALID); HInstr* reload1 = NULL; HInstr* reload2 = NULL; - (*genReload)( &reload1, &reload2, univ->regs[k], - vreg_lrs[p].spill_offset, mode64 ); + con->genReload(&reload1, &reload2, con->univ->regs[k], + vreg_lrs[p].spill_offset, con->mode64); vassert(reload1 || reload2); /* can't both be NULL */ if (reload1) EMIT_INSTR(reload1); @@ -1433,7 +1360,7 @@ HInstrArray* doRegisterAllocation ( rreg_state[k].is_spill_cand = False; if (rreg_state[k].disp != Bound) continue; - if (hregClass(univ->regs[k]) != hregClass(vreg)) + if (hregClass(con->univ->regs[k]) != hregClass(vreg)) continue; rreg_state[k].is_spill_cand = True; /* Note, the following loop visits only the virtual regs @@ -1468,7 +1395,7 @@ HInstrArray* doRegisterAllocation ( vassert(IS_VALID_RREGNO(spillee)); vassert(rreg_state[spillee].disp == Bound); /* check it's the right class */ - vassert(hregClass(univ->regs[spillee]) == hregClass(vreg)); + vassert(hregClass(con->univ->regs[spillee]) == hregClass(vreg)); /* check we're not ejecting the vreg for which we are trying to free up a register. */ vassert(! 
sameHReg(rreg_state[spillee].vreg, vreg)); @@ -1483,8 +1410,8 @@ HInstrArray* doRegisterAllocation ( if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { HInstr* spill1 = NULL; HInstr* spill2 = NULL; - (*genSpill)( &spill1, &spill2, univ->regs[spillee], - vreg_lrs[m].spill_offset, mode64 ); + con->genSpill(&spill1, &spill2, con->univ->regs[spillee], + vreg_lrs[m].spill_offset, con->mode64); vassert(spill1 || spill2); /* can't both be NULL */ if (spill1) EMIT_INSTR(spill1); @@ -1509,8 +1436,8 @@ HInstrArray* doRegisterAllocation ( vassert(vreg_lrs[m].reg_class != HRcINVALID); HInstr* reload1 = NULL; HInstr* reload2 = NULL; - (*genReload)( &reload1, &reload2, univ->regs[spillee], - vreg_lrs[m].spill_offset, mode64 ); + con->genReload(&reload1, &reload2, con->univ->regs[spillee], + vreg_lrs[m].spill_offset, con->mode64); vassert(reload1 || reload2); /* can't both be NULL */ if (reload1) EMIT_INSTR(reload1); @@ -1529,7 +1456,7 @@ HInstrArray* doRegisterAllocation ( /* So after much twisting and turning, we have vreg mapped to rreg_state[spillee].rreg. Note that in the map. */ - addToHRegRemap(&remap, vreg, univ->regs[spillee]); + addToHRegRemap(&remap, vreg, con->univ->regs[spillee]); } /* iterate over virtual registers in this instruction. */ @@ -1545,7 +1472,7 @@ HInstrArray* doRegisterAllocation ( */ /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */ - (*mapRegs)( &remap, instrs_in->arr[ii], mode64 ); + con->mapRegs(&remap, instrs_in->arr[ii], con->mode64); EMIT_INSTR( instrs_in->arr[ii] ); if (DEBUG_REGALLOC) { diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c new file mode 100644 index 0000000000..f798372d94 --- /dev/null +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -0,0 +1,1171 @@ +/*----------------------------------------------------------------------------*/ +/*--- begin host_generic_reg_alloc3.c ---*/ +/*----------------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation framework. + + Copyright (C) 2017-2017 Ivo Raisr + ivosh@ivosh.net + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_basictypes.h" +#include "libvex.h" + +#include "main_util.h" +#include "host_generic_regs.h" + +/* Set to 1 for lots of debugging output. */ +#define DEBUG_REGALLOC 0 + +/* Set to 1 for sanity checking at every instruction. + Set to 0 for sanity checking only every 17th one and the last one. */ +#define SANITY_CHECKS_EVERY_INSTR 0 + + +#define INVALID_INSTRNO (-2) + +/* Register allocator state is kept in an array of VRegState's. + There is an element for every virtual register (vreg). + Elements are indexed [0 .. n_vregs-1]. + Records information about vreg live range and its state. 
*/ +typedef + struct { + /* Live range, register class and spill offset are computed during the + first register allocator pass and remain unchanged after that. */ + + /* This vreg becomes live with this instruction (inclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short live_after; + /* This vreg becomes dead before this instruction (exclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short dead_before; + /* What kind of register this is. */ + HRegClass reg_class; + + /* What is its current disposition? */ + enum { Unallocated, /* Neither spilled nor assigned to a real reg. */ + Assigned, /* Assigned to a real register, viz rreg. */ + Spilled /* Spilled to the spill slot. */ + } disp; + + /* If .disp == Assigned, what rreg is it bound to? */ + HReg rreg; + + /* The "home" spill slot. The offset is relative to the beginning of + the guest state. */ + UShort spill_offset; + } + VRegState; + +/* The allocator also maintains a redundant array of indexes (rreg_state) from + rreg numbers back to entries in vreg_state. It is redundant because iff + rreg_state[r] == v then hregNumber(vreg_state[v].rreg) == r -- that is, the + two entries point at each other. The purpose of this is to speed up + activities which involve looking for a particular rreg: there is no need to + scan the vreg_state looking for it, just index directly into rreg_state. + The FAQ "does this rreg already have an associated vreg" is the main + beneficiary. + The identity of the real register is not recorded here, because the index + of this structure in |rreg_state| is the index number of the register, and + the register itself can be extracted from the RRegUniverse (univ). */ +typedef + struct { + /* What is its current disposition? */ + enum { Free, /* Not bound to any vreg. */ + Bound, /* Bound to a vreg, viz vreg. */ + Reserved /* Reserved for an instruction. */ + } disp; + + /* If .disp == Bound, what vreg is it bound to? */ + HReg vreg; + } + RRegState; + +/* Records information on a real-register live range, associated with + a particular real register. Computed once; does not change. */ +typedef + struct { + /* This rreg becomes live with this instruction (inclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short live_after; + /* This rreg becomes dead before this instruction (exclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short dead_before; + } + RRegLR; + +/* Live ranges for a single rreg and the current one. + Live ranges are computed during the first register allocator pass and remain + unchanged after that. + The identity of the real register is not recorded here, because the index + of this structure in |rreg_lr_state| is the index number of the register, and + the register itself can be extracted from the RRegUniverse (univ). */ +typedef + struct { + RRegLR* lrs; + UInt lrs_size; + UInt lrs_used; + + /* Live range corresponding to the currently processed instruction. + Points into |lrs| array. */ + RRegLR *lr_current; + UInt lr_current_idx; + } + RRegLRState; + +#define IS_VALID_VREGNO(v) ((v) >= 0 && (v) < n_vregs) +#define IS_VALID_RREGNO(r) ((r) >= 0 && (r) < n_rregs) + +/* Compute the index of the highest and lowest 1 in a ULong, respectively. + Results are undefined if the argument is zero. 
+   Don't pass it zero :) */
+static inline UInt ULong__maxIndex ( ULong w64 ) {
+   return 63 - __builtin_clzll(w64);
+}
+
+static inline UInt ULong__minIndex ( ULong w64 ) {
+   return __builtin_ctzll(w64);
+}
+
+static inline void enlarge_rreg_lrs(RRegLRState* rreg_lrs)
+{
+   vassert(rreg_lrs->lrs_used == rreg_lrs->lrs_size);
+
+   RRegLR* lr2 = LibVEX_Alloc_inline(2 * rreg_lrs->lrs_used * sizeof(RRegLR));
+   for (UInt l = 0; l < rreg_lrs->lrs_used; l++) {
+      lr2[l] = rreg_lrs->lrs[l];
+   }
+
+   rreg_lrs->lrs = lr2;
+   rreg_lrs->lrs_size = 2 * rreg_lrs->lrs_used;
+}
+
+static inline void print_state(
+   const RegAllocControl* con,
+   const VRegState* vreg_state, UInt n_vregs,
+   const RRegState* rreg_state, UInt n_rregs,
+   const RRegLRState* rreg_lr_state,
+   UShort current_ii)
+{
+   for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+      const VRegState* vreg = &vreg_state[v_idx];
+
+      if (vreg->live_after == INVALID_INSTRNO) {
+         continue; /* This is a dead vreg. It never becomes live. */
+      }
+      vex_printf("vreg_state[%3u] \t", v_idx);
+
+      UInt written;
+      switch (vreg->disp) {
+      case Unallocated:
+         written = vex_printf("unallocated");
+         break;
+      case Assigned:
+         written = vex_printf("assigned to ");
+         written += con->ppReg(vreg->rreg);
+         break;
+      case Spilled:
+         written = vex_printf("spilled at offset %u", vreg->spill_offset);
+         break;
+      default:
+         vassert(0);
+      }
+
+      for (Int w = 30 - written; w > 0; w--) {
+         vex_printf(" ");
+      }
+
+      if (vreg->live_after > (Short) current_ii) {
+         vex_printf("[not live yet]\n");
+      } else if ((Short) current_ii >= vreg->dead_before) {
+         vex_printf("[now dead]\n");
+      } else {
+         vex_printf("[live]\n");
+      }
+   }
+
+   for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+      const RRegState* rreg = &rreg_state[r_idx];
+      const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+      vex_printf("rreg_state[%2u] = ", r_idx);
+      UInt written = con->ppReg(con->univ->regs[r_idx]);
+      for (Int w = 10 - written; w > 0; w--) {
+         vex_printf(" ");
+      }
+
+      switch (rreg->disp) {
+      case Free:
+         vex_printf("free\n");
+         break;
+      case Bound:
+         vex_printf("bound for ");
+         con->ppReg(rreg->vreg);
+         vex_printf("\n");
+         break;
+      case Reserved:
+         vex_printf("reserved - live range [%d, %d)\n",
+                    rreg_lrs->lr_current->live_after,
+                    rreg_lrs->lr_current->dead_before);
+         break;
+      }
+   }
+}
+
+static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out,
+                              const RegAllocControl* con, const HChar* why)
+{
+   if (DEBUG_REGALLOC) {
+      vex_printf("** ");
+      con->ppInstr(instr, con->mode64);
+      if (why != NULL) {
+         vex_printf(" (%s)", why);
+      }
+      vex_printf("\n\n");
+   }
+
+   addHInstr(instrs_out, instr);
+}
+
+/* Spills a vreg assigned to some rreg.
+   The vreg is spilled and the rreg is freed.
+   Returns rreg's index. */
+static inline UInt spill_vreg(
+   HReg vreg, UInt v_idx, UInt current_ii, VRegState* vreg_state, UInt n_vregs,
+   RRegState* rreg_state, UInt n_rregs, HInstrArray* instrs_out,
+   const RegAllocControl* con)
+{
+   /* Check some invariants first. */
+   vassert(IS_VALID_VREGNO((v_idx)));
+   vassert(vreg_state[v_idx].disp == Assigned);
+   HReg rreg = vreg_state[v_idx].rreg;
+   UInt r_idx = hregIndex(rreg);
+   vassert(IS_VALID_RREGNO(r_idx));
+   vassert(hregClass(con->univ->regs[r_idx]) == hregClass(vreg));
+   vassert(vreg_state[v_idx].dead_before > (Short) current_ii);
+   vassert(vreg_state[v_idx].reg_class != HRcINVALID);
+
+   /* Generate spill. */
+   HInstr* spill1 = NULL;
+   HInstr* spill2 = NULL;
+   con->genSpill(&spill1, &spill2, rreg, vreg_state[v_idx].spill_offset,
+                 con->mode64);
+   vassert(spill1 != NULL || spill2 != NULL); /* cannot be both NULL */
+   if (spill1 != NULL) {
+      emit_instr(spill1, instrs_out, con, "spill1");
+   }
+   if (spill2 != NULL) {
+      emit_instr(spill2, instrs_out, con, "spill2");
+   }
+
+   /* Update register allocator state. */
+   vreg_state[v_idx].disp = Spilled;
+   vreg_state[v_idx].rreg = INVALID_HREG;
+   rreg_state[r_idx].disp = Free;
+   rreg_state[r_idx].vreg = INVALID_HREG;
+
+   return r_idx;
+}
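/* A minimal sketch (not part of this patch) of why genSpill hands back up to
   two instructions, as spill_vreg above expects. On some hosts a store's
   immediate offset field is too narrow for a large spill offset, so the
   backend must first materialise the offset in a scratch register. Everything
   named Toy* below is hypothetical; real implementations such as
   genSpill_AMD64 or genSpill_ARM64 follow the same shape. */

static void genSpill_Toy ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                           HReg rreg, Int offsetB, Bool mode64 )
{
   *i1 = *i2 = NULL;
   if (offsetB < 4096) {
      /* The offset fits the store instruction's immediate field. */
      *i1 = ToyInstr_StoreRI(rreg, ToyReg_GuestStatePtr(), offsetB);
   } else {
      /* Too large: materialise the offset in a scratch register first,
         then store through a register+register addressing mode. */
      *i1 = ToyInstr_MovImm(ToyReg_Scratch(), offsetB);
      *i2 = ToyInstr_StoreRR(rreg, ToyReg_GuestStatePtr(), ToyReg_Scratch());
   }
}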
+/* Chooses a vreg to be spilled based on various criteria.
+   The vreg must not be from the instruction being processed, that is, it must
+   not be listed in reg_usage->vRegs. */
+static inline HReg find_vreg_to_spill(
+   VRegState* vreg_state, UInt n_vregs,
+   RRegState* rreg_state, UInt n_rregs,
+   const HRegUsage* instr_regusage, HRegClass target_hregclass,
+   const HRegUsage* reg_usage, UInt scan_forward_from, UInt scan_forward_max,
+   const RegAllocControl* con)
+{
+   /* Scan forwards a few instructions to find the most distant mentioned
+      use of a vreg. We can scan in the range of (inclusive):
+      - reg_usage[scan_forward_from]
+      - reg_usage[scan_forward_end], where scan_forward_end
+           = MIN(scan_forward_max, scan_forward_from + FEW_INSTRUCTIONS). */
+#  define FEW_INSTRUCTIONS 5
+   UInt scan_forward_end
+      = (scan_forward_max <= scan_forward_from + FEW_INSTRUCTIONS) ?
+        scan_forward_max : scan_forward_from + FEW_INSTRUCTIONS;
+#  undef FEW_INSTRUCTIONS
+
+   HReg vreg_found = INVALID_HREG;
+   UInt distance_so_far = 0;
+
+   for (UInt r_idx = con->univ->allocable_start[target_hregclass];
+        r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) {
+      if (rreg_state[r_idx].disp == Bound) {
+         HReg vreg = rreg_state[r_idx].vreg;
+         if (! HRegUsage__contains(instr_regusage, vreg)) {
+            UInt ii = scan_forward_from;
+            for ( ; ii <= scan_forward_end; ii++) {
+               if (HRegUsage__contains(&reg_usage[ii], vreg)) {
+                  break;
+               }
+            }
+
+            if (ii - scan_forward_from > distance_so_far) {
+               distance_so_far = ii - scan_forward_from;
+               vreg_found = vreg;
+               if (scan_forward_from + distance_so_far == scan_forward_end) {
+                  break; /* We are at the end. Nothing could be better. */
+               }
+            }
+         }
+      }
+   }
+
+   if (hregIsInvalid(vreg_found)) {
+      vex_printf("doRegisterAllocation_v3: cannot find a register in class: ");
+      ppHRegClass(target_hregclass);
+      vex_printf("\n");
+      vpanic("doRegisterAllocation_v3: cannot find a register.");
+   }
+
+   return vreg_found;
+}
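/* A worked example (not part of this patch) of the eviction rule above -- a
   bounded variant of Belady's optimal-replacement rule. Suppose the allocator
   processes instruction 10, needs an HRcInt64 rreg, and vregs v1..v3 occupy
   all candidate rregs, none of them being used by instruction 10 itself:

      v1 is next used by instruction 11
      v2 is next used by instruction 14
      v3 is not mentioned at all within the scanned window

   v3 is chosen for spilling, because its matching reload can be postponed
   the longest (and may never be needed at all). */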
+/* Find a free rreg of the correct class.
+   Tries to find an rreg whose live range (if any) is as far ahead in the
+   incoming instruction stream as possible. An ideal rreg candidate is
+   a callee-save register because it won't be used for parameter passing
+   around helper function calls. */
+static Bool find_free_rreg(
+   VRegState* vreg_state, UInt n_vregs,
+   RRegState* rreg_state, UInt n_rregs,
+   const RRegLRState* rreg_lr_state,
+   UInt current_ii, HRegClass target_hregclass,
+   Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found)
+{
+   Bool found = False;
+   UInt distance_so_far = 0; /* running max for |live_after - current_ii| */
+
+   for (UInt r_idx = con->univ->allocable_start[target_hregclass];
+        r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) {
+      const RRegState* rreg = &rreg_state[r_idx];
+      const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+      if (rreg->disp == Free) {
+         if (rreg_lrs->lrs_used == 0) {
+            found = True;
+            *r_idx_found = r_idx;
+            break; /* There could be nothing better, so break now. */
+         } else {
+            const RRegLR* lr = rreg_lrs->lr_current;
+            if (lr->live_after > (Short) current_ii) {
+               /* Not live, yet. */
+               if ((lr->live_after - (Short) current_ii) > distance_so_far) {
+                  distance_so_far = lr->live_after - (Short) current_ii;
+                  found = True;
+                  *r_idx_found = r_idx;
+               }
+            } else if ((Short) current_ii >= lr->dead_before) {
+               /* Now dead. Effectively as if there is no LR now. */
+               found = True;
+               *r_idx_found = r_idx;
+               break; /* There could be nothing better, so break now. */
+            } else {
+               /* Going live for this instruction. This could happen only when
+                  rregs are being reserved en masse, for example before
+                  a helper call. */
+               vassert(reserve_phase);
+            }
+         }
+      }
+   }
+
+   return found;
+}
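/* A minimal sketch (not part of this patch) of how a caller chooses between
   the two allocators. Both entry points now share the same signature, taking
   a RegAllocControl block instead of a dozen callback arguments. The |version|
   value would come from the '--vex-regalloc-version' command line option; the
   exact plumbing through VexControl is assumed here, not shown by the patch. */

static HInstrArray* regalloc_dispatch ( HInstrArray* vcode,
                                        const RegAllocControl* con,
                                        UInt version )
{
   if (version == 2) {
      return doRegisterAllocation_v2(vcode, con); /* the old allocator */
   }
   return doRegisterAllocation_v3(vcode, con);    /* the new default */
}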
+/* A target-independent register allocator (v3). Requires various functions
+   which it uses to deal abstractly with instructions and registers, since it
+   cannot have any target-specific knowledge.
+
+   Returns a new list of instructions, which, as a result of the behaviour of
+   mapRegs, will be in-place modifications of the original instructions.
+
+   Requires that the incoming code has been generated using vreg numbers
+   0, 1 .. n_vregs-1. Appearance of a vreg outside that range is a checked
+   run-time error.
+
+   Takes unallocated instructions and returns allocated instructions.
+*/
+HInstrArray* doRegisterAllocation_v3(
+   /* Incoming virtual-registerised code. */
+   HInstrArray* instrs_in,
+
+   /* Register allocator controls to use. */
+   const RegAllocControl* con
+)
+{
+   vassert((con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN) == 0);
+
+   /* The main register allocator state. */
+   UInt n_vregs = instrs_in->n_vregs;
+   VRegState* vreg_state = NULL;
+   if (n_vregs > 0) {
+      vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(VRegState));
+   }
+
+   /* If this is not so, the universe we have is nonsensical. */
+   UInt n_rregs = con->univ->allocable;
+   vassert(n_rregs > 0);
+   STATIC_ASSERT(N_RREGUNIVERSE_REGS == 64);
+
+   /* Redundant rreg -> vreg state. */
+   RRegState* rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
+
+   /* Info on rreg live ranges. */
+   RRegLRState* rreg_lr_state
+      = LibVEX_Alloc_inline(n_rregs * sizeof(RRegLRState));
+
+   /* Info on register usage in the incoming instruction array. Computed once
+      and remains unchanged, more or less; updated sometimes by the
+      direct-reload optimisation. */
+   HRegUsage* reg_usage
+      = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
+
+   /* The live range numbers are signed shorts, and so limiting the
+      number of instructions to 15000 comfortably guards against them
+      overflowing 32k. */
+   vassert(instrs_in->arr_used <= 15000);
+
+   /* The output array of instructions. */
+   HInstrArray* instrs_out = newHInstrArray();
+
+
+#  define OFFENDING_VREG(_v_idx, _instr, _mode) \
+   do { \
+      vex_printf("\n\nOffending vreg = %u\n", (_v_idx)); \
+      vex_printf("\nOffending instruction = "); \
+      con->ppInstr((_instr), con->mode64); \
+      vex_printf("\n"); \
+      vpanic("doRegisterAllocation_v3: first event for vreg is "#_mode \
+             " (should be Write)"); \
+   } while (0)
+
+#  define OFFENDING_RREG(_r_idx, _instr, _mode) \
+   do { \
+      vex_printf("\n\nOffending rreg = "); \
+      con->ppReg(con->univ->regs[(_r_idx)]); \
+      vex_printf("\nOffending instruction = "); \
+      con->ppInstr((_instr), con->mode64); \
+      vex_printf("\n"); \
+      vpanic("doRegisterAllocation_v3: first event for rreg is "#_mode \
+             " (should be Write)"); \
+   } while (0)
+
+
+/* Finds an rreg of the correct class.
+   If a free rreg is not found, then spills a vreg not used by the current
+   instruction and frees up the corresponding rreg. */
+#  define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase) \
+   ({ \
+      UInt _r_free_idx = -1; \
+      Bool free_rreg_found = find_free_rreg( \
+         vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \
+         (_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx); \
+      if (!free_rreg_found) { \
+         HReg vreg_to_spill = find_vreg_to_spill( \
+            vreg_state, n_vregs, rreg_state, n_rregs, \
+            &reg_usage[(_ii)], (_reg_class), \
+            reg_usage, (_ii) + 1, \
+            instrs_in->arr_used - 1, con); \
+         _r_free_idx = spill_vreg(vreg_to_spill, hregIndex(vreg_to_spill), \
+                                  (_ii), vreg_state, n_vregs, \
+                                  rreg_state, n_rregs, \
+                                  instrs_out, con); \
+      } \
+      \
+      vassert(IS_VALID_RREGNO(_r_free_idx)); \
+      \
+      _r_free_idx; \
+   })
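   /* An aside (not part of this patch): FIND_OR_MAKE_FREE_RREG relies on the
      GNU C statement-expression extension, where a '({ ... })' block is an
      expression whose value is that of its last statement, for example:

         UInt sq = ({ UInt t = 7; t * t; });   // sq == 49

      This lets the macro spill as a side effect and still be usable as an
      rvalue yielding the index of the freed rreg. */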
+   /* --- Stage 0. Initialize the state. --- */
+   for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+      vreg_state[v_idx].live_after = INVALID_INSTRNO;
+      vreg_state[v_idx].dead_before = INVALID_INSTRNO;
+      vreg_state[v_idx].reg_class = HRcINVALID;
+      vreg_state[v_idx].disp = Unallocated;
+      vreg_state[v_idx].rreg = INVALID_HREG;
+      vreg_state[v_idx].spill_offset = 0;
+   }
+
+   for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+      rreg_state[r_idx].disp = Free;
+      rreg_state[r_idx].vreg = INVALID_HREG;
+   }
+
+   for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+      RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+      rreg_lrs->lrs_size = 4;
+      rreg_lrs->lrs = LibVEX_Alloc_inline(rreg_lrs->lrs_size
+                                          * sizeof(RRegLR));
+      rreg_lrs->lrs_used = 0;
+      rreg_lrs->lr_current = &rreg_lrs->lrs[0];
+      rreg_lrs->lr_current_idx = 0;
+   }
+
+   /* --- Stage 1. Scan the incoming instructions. --- */
+   for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
+      const HInstr* instr = instrs_in->arr[ii];
+
+      con->getRegUsage(&reg_usage[ii], instr, con->mode64);
+
+      if (0) {
+         vex_printf("\n%u stage 1: ", ii);
+         con->ppInstr(instr, con->mode64);
+         vex_printf("\n");
+         ppHRegUsage(con->univ, &reg_usage[ii]);
+      }
+
+      /* Process virtual registers mentioned in the instruction. */
+      for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
+         HReg vreg = reg_usage[ii].vRegs[j];
+         vassert(hregIsVirtual(vreg));
+
+         UInt v_idx = hregIndex(vreg);
+         if (!IS_VALID_VREGNO(v_idx)) {
+            vex_printf("\n");
+            con->ppInstr(instr, con->mode64);
+            vex_printf("\n");
+            vex_printf("vreg %u (n_vregs %u)\n", v_idx, n_vregs);
+            vpanic("doRegisterAllocation_v3: out-of-range vreg");
+         }
+
+         /* Note the register class. */
+         if (vreg_state[v_idx].reg_class == HRcINVALID) {
+            /* First mention of this vreg. */
+            vreg_state[v_idx].reg_class = hregClass(vreg);
+         } else {
+            /* Seen it before, so check for consistency. */
+            vassert(vreg_state[v_idx].reg_class == hregClass(vreg));
+         }
+
+         /* Consider live ranges. */
+         switch (reg_usage[ii].vMode[j]) {
+         case HRmRead:
+            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+               OFFENDING_VREG(v_idx, instr, "Read");
+            }
+            vreg_state[v_idx].dead_before = toShort(ii + 1);
+            break;
+         case HRmWrite:
+            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+               vreg_state[v_idx].live_after = toShort(ii);
+            }
+            vreg_state[v_idx].dead_before = toShort(ii + 1);
+            break;
+         case HRmModify:
+            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+               OFFENDING_VREG(v_idx, instr, "Modify");
+            }
+            vreg_state[v_idx].dead_before = toShort(ii + 1);
+            break;
+         default:
+            vassert(0);
+         }
+      }
+
+      /* Process real registers mentioned in the instruction. */
+      const ULong rRead = reg_usage[ii].rRead;
+      const ULong rWritten = reg_usage[ii].rWritten;
+      const ULong rMentioned = rRead | rWritten;
+
+      if (rMentioned != 0) {
+         UInt rReg_minIndex = ULong__minIndex(rMentioned);
+         UInt rReg_maxIndex = ULong__maxIndex(rMentioned);
+         /* Don't bother to look at registers which are not available
+            to the allocator, such as the stack or guest state pointers;
+            we never visit them. We asserted above that n_rregs > 0, so
+            (n_rregs - 1) is safe. */
+         if (rReg_maxIndex >= n_rregs) {
+            rReg_maxIndex = n_rregs - 1;
+         }
+
+         for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) {
+            const ULong jMask = 1ULL << r_idx;
+
+            if (LIKELY((rMentioned & jMask) == 0)) {
+               continue;
+            }
+
+            RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+            const Bool isR = (rRead & jMask) != 0;
+            const Bool isW = (rWritten & jMask) != 0;
+
+            if (isW && !isR) {
+               if (rreg_lrs->lrs_used == rreg_lrs->lrs_size) {
+                  enlarge_rreg_lrs(rreg_lrs);
+               }
+
+               rreg_lrs->lrs[rreg_lrs->lrs_used].live_after = toShort(ii);
+               rreg_lrs->lrs[rreg_lrs->lrs_used].dead_before = toShort(ii + 1);
+               rreg_lrs->lrs_used += 1;
+            } else if (!isW && isR) {
+               if ((rreg_lrs->lrs_used == 0)
+                   || (rreg_lrs->lrs[rreg_lrs->lrs_used - 1].live_after
+                       == INVALID_INSTRNO)) {
+                  OFFENDING_RREG(r_idx, instr, "Read");
+               }
+               rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before
+                  = toShort(ii + 1);
+            } else {
+               vassert(isR && isW);
+               if ((rreg_lrs->lrs_used == 0)
+                   || (rreg_lrs->lrs[rreg_lrs->lrs_used - 1].live_after
+                       == INVALID_INSTRNO)) {
+                  OFFENDING_RREG(r_idx, instr, "Modify");
+               }
+               rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before
+                  = toShort(ii + 1);
+            }
+         }
+      }
+   }
+
+   if (DEBUG_REGALLOC) {
+      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+         vex_printf("vreg %3u: [%3d, %3d)\n",
+                    v_idx, vreg_state[v_idx].live_after,
+                    vreg_state[v_idx].dead_before);
+      }
+
+      for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+         vex_printf("rreg %2u (", r_idx);
+         UInt written = con->ppReg(con->univ->regs[r_idx]);
+         vex_printf("):");
+         for (Int t = 15 - written; t > 0; t--) {
+            vex_printf(" ");
+         }
+
+         const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
+         for (UInt l = 0; l < rreg_lrs->lrs_used; l++) {
+            vex_printf("[%3d, %3d) ",
+                       rreg_lrs->lrs[l].live_after,
+                       rreg_lrs->lrs[l].dead_before);
+         }
+         vex_printf("\n");
+      }
+   }
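   /* A worked example (not part of this patch) of how stage 1 turns
      HRmWrite/HRmRead/HRmModify events into a [live_after, dead_before)
      interval. For some vreg v5:

         instruction 3:  v5 written   -> live_after = 3, dead_before = 4
         instruction 5:  v5 read      ->                 dead_before = 6
         instruction 9:  v5 modified  ->                 dead_before = 10

      yielding the half-open range [3, 10). A Read or Modify seen before any
      Write trips OFFENDING_VREG, since a vreg must be defined before use. */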
Since the spill slot's address is computed as an offset + from the guest state pointer, and since the user of the generated code + must set that pointer to a 32-byte aligned value, we have the residual + obligation here of choosing a 16-byte aligned spill slot offset for + Vec128-class values. Since each spill slot is 8 bytes long, that means for + Vec128-class values we must allocate a spill slot number which is + zero mod 2. + + Similarly, for Vec256 class on amd64, find a spill slot number which is + zero mod 4. This guarantees it will be 32-byte aligned, which isn't + actually necessary on amd64 (we use movUpd etc to spill), but seems like + a good practice. + + Do a rank-based allocation of vregs to spill slot numbers. We put as few + values as possible in spill slots, but nevertheless need to have a spill + slot available for all vregs, just in case. */ + +# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) + STATIC_ASSERT((N_SPILL64S % 2) == 0); + STATIC_ASSERT((LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN) == 0); + + Short ss_busy_until_before[N_SPILL64S]; + vex_bzero(&ss_busy_until_before, sizeof(ss_busy_until_before)); + + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + /* True iff this vreg is unused. In which case we also expect that the + reg_class field for it has not been set. */ + if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { + vassert(vreg_state[v_idx].reg_class == HRcINVALID); + continue; + } + + /* The spill slots are 64 bits in size. As per the comment on definition + of HRegClass in host_generic_regs.h, that means, to spill a vreg of + class Flt64 or Vec128, we'll need to find two adjacent spill slots to + use. For Vec256, we'll need to find four adjacent slots to use. Note, + this logic needs to be kept in sync with the size info on the + definition of HRegClass. */ + UInt ss_no; + switch (vreg_state[v_idx].reg_class) { + case HRcFlt64: + case HRcVec128: + /* Find two adjacent free slots which provide up to 128 bits to + spill the vreg. Since we are trying to find an even:odd pair, + move along in steps of 2 (slots). */ + for (ss_no = 0; ss_no < N_SPILL64S - 1; ss_no += 2) + if (ss_busy_until_before[ss_no + 0] <= vreg_state[v_idx].live_after + && ss_busy_until_before[ss_no + 1] <= vreg_state[v_idx].live_after) + break; + if (ss_no >= N_SPILL64S - 1) { + vpanic("N_SPILL64S is too low in VEX. Increase and recompile."); + } + ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before; + ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before; + break; + default: + /* The ordinary case -- just find a single lowest-numbered spill + slot which is available at the start point of this interval, + and assign the interval to it. */ + for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) { + if (ss_busy_until_before[ss_no] <= vreg_state[v_idx].live_after) + break; + } + if (ss_no == N_SPILL64S) { + vpanic("N_SPILL64S is too low in VEX. Increase and recompile."); + } + ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before; + break; + } + + /* This reflects VEX's hard-wired knowledge of the guest state layout: + the guest state itself, then two equal sized areas following it for two + sets of shadow state, and then the spill area. */ + vreg_state[v_idx].spill_offset + = toShort(con->guest_sizeB * 3 + ss_no * 8); + + /* Independent check that we've made a sane choice of the slot. 
*/ + switch (vreg_state[v_idx].reg_class) { + case HRcVec128: case HRcFlt64: + vassert((vreg_state[v_idx].spill_offset % 16) == 0); + break; + default: + vassert((vreg_state[v_idx].spill_offset % 8) == 0); + break; + } + } + + if (0) { + vex_printf("\n\n"); + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) + vex_printf("vreg %3u --> spill offset %u\n", + v_idx, vreg_state[v_idx].spill_offset); + } + + + /* --- Stage 3. Process instructions. --- */ + for (UShort ii = 0; ii < instrs_in->arr_used; ii++) { + HInstr* instr = instrs_in->arr[ii]; + + if (DEBUG_REGALLOC) { + vex_printf("\n====----====---- Instr %d ----====----====\n", ii); + vex_printf("---- "); + con->ppInstr(instrs_in->arr[ii], con->mode64); + vex_printf("\n\nInitial state:\n"); + print_state(con, vreg_state, n_vregs, rreg_state, n_rregs, + rreg_lr_state, ii); + vex_printf("\n"); + } + + /* ------------ Sanity checks ------------ */ + + /* Sanity checks are relatively expensive, so they are done only once + every 17 instructions, and just before the last instruction. */ + Bool do_sanity_check + = toBool( + SANITY_CHECKS_EVERY_INSTR + || ii == instrs_in->arr_used - 1 + || (ii > 0 && (ii % 17) == 0) + ); + + if (do_sanity_check) { + /* Sanity check: the vreg_state and rreg_state mutually-redundant + mappings are consistent. If vreg_state[v].rreg points at some + rreg_state entry then that rreg_state entry should point back at + vreg_state[v]. */ + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + if (vreg_state[v_idx].disp == Assigned) { + vassert(!hregIsVirtual(vreg_state[v_idx].rreg)); + + UInt r_idx = hregIndex(vreg_state[v_idx].rreg); + vassert(IS_VALID_RREGNO(r_idx)); + vassert(rreg_state[r_idx].disp == Bound); + vassert(hregIndex(rreg_state[r_idx].vreg) == v_idx); + + vassert(hregClass(vreg_state[v_idx].rreg) + == hregClass(con->univ->regs[r_idx])); + } + } + + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + if (rreg_state[r_idx].disp == Bound) { + vassert(hregIsVirtual(rreg_state[r_idx].vreg)); + + UInt v_idx = hregIndex(rreg_state[r_idx].vreg); + vassert(IS_VALID_VREGNO(v_idx)); + vassert(vreg_state[v_idx].disp == Assigned); + vassert(hregIndex(vreg_state[v_idx].rreg) == r_idx); + } + } + + /* Sanity check: if rreg has been marked as Reserved, there must be + a corresponding hard live range for it. */ + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + if (rreg_state[r_idx].disp == Reserved) { + const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + vassert(rreg_lrs->lrs_used > 0); + vassert(rreg_lrs->lr_current_idx < rreg_lrs->lrs_used); + vassert(rreg_lrs->lr_current->live_after <= (Short) ii); + vassert((Short) ii < rreg_lrs->lr_current->dead_before); + } + } + } + + + /* --- MOV coalescing --- */ + /* Optimise register coalescing: + MOV v <-> v coalescing (done here). + MOV v <-> r coalescing (TODO: not yet). */ + /* If doing a reg-reg move between two vregs, and the src's live + range ends here and the dst's live range starts here, bind the dst + to the src's rreg, and that's all. */ + HReg vregS = INVALID_HREG; + HReg vregD = INVALID_HREG; + if (con->isMove(instr, &vregS, &vregD)) { + if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) { + /* Check that |isMove| is not telling us a bunch of lies ...
*/ + vassert(hregClass(vregS) == hregClass(vregD)); + UInt vs_idx = hregIndex(vregS); + UInt vd_idx = hregIndex(vregD); + vassert(IS_VALID_VREGNO(vs_idx)); + vassert(IS_VALID_VREGNO(vd_idx)); + + if ((vreg_state[vs_idx].dead_before == ii + 1) + && (vreg_state[vd_idx].live_after == ii) + && (vreg_state[vs_idx].disp == Assigned)) { + + /* Live ranges are adjacent and source vreg is bound. + Finally we can do the coalescing. */ + HReg rreg = vreg_state[vs_idx].rreg; + vreg_state[vd_idx].disp = Assigned; + vreg_state[vd_idx].rreg = rreg; + vreg_state[vs_idx].disp = Unallocated; + vreg_state[vs_idx].rreg = INVALID_HREG; + + UInt r_idx = hregIndex(rreg); + vassert(rreg_state[r_idx].disp == Bound); + rreg_state[r_idx].vreg = vregD; + + if (DEBUG_REGALLOC) { + vex_printf("coalesced: "); + con->ppReg(vregS); + vex_printf(" -> "); + con->ppReg(vregD); + vex_printf("\n\n"); + } + + /* In rare cases it can happen that vregD's live range ends + here. Check for this and, if so, free the vreg and rreg. + This effectively means that either the translated program + contained dead code (but VEX iropt passes are pretty good + at eliminating it) or the VEX backend generated dead code. */ + if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) { + vreg_state[vd_idx].disp = Unallocated; + vreg_state[vd_idx].rreg = INVALID_HREG; + rreg_state[r_idx].disp = Free; + rreg_state[r_idx].vreg = INVALID_HREG; + } + + /* Move on to the next instruction. We skip the post-instruction + stuff because all required house-keeping was done here. */ + continue; + } + } + } + + + /* --- Reserve and free rregs if needed. --- */ + /* If the rreg enters its hard live range and is not free: + 1. If the corresponding vreg is not used by the instruction, spill it. + 2. If the corresponding vreg is used by the instruction, then: + 2a. If there are no free rregs, spill a vreg not used by this + instruction. + 2b. Move the corresponding vreg to a free rreg. This is better than + spilling it and immediately reloading it. + */ + const ULong rRead = reg_usage[ii].rRead; + const ULong rWritten = reg_usage[ii].rWritten; + const ULong rMentioned = rRead | rWritten; + + if (rMentioned != 0) { + UInt rReg_minIndex = ULong__minIndex(rMentioned); + UInt rReg_maxIndex = ULong__maxIndex(rMentioned); + if (rReg_maxIndex >= n_rregs) { + rReg_maxIndex = n_rregs - 1; + } + + for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) { + const ULong jMask = 1ULL << r_idx; + + if (LIKELY((rMentioned & jMask) == 0)) { + continue; + } + + RRegState* rreg = &rreg_state[r_idx]; + const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + if (LIKELY(rreg_lrs->lrs_used == 0)) { + continue; + } + if (rreg->disp == Reserved) { + continue; + } + + if ((rreg_lrs->lr_current->live_after <= (Short) ii) + && ((Short) ii < rreg_lrs->lr_current->dead_before)) { + + if (rreg->disp == Bound) { + /* Yes, there is an associated vreg. We need to deal with + it now somehow. */ + HReg vreg = rreg->vreg; + UInt v_idx = hregIndex(vreg); + + if (! HRegUsage__contains(&reg_usage[ii], vreg)) { + /* Spill the vreg. It is not used by this instruction. */ + spill_vreg(vreg, v_idx, ii, vreg_state, n_vregs, + rreg_state, n_rregs, instrs_out, con); + } else { + /* Find or make a free rreg to move this vreg to. */ + UInt r_free_idx = FIND_OR_MAKE_FREE_RREG( + ii, v_idx, vreg_state[v_idx].reg_class, True); + + /* Generate "move" between real registers.
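A concrete, purely hypothetical walk-through of cases 1/2a/2b above may help (register names are made up):

   /* Suppose insn 7 is a helper call that clobbers rreg R, so R has the
      hard live range [7, 8), and vreg v3 is Bound to R when ii == 7.
        - insn 7 does not mention v3: spill v3 to its slot        (case 1)
        - insn 7 does use v3: find a free rreg R2, possibly by
          spilling some other vreg the insn does not use          (case 2a),
          then emit genMove R -> R2 and rebind v3 to R2           (case 2b)
      Either way, R is then marked Reserved until ii reaches 8. */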
*/ + HInstr* move = con->genMove(con->univ->regs[r_idx], + con->univ->regs[r_free_idx], con->mode64); + vassert(move != NULL); + emit_instr(move, instrs_out, con, "move"); + + /* Update the register allocator state. */ + vassert(vreg_state[v_idx].disp == Assigned); + vreg_state[v_idx].rreg = con->univ->regs[r_free_idx]; + rreg_state[r_free_idx].disp = Bound; + rreg_state[r_free_idx].vreg = vreg; + rreg->disp = Free; + rreg->vreg = INVALID_HREG; + } + } + + /* Finally claim the rreg as reserved. */ + rreg->disp = Reserved; + + if (DEBUG_REGALLOC) { + vex_printf("rreg has been reserved: "); + con->ppReg(con->univ->regs[r_idx]); + vex_printf("\n\n"); + } + } + } + } + + + /* --- Direct reload optimisation. --- */ + /* If the instruction reads exactly one vreg which is currently spilled, + and this is the last use of that vreg, see if we can convert + the instruction into one that reads directly from the spill slot. + This is clearly only possible for x86 and amd64 targets, since ppc and + arm are load-store architectures. If successful, replace + instrs_in->arr[ii] with this new instruction, and recompute + its reg_usage, so that the change is invisible to the standard-case + handling that follows. */ + if ((con->directReload != NULL) && (reg_usage[ii].n_vRegs <= 2)) { + Bool debug_direct_reload = False; + UInt nreads = 0; + HReg vreg_found = INVALID_HREG; + Short spill_offset = 0; + + for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) { + HReg vreg = reg_usage[ii].vRegs[j]; + vassert(hregIsVirtual(vreg)); + + if (reg_usage[ii].vMode[j] == HRmRead) { + nreads++; + UInt v_idx = hregIndex(vreg); + vassert(IS_VALID_VREGNO(v_idx)); + if (vreg_state[v_idx].disp == Spilled) { + /* Is this its last use? */ + vassert(vreg_state[v_idx].dead_before >= (Short) (ii + 1)); + if ((vreg_state[v_idx].dead_before == (Short) (ii + 1)) + && hregIsInvalid(vreg_found)) { + vreg_found = vreg; + spill_offset = vreg_state[v_idx].spill_offset; + } + } + } + } + + if (!hregIsInvalid(vreg_found) && (nreads == 1)) { + if (reg_usage[ii].n_vRegs == 2) { + vassert(! sameHReg(reg_usage[ii].vRegs[0], + reg_usage[ii].vRegs[1])); + } + + HInstr* reloaded = con->directReload(instrs_in->arr[ii], + vreg_found, spill_offset); + if (debug_direct_reload && (reloaded != NULL)) { + vex_printf("[%3d] ", spill_offset); + ppHReg(vreg_found); + vex_printf(": "); + con->ppInstr(instr, con->mode64); + } + if (reloaded != NULL) { + /* Update info about the instruction, so it looks as if it had + been in this form all along. */ + instr = reloaded; + instrs_in->arr[ii] = reloaded; + con->getRegUsage(&reg_usage[ii], instr, con->mode64); + if (debug_direct_reload) { + vex_printf(" --> "); + con->ppInstr(reloaded, con->mode64); + } + } + + if (debug_direct_reload && (reloaded != NULL)) { + vex_printf("\n"); + } + } + } + + + /* The vreg -> rreg map is constructed and then applied to each + instruction. */ + HRegRemap remap; + initHRegRemap(&remap); + + /* --- Allocate vregs used by the instruction. --- */ + /* Vregs used by the instruction can be in the following states: + - Unallocated: vreg is entering its live range. Find a free rreg. + - Assigned: we do nothing; rreg has been allocated previously. + - Spilled: Find a free rreg and reload vreg into it. + Naturally, finding a free rreg may involve spilling a vreg not used by + the instruction.
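The body of FIND_OR_MAKE_FREE_RREG is not shown in this hunk, but given the per-class |allocable_{start,end}| ranges this patch adds to RRegUniverse, its job can be sketched roughly as follows (an illustrative pseudo-implementation using the patch's own types, not the actual macro):

   static UInt find_or_make_free_rreg(UInt ii, HRegClass rc,
                                      const RegAllocControl* con,
                                      RRegState* rreg_state,
                                      const HRegUsage* instr_uses)
   {
      /* Pass 1: look for a Free rreg of the required class. The
         contiguous per-class range keeps this scan short. */
      for (UInt r = con->univ->allocable_start[rc];
           r <= con->univ->allocable_end[rc]; r++) {
         if (rreg_state[r].disp == Free)
            return r;
      }
      /* Pass 2: nothing is Free, so evict: pick a Bound rreg whose vreg
         the current insn does not mention, spill that vreg and reuse
         the rreg. */
      for (UInt r = con->univ->allocable_start[rc];
           r <= con->univ->allocable_end[rc]; r++) {
         if (rreg_state[r].disp == Bound
             && !HRegUsage__contains(instr_uses, rreg_state[r].vreg)) {
            /* spill_vreg(...) would be invoked here */
            return r;
         }
      }
      vpanic("find_or_make_free_rreg: no rreg of this class available");
   }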
*/ + for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) { + HReg vreg = reg_usage[ii].vRegs[j]; + vassert(hregIsVirtual(vreg)); + + if (0) { + vex_printf("considering "); con->ppReg(vreg); vex_printf("\n"); + } + + UInt v_idx = hregIndex(vreg); + vassert(IS_VALID_VREGNO(v_idx)); + HReg rreg = vreg_state[v_idx].rreg; + if (vreg_state[v_idx].disp == Assigned) { + UInt r_idx = hregIndex(rreg); + vassert(rreg_state[r_idx].disp == Bound); + addToHRegRemap(&remap, vreg, rreg); + } else { + vassert(hregIsInvalid(rreg)); + + /* Find or make a free rreg of the correct class. */ + UInt r_idx = FIND_OR_MAKE_FREE_RREG( + ii, v_idx, vreg_state[v_idx].reg_class, False); + rreg = con->univ->regs[r_idx]; + + /* Generate a reload only if the vreg is spilled and is about to be + read or modified. If it is merely written, reloading it first + would be pointless. */ + if ((vreg_state[v_idx].disp == Spilled) + && (reg_usage[ii].vMode[j] != HRmWrite)) { + + HInstr* reload1 = NULL; + HInstr* reload2 = NULL; + con->genReload(&reload1, &reload2, rreg, + vreg_state[v_idx].spill_offset, con->mode64); + vassert(reload1 != NULL || reload2 != NULL); + if (reload1 != NULL) { + emit_instr(reload1, instrs_out, con, "reload1"); + } + if (reload2 != NULL) { + emit_instr(reload2, instrs_out, con, "reload2"); + } + } + + rreg_state[r_idx].disp = Bound; + rreg_state[r_idx].vreg = vreg; + vreg_state[v_idx].disp = Assigned; + vreg_state[v_idx].rreg = rreg; + addToHRegRemap(&remap, vreg, rreg); + } + } + + con->mapRegs(&remap, instr, con->mode64); + emit_instr(instr, instrs_out, con, NULL); + + if (DEBUG_REGALLOC) { + vex_printf("After dealing with current instruction:\n"); + print_state(con, vreg_state, n_vregs, rreg_state, n_rregs, + rreg_lr_state, ii); + vex_printf("\n"); + } + + /* ------ Post-instruction actions. ------ */ + /* Free rregs which: + - Have been reserved and whose hard live range ended. + - Have been bound to vregs whose live range ended. */ + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + RRegState* rreg = &rreg_state[r_idx]; + RRegLRState* rreg_lrs = &rreg_lr_state[r_idx]; + switch (rreg->disp) { + case Free: + break; + case Reserved: + if (rreg_lrs->lrs_used > 0) { + /* Consider "dead before" the next instruction. */ + if (rreg_lrs->lr_current->dead_before <= (Short) ii + 1) { + rreg_state[r_idx].disp = Free; + rreg_state[r_idx].vreg = INVALID_HREG; + if (rreg_lrs->lr_current_idx < rreg_lrs->lrs_used - 1) { + rreg_lrs->lr_current_idx += 1; + rreg_lrs->lr_current + = &rreg_lrs->lrs[rreg_lrs->lr_current_idx]; + } + } + } + break; + case Bound: { + UInt v_idx = hregIndex(rreg->vreg); + /* Consider "dead before" the next instruction. */ + if (vreg_state[v_idx].dead_before <= (Short) ii + 1) { + vreg_state[v_idx].disp = Unallocated; + vreg_state[v_idx].rreg = INVALID_HREG; + rreg_state[r_idx].disp = Free; + rreg_state[r_idx].vreg = INVALID_HREG; + } + break; + } + default: + vassert(0); + } + } + } + + return instrs_out; +} + +/*----------------------------------------------------------------------------*/ +/*--- host_generic_reg_alloc3.c ---*/ +/*----------------------------------------------------------------------------*/ diff --git a/VEX/priv/host_generic_regs.c b/VEX/priv/host_generic_regs.c index 8710b2ba96..67d2ea2eef 100644 --- a/VEX/priv/host_generic_regs.c +++ b/VEX/priv/host_generic_regs.c @@ -58,11 +58,10 @@ void ppHRegClass ( HRegClass hrc ) } /* Generic printing for registers.
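The motivation for changing the pp* printers below from void to UInt is visible in the Stage 1 debug dump earlier in this patch: the caller pads output columns with the returned character count. In essence:

   UInt written = con->ppReg(con->univ->regs[r_idx]);   /* prints e.g. "%rsi" */
   vex_printf("):");
   for (Int t = 15 - written; t > 0; t--)
      vex_printf(" ");   /* align the live-range intervals that follow */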
*/ -void ppHReg ( HReg r ) +UInt ppHReg ( HReg r ) { if (hregIsInvalid(r)) { - vex_printf("HReg_INVALID"); - return; + return vex_printf("HReg_INVALID"); } const Bool isV = hregIsVirtual(r); const HChar* maybe_v = isV ? "v" : ""; @@ -71,12 +70,12 @@ void ppHReg ( HReg r ) always zero for virtual registers, so that's pointless -- hence show the index number instead. */ switch (hregClass(r)) { - case HRcInt32: vex_printf("%%%sr%u", maybe_v, regNN); return; - case HRcInt64: vex_printf("%%%sR%u", maybe_v, regNN); return; - case HRcFlt32: vex_printf("%%%sF%u", maybe_v, regNN); return; - case HRcFlt64: vex_printf("%%%sD%u", maybe_v, regNN); return; - case HRcVec64: vex_printf("%%%sv%u", maybe_v, regNN); return; - case HRcVec128: vex_printf("%%%sV%u", maybe_v, regNN); return; + case HRcInt32: return vex_printf("%%%sr%u", maybe_v, regNN); + case HRcInt64: return vex_printf("%%%sR%u", maybe_v, regNN); + case HRcFlt32: return vex_printf("%%%sF%u", maybe_v, regNN); + case HRcFlt64: return vex_printf("%%%sD%u", maybe_v, regNN); + case HRcVec64: return vex_printf("%%%sv%u", maybe_v, regNN); + case HRcVec128: return vex_printf("%%%sV%u", maybe_v, regNN); default: vpanic("ppHReg"); } } @@ -94,6 +93,11 @@ void RRegUniverse__init ( /*OUT*/RRegUniverse* univ ) for (UInt i = 0; i < N_RREGUNIVERSE_REGS; i++) { univ->regs[i] = INVALID_HREG; } + + for (UInt i = 0; i <= HrcLAST; i++) { + univ->allocable_start[i] = N_RREGUNIVERSE_REGS; + univ->allocable_end[i] = N_RREGUNIVERSE_REGS; + } } void RRegUniverse__check_is_sane ( const RRegUniverse* univ ) @@ -113,6 +117,33 @@ HReg reg = univ->regs[i]; vassert(hregIsInvalid(reg)); } + + /* Determine which register classes are used and whether they form + contiguous ranges. */ + Bool regclass_used[HrcLAST + 1]; + for (UInt i = 0; i <= HrcLAST; i++) { + regclass_used[i] = False; + } + + for (UInt i = 0; i < univ->allocable; i++) { + HReg reg = univ->regs[i]; + HRegClass regclass = hregClass(reg); + if (!regclass_used[regclass]) { + regclass_used[regclass] = True; + } + } + + UInt regs_visited = 0; + for (UInt i = 0; i <= HrcLAST; i++) { + if (regclass_used[i]) { + for (UInt j = univ->allocable_start[i]; + j <= univ->allocable_end[i]; j++) { + vassert(hregClass(univ->regs[j]) == i); + regs_visited += 1; + } + } + } + + vassert(regs_visited == univ->allocable); } diff --git a/VEX/priv/host_generic_regs.h b/VEX/priv/host_generic_regs.h index 670114de6a..3db9ea0813 100644 --- a/VEX/priv/host_generic_regs.h +++ b/VEX/priv/host_generic_regs.h @@ -93,7 +93,7 @@ typedef struct { UInt u32; } HReg; available on any specific host. For example on x86, the available classes are: Int32, Flt64, Vec128 only. - IMPORTANT NOTE: host_generic_reg_alloc2.c needs how much space is + IMPORTANT NOTE: host_generic_reg_alloc*.c needs to know how much space is needed to spill each class of register. It allocates the following amount of space: @@ -106,7 +106,7 @@ typedef struct { UInt u32; } HReg; HRcVec128 128 bits If you add another regclass, you must remember to update - host_generic_reg_alloc2.c accordingly. + host_generic_reg_alloc*.c and RRegUniverse accordingly. When adding entries to enum HRegClass, do not use any value > 14 or < 1. */ @@ -118,15 +118,17 @@ typedef HRcFlt32=5, /* 32-bit float */ HRcFlt64=6, /* 64-bit float */ HRcVec64=7, /* 64-bit SIMD */ - HRcVec128=8 /* 128-bit SIMD */ + HRcVec128=8, /* 128-bit SIMD */ + HrcLAST=HRcVec128 } HRegClass; extern void ppHRegClass ( HRegClass ); -/* Print an HReg in a generic (non-target-specific) way.
*/ -extern void ppHReg ( HReg ); +/* Print an HReg in a generic (non-target-specific) way. + Returns the number of HChars written. */ +extern UInt ppHReg ( HReg ); /* Construct. The goal here is that compiler can fold this down to a constant in the case where the four arguments are constants, which @@ -149,7 +151,7 @@ static inline HReg mkHReg ( Bool virtual, HRegClass rc, UInt enc, UInt ix ) static inline HRegClass hregClass ( HReg r ) { HRegClass rc = (HRegClass)((r.u32 >> 27) & 0xF); - vassert(rc >= HRcInt32 && rc <= HRcVec128); + vassert(rc >= HRcInt32 && rc <= HrcLAST); return rc; } @@ -221,6 +223,25 @@ typedef index here, since this is the only place where we map index numbers to actual registers. */ HReg regs[N_RREGUNIVERSE_REGS]; + + /* Ranges for groups of allocable registers. Used to quickly address only + a group of allocable registers belonging to the same register class. + Indexes into |allocable_{start,end}| are HRegClass entries, such as + HRcInt64. Values in |allocable_{start,end}| give a valid range into + |regs| where registers corresponding to the given register class are + found. + + For example, let's say allocable_start[HRcInt64] == 10 and + allocable_end[HRcInt64] == 14. Then regs[10], regs[11], regs[12], + regs[13], and regs[14] give all registers of register class HRcInt64. + + If a register class is not present, then values of the corresponding + |allocable_{start,end}| elements are equal to N_RREGUNIVERSE_REGS. + + Naturally, registers in |regs| must form contiguous groups. This is + checked by RRegUniverse__check_is_sane(). */ + UInt allocable_start[HrcLAST + 1]; + UInt allocable_end[HrcLAST + 1]; } RRegUniverse; @@ -305,7 +326,7 @@ extern Bool HRegUsage__contains ( const HRegUsage*, HReg ); /*---------------------------------------------------------*/ /* Note that such maps can only map virtual regs to real regs. - addToHRegRenap will barf if given a pair not of that form. As a + addToHRegRemap will barf if given a pair not of that form. As a result, no valid HRegRemap will bind a real reg to anything, and so if lookupHRegMap is given a real reg, it returns it unchanged. This is precisely the behaviour that the register allocator needs @@ -442,40 +463,49 @@ static inline Bool is_RetLoc_INVALID ( RetLoc rl ) { /*--- Reg alloc: TODO: move somewhere else ---*/ /*---------------------------------------------------------*/ -extern -HInstrArray* doRegisterAllocation ( +/* Control of the VEX register allocator. */ +typedef + struct { + /* The real-register universe to use. This contains facts about real + registers, one of which is the set of registers available for + allocation. */ + const RRegUniverse* univ; + + /* Return True iff the given insn is a reg-reg move, in which case also + return the src and dst regs. */ + Bool (*isMove)(const HInstr*, HReg*, HReg*); + + /* Get info about register usage in this insn. */ + void (*getRegUsage)(HRegUsage*, const HInstr*, Bool); + + /* Apply a reg-reg mapping to an insn. */ + void (*mapRegs)(HRegRemap*, HInstr*, Bool); + + /* Return insn(s) to spill/restore a real register to a spill slot offset, + also a function to move between registers, + and optionally a function to do direct reloads. */ + void (*genSpill)(HInstr**, HInstr**, HReg, Int, Bool); + void (*genReload)(HInstr**, HInstr**, HReg, Int, Bool); + HInstr* (*genMove)(HReg from, HReg to, Bool); + HInstr* (*directReload)(HInstr*, HReg, Short); + UInt guest_sizeB; + + /* For debug printing only.
*/ + void (*ppInstr)(const HInstr*, Bool); + UInt (*ppReg)(HReg); + + /* 32/64bit mode */ + Bool mode64; + } + RegAllocControl; - /* Incoming virtual-registerised code. */ +extern HInstrArray* doRegisterAllocation_v2( HInstrArray* instrs_in, - - /* The real-register universe to use. This contains facts about - real registers, one of which is the set of registers available - for allocation. */ - const RRegUniverse* univ, - - /* Return True iff the given insn is a reg-reg move, in which - case also return the src and dst regs. */ - Bool (*isMove) (const HInstr*, HReg*, HReg*), - - /* Get info about register usage in this insn. */ - void (*getRegUsage) (HRegUsage*, const HInstr*, Bool), - - /* Apply a reg-reg mapping to an insn. */ - void (*mapRegs) (HRegRemap*, HInstr*, Bool), - - /* Return insn(s) to spill/restore a real reg to a spill slot - offset. And optionally a function to do direct reloads. */ - void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), - void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), - HInstr* (*directReload) ( HInstr*, HReg, Short ), - Int guest_sizeB, - - /* For debug printing only. */ - void (*ppInstr) ( const HInstr*, Bool ), - void (*ppReg) ( HReg ), - - /* 32/64bit mode */ - Bool mode64 + const RegAllocControl* con +); +extern HInstrArray* doRegisterAllocation_v3( + HInstrArray* instrs_in, + const RegAllocControl* con ); diff --git a/VEX/priv/host_mips_defs.c b/VEX/priv/host_mips_defs.c index d6a3219751..9a6993eda0 100644 --- a/VEX/priv/host_mips_defs.c +++ b/VEX/priv/host_mips_defs.c @@ -63,6 +63,7 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. */ + ru->allocable_start[(mode64) ? HRcInt64 : HRcInt32] = ru->size; ru->regs[ru->size++] = hregMIPS_GPR16(mode64); ru->regs[ru->size++] = hregMIPS_GPR17(mode64); ru->regs[ru->size++] = hregMIPS_GPR18(mode64); @@ -76,7 +77,10 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) ru->regs[ru->size++] = hregMIPS_GPR14(mode64); ru->regs[ru->size++] = hregMIPS_GPR15(mode64); ru->regs[ru->size++] = hregMIPS_GPR24(mode64); + ru->allocable_end[(mode64) ? HRcInt64 : HRcInt32] = ru->size - 1; + /* s7 (=guest_state) */ + ru->allocable_start[(mode64) ? HRcFlt64 : HRcFlt32] = ru->size; ru->regs[ru->size++] = hregMIPS_F16(mode64); ru->regs[ru->size++] = hregMIPS_F18(mode64); ru->regs[ru->size++] = hregMIPS_F20(mode64); @@ -85,8 +89,11 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) ru->regs[ru->size++] = hregMIPS_F26(mode64); ru->regs[ru->size++] = hregMIPS_F28(mode64); ru->regs[ru->size++] = hregMIPS_F30(mode64); + ru->allocable_end[(mode64) ? 
HRcFlt64 : HRcFlt32] = ru->size - 1; + if (!mode64) { /* Fake double floating point */ + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregMIPS_D0(mode64); ru->regs[ru->size++] = hregMIPS_D1(mode64); ru->regs[ru->size++] = hregMIPS_D2(mode64); @@ -95,6 +102,7 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) ru->regs[ru->size++] = hregMIPS_D5(mode64); ru->regs[ru->size++] = hregMIPS_D6(mode64); ru->regs[ru->size++] = hregMIPS_D7(mode64); + ru->allocable_end[HRcFlt64] = ru->size - 1; } ru->allocable = ru->size; @@ -126,7 +134,7 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) } -void ppHRegMIPS(HReg reg, Bool mode64) +UInt ppHRegMIPS(HReg reg, Bool mode64) { Int r; static const HChar *ireg32_names[35] @@ -151,8 +159,7 @@ void ppHRegMIPS(HReg reg, Bool mode64) /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. */ @@ -164,29 +171,23 @@ void ppHRegMIPS(HReg reg, Bool mode64) case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); case HRcInt64: r = hregEncoding (reg); vassert (r >= 0 && r < 32); - vex_printf ("%s", ireg32_names[r]); - return; + return vex_printf ("%s", ireg32_names[r]); case HRcFlt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", freg32_names[r]); - return; + return vex_printf("%s", freg32_names[r]); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", freg64_names[r]); - return; + return vex_printf("%s", freg64_names[r]); default: vpanic("ppHRegMIPS"); break; } - - return; } @@ -2029,6 +2030,18 @@ void genReload_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, } } +MIPSInstr* genMove_MIPS(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + case HRcInt64: + return MIPSInstr_Alu(Malu_OR, to, from, MIPSRH_Reg(from)); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_MIPS: unimplemented regclass"); + } +} + /* --------- The mips assembler --------- */ inline static UInt iregNo(HReg r, Bool mode64) diff --git a/VEX/priv/host_mips_defs.h b/VEX/priv/host_mips_defs.h index 481a4878cf..a4c0e789cd 100644 --- a/VEX/priv/host_mips_defs.h +++ b/VEX/priv/host_mips_defs.h @@ -135,7 +135,7 @@ ST_IN HReg hregMIPS_GPR31 ( Bool mode64 ) { return GPR(mode64, 31, 37, 45); } # define MIPS_N_REGPARMS 8 #endif -extern void ppHRegMIPS ( HReg, Bool ); +extern UInt ppHRegMIPS ( HReg, Bool ); /* --------- Condition codes, Intel encoding. --------- */ @@ -700,6 +700,7 @@ extern void genSpill_MIPS ( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, Int offset, Bool); extern void genReload_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, Int offset, Bool); +extern MIPSInstr* genMove_MIPS(HReg from, HReg to, Bool mode64); extern const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ); diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c index 6f7c009ede..33ee292a01 100644 --- a/VEX/priv/host_ppc_defs.c +++ b/VEX/priv/host_ppc_defs.c @@ -68,6 +68,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) // GPR0 = scratch reg where poss. - some ops interpret as value zero // GPR1 = stack pointer // GPR2 = TOC pointer + ru->allocable_start[(mode64) ? 
HRcInt64 : HRcInt32] = ru->size; ru->regs[ru->size++] = hregPPC_GPR3(mode64); ru->regs[ru->size++] = hregPPC_GPR4(mode64); ru->regs[ru->size++] = hregPPC_GPR5(mode64); @@ -100,6 +101,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) ru->regs[ru->size++] = hregPPC_GPR26(mode64); ru->regs[ru->size++] = hregPPC_GPR27(mode64); ru->regs[ru->size++] = hregPPC_GPR28(mode64); + ru->allocable_end[(mode64) ? HRcInt64 : HRcInt32] = ru->size - 1; // GPR29 is reserved for the dispatcher // GPR30 is reserved as AltiVec spill reg temporary // GPR31 is reserved for the GuestStatePtr @@ -109,6 +111,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) the occasional extra spill instead. */ /* For both ppc32-linux and ppc64-linux, f14-f31 are callee save. So use them. */ + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregPPC_FPR14(mode64); ru->regs[ru->size++] = hregPPC_FPR15(mode64); ru->regs[ru->size++] = hregPPC_FPR16(mode64); @@ -117,11 +120,13 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) ru->regs[ru->size++] = hregPPC_FPR19(mode64); ru->regs[ru->size++] = hregPPC_FPR20(mode64); ru->regs[ru->size++] = hregPPC_FPR21(mode64); + ru->allocable_end[HRcFlt64] = ru->size - 1; /* Same deal re Altivec */ /* For both ppc32-linux and ppc64-linux, v20-v31 are callee save. So use them. */ /* NB, vr29 is used as a scratch temporary -- do not allocate */ + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregPPC_VR20(mode64); ru->regs[ru->size++] = hregPPC_VR21(mode64); ru->regs[ru->size++] = hregPPC_VR22(mode64); @@ -130,6 +135,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) ru->regs[ru->size++] = hregPPC_VR25(mode64); ru->regs[ru->size++] = hregPPC_VR26(mode64); ru->regs[ru->size++] = hregPPC_VR27(mode64); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -146,7 +152,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) } -void ppHRegPPC ( HReg reg ) +UInt ppHRegPPC ( HReg reg ) { Int r; static const HChar* ireg32_names[32] @@ -160,31 +166,26 @@ void ppHRegPPC ( HReg reg ) "%r28", "%r29", "%r30", "%r31" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%%fr%d", r); - return; + return vex_printf("%%fr%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%%v%d", r); - return; + return vex_printf("%%v%d", r); default: vpanic("ppHRegPPC"); } @@ -3210,6 +3211,20 @@ void genReload_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +PPCInstr* genMove_PPC(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + case HRcInt64: + return PPCInstr_Alu(Palu_OR, to, from, PPCRH_Reg(from)); + case HRcFlt64: + return PPCInstr_FpUnary(Pfp_MOV, to, from); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_PPC: unimplemented regclass"); + } +} + /* --------- The ppc assembler (bleh.) 
--------- */ diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h index 62c15ae865..6b7fcc8cb3 100644 --- a/VEX/priv/host_ppc_defs.h +++ b/VEX/priv/host_ppc_defs.h @@ -122,7 +122,7 @@ ST_IN HReg hregPPC_VR29 ( Bool mode64 ) { return VR (mode64, 29, 43, 45); } /* Num registers used for function calls */ #define PPC_N_REGPARMS 8 -extern void ppHRegPPC ( HReg ); +extern UInt ppHRegPPC ( HReg ); /* --------- Condition codes --------- */ @@ -1215,6 +1215,7 @@ extern void genSpill_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offsetB, Bool mode64 ); extern void genReload_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offsetB, Bool mode64 ); +extern PPCInstr* genMove_PPC(HReg from, HReg to, Bool mode64); extern const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ); diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 91f08e5e9f..ab2d0b276e 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -366,10 +366,10 @@ ppS390Instr(const s390_insn *insn, Bool mode64) vex_printf("%s", s390_insn_as_string(insn)); } -void +UInt ppHRegS390(HReg reg) { - vex_printf("%s", s390_hreg_as_string(reg)); + return vex_printf("%s", s390_hreg_as_string(reg)); } /*------------------------------------------------------------*/ @@ -402,15 +402,19 @@ getRRegUniverse_S390(void) FPR12 - FPR15 are also used as register pairs for 128-bit floating point operations */ - UInt regno; - for (regno = 1; regno <= 11; ++regno) { + ru->allocable_start[HRcInt64] = ru->size; + for (UInt regno = 1; regno <= 11; ++regno) { gpr_index[regno] = ru->size; ru->regs[ru->size++] = s390_hreg_gpr(regno); } - for (regno = 0; regno <= 15; ++regno) { + ru->allocable_end[HRcInt64] = ru->size - 1; + + ru->allocable_start[HRcFlt64] = ru->size; + for (UInt regno = 0; regno <= 15; ++regno) { fpr_index[regno] = ru->size; ru->regs[ru->size++] = s390_hreg_fpr(regno); } + ru->allocable_end[HRcFlt64] = ru->size - 1; ru->allocable = ru->size; /* Add the registers that are not available for allocation. @@ -516,6 +520,17 @@ genReload_S390(HInstr **i1, HInstr **i2, HReg rreg, Int offsetB, Bool mode64) } } +s390_insn* genMove_S390(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return s390_insn_move(sizeofIRType(Ity_I64), to, from); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_S390: unimplemented regclass"); + } +} + /* Helper function for s390_insn_get_reg_usage */ static void s390_opnd_RMI_get_reg_usage(HRegUsage *u, s390_opnd_RMI op) diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 202690022f..937829cd8f 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -736,7 +736,7 @@ const HChar *s390_insn_as_string(const s390_insn *); void ppS390AMode(const s390_amode *); void ppS390Instr(const s390_insn *, Bool mode64); -void ppHRegS390(HReg); +UInt ppHRegS390(HReg); /* Some functions that insulate the register allocator from details of the underlying instruction set. 
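For orientation, the calling convention of these insulating hooks, mirroring the v3 reload sequence shown earlier, is roughly as follows (variable names and tags illustrative):

   /* Spilling or reloading may take one or two target instructions,
      hence the HInstr** pair; a reg-reg move is always a single insn. */
   HInstr *i1 = NULL, *i2 = NULL;
   con->genSpill(&i1, &i2, rreg, spill_offset, con->mode64);
   vassert(i1 != NULL || i2 != NULL);
   if (i1 != NULL) emit_instr(i1, instrs_out, con, "spill1");
   if (i2 != NULL) emit_instr(i2, instrs_out, con, "spill2");

   HInstr* mv = con->genMove(r_src, r_dst, con->mode64);
   vassert(mv != NULL);
   emit_instr(mv, instrs_out, con, "move");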
*/ @@ -749,6 +749,7 @@ Int emit_S390Instr ( Bool *, UChar *, Int, const s390_insn *, Bool, const RRegUniverse *getRRegUniverse_S390( void ); void genSpill_S390 ( HInstr **, HInstr **, HReg , Int , Bool ); void genReload_S390 ( HInstr **, HInstr **, HReg , Int , Bool ); +extern s390_insn* genMove_S390(HReg from, HReg to, Bool mode64); HInstrArray *iselSB_S390 ( const IRSB *, VexArch, const VexArchInfo *, const VexAbiInfo *, Int, Int, Bool, Bool, Addr); diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index 956e3234d7..2e5c044669 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -63,18 +63,25 @@ const RRegUniverse* getRRegUniverse_X86 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. */ + ru->allocable_start[HRcInt32] = ru->size; ru->regs[ru->size++] = hregX86_EAX(); ru->regs[ru->size++] = hregX86_EBX(); ru->regs[ru->size++] = hregX86_ECX(); ru->regs[ru->size++] = hregX86_EDX(); ru->regs[ru->size++] = hregX86_ESI(); ru->regs[ru->size++] = hregX86_EDI(); + ru->allocable_end[HRcInt32] = ru->size - 1; + + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregX86_FAKE0(); ru->regs[ru->size++] = hregX86_FAKE1(); ru->regs[ru->size++] = hregX86_FAKE2(); ru->regs[ru->size++] = hregX86_FAKE3(); ru->regs[ru->size++] = hregX86_FAKE4(); ru->regs[ru->size++] = hregX86_FAKE5(); + ru->allocable_end[HRcFlt64] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregX86_XMM0(); ru->regs[ru->size++] = hregX86_XMM1(); ru->regs[ru->size++] = hregX86_XMM2(); @@ -83,7 +90,9 @@ const RRegUniverse* getRRegUniverse_X86 ( void ) ru->regs[ru->size++] = hregX86_XMM5(); ru->regs[ru->size++] = hregX86_XMM6(); ru->regs[ru->size++] = hregX86_XMM7(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; + /* And other regs, not available to the allocator. */ ru->regs[ru->size++] = hregX86_ESP(); ru->regs[ru->size++] = hregX86_EBP(); @@ -95,33 +104,29 @@ const RRegUniverse* getRRegUniverse_X86 ( void ) } -void ppHRegX86 ( HReg reg ) +UInt ppHRegX86 ( HReg reg ) { Int r; static const HChar* ireg32_names[8] = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 8); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 6); - vex_printf("%%fake%d", r); - return; + return vex_printf("%%fake%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 8); - vex_printf("%%xmm%d", r); - return; + return vex_printf("%%xmm%d", r); default: vpanic("ppHRegX86"); } @@ -1752,6 +1757,19 @@ void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +X86Instr* genMove_X86(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(from), to); + case HRcVec128: + return X86Instr_SseReRg(Xsse_MOV, from, to); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_X86: unimplemented regclass"); + } +} + /* The given instruction reads the specified vreg exactly once, and that vreg is currently located at the given spill offset. 
If possible, return a variant of the instruction to one which instead diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h index 0a3ed75f6f..614b7512c6 100644 --- a/VEX/priv/host_x86_defs.h +++ b/VEX/priv/host_x86_defs.h @@ -74,7 +74,7 @@ ST_IN HReg hregX86_ESP ( void ) { return mkHReg(False, HRcInt32, 4, 20); } ST_IN HReg hregX86_EBP ( void ) { return mkHReg(False, HRcInt32, 5, 21); } #undef ST_IN -extern void ppHRegX86 ( HReg ); +extern UInt ppHRegX86 ( HReg ); /* --------- Condition codes, Intel encoding. --------- */ @@ -730,7 +730,7 @@ extern void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); - +extern X86Instr* genMove_X86(HReg from, HReg to, Bool); extern X86Instr* directReload_X86 ( X86Instr* i, HReg vreg, Short spill_off ); extern const RRegUniverse* getRRegUniverse_X86 ( void ); diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 7c125cebf3..b27d6ca9ea 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -186,6 +186,7 @@ void LibVEX_default_VexControl ( /*OUT*/ VexControl* vcon ) vcon->guest_max_insns = 60; vcon->guest_chase_thresh = 10; vcon->guest_chase_cond = False; + vcon->regalloc_version = 3; } @@ -225,6 +226,7 @@ void LibVEX_Init ( vassert(vcon->guest_chase_thresh < vcon->guest_max_insns); vassert(vcon->guest_chase_cond == True || vcon->guest_chase_cond == False); + vassert(vcon->regalloc_version == 2 || vcon->regalloc_version == 3); /* Check that Vex has been built with sizes of basic types as stated in priv/libvex_basictypes.h. Failure of any of these is @@ -712,9 +714,10 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, void (*mapRegs) ( HRegRemap*, HInstr*, Bool ); void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ); void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ); + HInstr* (*genMove) ( HReg, HReg, Bool ); HInstr* (*directReload) ( HInstr*, HReg, Short ); void (*ppInstr) ( const HInstr*, Bool ); - void (*ppReg) ( HReg ); + UInt (*ppReg) ( HReg ); HInstrArray* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, Int, Int, Bool, Bool, Addr ); @@ -741,6 +744,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = NULL; genSpill = NULL; genReload = NULL; + genMove = NULL; directReload = NULL; ppInstr = NULL; ppReg = NULL; @@ -859,6 +863,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr); genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86); genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86); + genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86); directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86); ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr); ppReg = CAST_TO_TYPEOF(ppReg) X86FN(ppHRegX86); @@ -876,6 +881,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr); genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64); genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64); + genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64); directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64); ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr); ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64); @@ -893,6 +899,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr); genSpill = 
CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC); genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC); + genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC); ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr); ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC); iselSB = PPC32FN(iselSB_PPC); @@ -909,6 +916,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr); genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC); genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC); + genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC); ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr); ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC); iselSB = PPC64FN(iselSB_PPC); @@ -926,6 +934,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr); genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390); genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390); + genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390); // fixs390: consider implementing directReload_S390 ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr); ppReg = CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390); @@ -943,6 +952,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr); genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM); genReload = CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM); + genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM); ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr); ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM); iselSB = ARMFN(iselSB_ARM); @@ -959,6 +969,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr); genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64); genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64); + genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64); ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr); ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64); iselSB = ARM64FN(iselSB_ARM64); @@ -975,6 +986,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr); genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS); genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS); + genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS); ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr); ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS); iselSB = MIPS32FN(iselSB_MIPS); @@ -992,6 +1004,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr); genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS); genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS); + genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS); ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr); ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS); iselSB = MIPS64FN(iselSB_MIPS); @@ -1068,11 +1081,22 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, } /* Register allocate. 
*/ - rcode = doRegisterAllocation ( vcode, rRegUniv, - isMove, getRegUsage, mapRegs, - genSpill, genReload, directReload, - guest_sizeB, - ppInstr, ppReg, mode64 ); + RegAllocControl con = { + .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage, + .mapRegs = mapRegs, .genSpill = genSpill, .genReload = genReload, + .genMove = genMove, .directReload = directReload, + .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppReg = ppReg, + .mode64 = mode64}; + switch (vex_control.regalloc_version) { + case 2: + rcode = doRegisterAllocation_v2(vcode, &con); + break; + case 3: + rcode = doRegisterAllocation_v3(vcode, &con); + break; + default: + vassert(0); + } vexAllocSanityCheck(); diff --git a/VEX/priv/main_util.c b/VEX/priv/main_util.c index 865fe0c7a7..e9a496be02 100644 --- a/VEX/priv/main_util.c +++ b/VEX/priv/main_util.c @@ -283,13 +283,40 @@ Bool vex_streq ( const HChar* s1, const HChar* s2 ) } } +/* Vectorised memset, copied from Valgrind's m_libcbase.c. */ void vex_bzero ( void* sV, SizeT n ) { - SizeT i; - UChar* s = (UChar*)sV; - /* No laughing, please. Just don't call this too often. Thank you - for your attention. */ - for (i = 0; i < n; i++) s[i] = 0; +# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3))) + + UChar* d = sV; + + while ((!IS_4_ALIGNED(d)) && n >= 1) { + d[0] = 0; + d++; + n--; + } + if (n == 0) + return; + while (n >= 16) { + ((UInt*)d)[0] = 0; + ((UInt*)d)[1] = 0; + ((UInt*)d)[2] = 0; + ((UInt*)d)[3] = 0; + d += 16; + n -= 16; + } + while (n >= 4) { + ((UInt*)d)[0] = 0; + d += 4; + n -= 4; + } + while (n >= 1) { + d[0] = 0; + d++; + n--; + } + return; +# undef IS_4_ALIGNED } diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h index d75919de48..6f55ec93b9 100644 --- a/VEX/pub/libvex.h +++ b/VEX/pub/libvex.h @@ -497,6 +497,11 @@ typedef /* EXPERIMENTAL: chase across conditional branches? Not all front ends honour this. Default: NO. */ Bool guest_chase_cond; + /* Register allocator version. Allowed values are: + - '2': the previous implementation; slower, but with well-proven + spilling decisions. + - '3': the current, faster implementation; it may produce slightly worse + spilling decisions.
*/ + UInt regalloc_version; } VexControl; diff --git a/coregrind/m_main.c b/coregrind/m_main.c index f2a16bdd16..a37bbb6726 100644 --- a/coregrind/m_main.c +++ b/coregrind/m_main.c @@ -286,6 +286,7 @@ static void usage_NORETURN ( Bool debug_help ) " 0000 0000 show summary profile only\n" " (Nb: you need --trace-notbelow and/or --trace-notabove\n" " with --trace-flags for full details)\n" +" --vex-regalloc-version=2|3 [3]\n" "\n" " debugging options for Valgrind tools that report errors\n" " --dump-error= show translation for basic block associated\n" @@ -708,6 +709,8 @@ void main_process_cmd_line_options( void ) VG_(clo_vex_control).iropt_verbosity, 0, 10) {} else if VG_BINT_CLO(arg, "--vex-iropt-level", VG_(clo_vex_control).iropt_level, 0, 2) {} + else if VG_BINT_CLO(arg, "--vex-regalloc-version", + VG_(clo_vex_control).regalloc_version, 2, 3) {} else if VG_STRINDEX_CLO(arg, "--vex-iropt-register-updates", pxStrings, ix) { diff --git a/none/tests/cmdline2.stdout.exp b/none/tests/cmdline2.stdout.exp index fc7b36ce86..21bec9fbd0 100644 --- a/none/tests/cmdline2.stdout.exp +++ b/none/tests/cmdline2.stdout.exp @@ -198,6 +198,7 @@ usage: valgrind [options] prog-and-args 0000 0000 show summary profile only (Nb: you need --trace-notbelow and/or --trace-notabove with --trace-flags for full details) + --vex-regalloc-version=2|3 [3] debugging options for Valgrind tools that report errors --dump-error= show translation for basic block associated
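For example, to compare the two allocators on a given workload, one might run (invocation illustrative):

   valgrind --tool=none --vex-regalloc-version=2 ./myprog
   valgrind --tool=none --vex-regalloc-version=3 ./myprog   # the default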