From: Ivo Raisr Date: Tue, 8 Aug 2017 18:52:10 +0000 (+0200) Subject: Introduce VEX register allocator v3. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ff2a4925d28bc28c606253381a5bfed9fb73681b;p=thirdparty%2Fvalgrind.git Introduce VEX register allocator v3. This is VEX register allocator implementation as found in patch 013 with the following modifications: - VEX register allocator v2 has been removed altogether - command line option --vex-regalloc-version is not recognized - declaration of doRegisterAllocation() has been adjusted to take HInstrSB - file VEX/priv/host_generic_reg_alloc3.c is unmodified --- diff --git a/Makefile.vex.am b/Makefile.vex.am index c47be97430..a1a19189c9 100644 --- a/Makefile.vex.am +++ b/Makefile.vex.am @@ -130,7 +130,7 @@ LIBVEX_SOURCES_COMMON = \ priv/host_generic_simd128.c \ priv/host_generic_simd256.c \ priv/host_generic_maddf.c \ - priv/host_generic_reg_alloc2.c \ + priv/host_generic_reg_alloc3.c \ priv/host_x86_defs.c \ priv/host_x86_isel.c # TODO-JIT: other architectures disabled for now diff --git a/NEWS b/NEWS index 516c4cc62e..446a7fa2d6 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,7 @@ where XXXXXX is the bug number as listed below. 381272 ppc64 doesn't compile test_isa_2_06_partx.c without VSX support 381289 epoll_pwait can have a NULL sigmask 381274 powerpc too chatty even with --sigill-diagnostics=no +381553 VEX register allocator v3 381769 Use ucontext_t instead of struct ucontext 381805 arm32 needs ld.so index hardwire for new glibc security fixes 382256 gz compiler flag test doesn't work for gold diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c index 5e0600ac69..9b63017016 100644 --- a/VEX/priv/host_amd64_defs.c +++ b/VEX/priv/host_amd64_defs.c @@ -63,6 +63,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. 
*/ + ru->allocable_start[HRcInt64] = ru->size; ru->regs[ru->size++] = hregAMD64_RSI(); ru->regs[ru->size++] = hregAMD64_RDI(); ru->regs[ru->size++] = hregAMD64_R8(); @@ -72,6 +73,10 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) ru->regs[ru->size++] = hregAMD64_R14(); ru->regs[ru->size++] = hregAMD64_R15(); ru->regs[ru->size++] = hregAMD64_RBX(); + ru->regs[ru->size++] = hregAMD64_R10(); + ru->allocable_end[HRcInt64] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregAMD64_XMM3(); ru->regs[ru->size++] = hregAMD64_XMM4(); ru->regs[ru->size++] = hregAMD64_XMM5(); @@ -82,8 +87,9 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) ru->regs[ru->size++] = hregAMD64_XMM10(); ru->regs[ru->size++] = hregAMD64_XMM11(); ru->regs[ru->size++] = hregAMD64_XMM12(); - ru->regs[ru->size++] = hregAMD64_R10(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; + /* And other regs, not available to the allocator. */ ru->regs[ru->size++] = hregAMD64_RAX(); ru->regs[ru->size++] = hregAMD64_RCX(); @@ -101,7 +107,7 @@ const RRegUniverse* getRRegUniverse_AMD64 ( void ) } -void ppHRegAMD64 ( HReg reg ) +UInt ppHRegAMD64 ( HReg reg ) { Int r; static const HChar* ireg64_names[16] @@ -109,27 +115,24 @@ void ppHRegAMD64 ( HReg reg ) "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg64_names[r]); - return; + return vex_printf("%s", ireg64_names[r]); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%%xmm%d", r); - return; + return vex_printf("%%xmm%d", r); default: vpanic("ppHRegAMD64"); } } -static void ppHRegAMD64_lo32 ( HReg reg ) +static UInt ppHRegAMD64_lo32 ( HReg reg ) { Int r; static const HChar* ireg32_names[16] @@ -137,17 +140,16 @@ static void ppHRegAMD64_lo32 ( HReg reg ) "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - vex_printf("d"); - return; + UInt written = ppHReg(reg); + written += vex_printf("d"); + return written; } /* But specific for real regs. */ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); default: vpanic("ppHRegAMD64_lo32: invalid regclass"); } @@ -1995,6 +1997,19 @@ void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to); + case HRcVec128: + return AMD64Instr_SseReRg(Asse_MOV, from, to); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_AMD64: unimplemented regclass"); + } +} + AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off ) { vassert(spill_off >= 0 && spill_off < 10000); /* let's say */ diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index e41fe34b55..57ef1698dc 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -56,19 +56,18 @@ ST_IN HReg hregAMD64_R13 ( void ) { return mkHReg(False, HRcInt64, 13, 5); } ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 
6); } ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 7); } ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 8); } - -ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 9); } -ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 10); } -ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 11); } -ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 12); } -ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 13); } -ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 14); } -ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 15); } -ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 16); } -ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 17); } -ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 18); } - -ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 19); } +ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 9); } + +ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 10); } +ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 11); } +ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 12); } +ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 13); } +ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 14); } +ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 15); } +ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 16); } +ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 17); } +ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 18); } +ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 19); } ST_IN HReg hregAMD64_RAX ( void ) { return mkHReg(False, HRcInt64, 
0, 20); } ST_IN HReg hregAMD64_RCX ( void ) { return mkHReg(False, HRcInt64, 1, 21); } @@ -81,7 +80,7 @@ ST_IN HReg hregAMD64_XMM0 ( void ) { return mkHReg(False, HRcVec128, 0, 26); } ST_IN HReg hregAMD64_XMM1 ( void ) { return mkHReg(False, HRcVec128, 1, 27); } #undef ST_IN -extern void ppHRegAMD64 ( HReg ); +extern UInt ppHRegAMD64 ( HReg ); /* --------- Condition codes, AMD encoding. --------- */ @@ -801,7 +800,7 @@ extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); - +extern AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool); extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i, HReg vreg, Short spill_off ); diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 380a24d6d9..bc700c9c0b 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -64,7 +64,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. */ - + ru->allocable_start[HRcInt64] = ru->size; ru->regs[ru->size++] = hregARM64_X22(); ru->regs[ru->size++] = hregARM64_X23(); ru->regs[ru->size++] = hregARM64_X24(); @@ -81,6 +81,7 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) ru->regs[ru->size++] = hregARM64_X5(); ru->regs[ru->size++] = hregARM64_X6(); ru->regs[ru->size++] = hregARM64_X7(); + ru->allocable_end[HRcInt64] = ru->size - 1; // X8 is used as a ProfInc temporary, not available to regalloc. // X9 is a chaining/spill temporary, not available to regalloc. @@ -94,19 +95,23 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) // X21 is the guest state pointer, not available to regalloc. // vector regs. Unfortunately not callee-saved. 
+ ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM64_Q16(); ru->regs[ru->size++] = hregARM64_Q17(); ru->regs[ru->size++] = hregARM64_Q18(); ru->regs[ru->size++] = hregARM64_Q19(); ru->regs[ru->size++] = hregARM64_Q20(); + ru->allocable_end[HRcVec128] = ru->size - 1; // F64 regs, all of which are callee-saved + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM64_D8(); ru->regs[ru->size++] = hregARM64_D9(); ru->regs[ru->size++] = hregARM64_D10(); ru->regs[ru->size++] = hregARM64_D11(); ru->regs[ru->size++] = hregARM64_D12(); ru->regs[ru->size++] = hregARM64_D13(); + ru->allocable_end[HRcFlt64] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -142,43 +147,41 @@ const RRegUniverse* getRRegUniverse_ARM64 ( void ) } -void ppHRegARM64 ( HReg reg ) { +UInt ppHRegARM64 ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 31); - vex_printf("x%d", r); - return; + return vex_printf("x%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM64"); } } -static void ppHRegARM64asSreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(S-reg)"); +static UInt ppHRegARM64asSreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(S-reg)"); + return written; } -static void ppHRegARM64asHreg ( HReg reg ) { - ppHRegARM64(reg); - vex_printf("(H-reg)"); +static UInt ppHRegARM64asHreg ( HReg reg ) { + UInt written = ppHRegARM64(reg); + written += vex_printf("(H-reg)"); + return written; } @@ -1745,7 +1748,7 @@ void ppARM64Instr ( const ARM64Instr* i ) { ppHRegARM64asSreg(i->ARM64in.VCmpS.argR); return; case ARM64in_VFCSel: { - void (*ppHRegARM64fp)(HReg) + UInt (*ppHRegARM64fp)(HReg) = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg); vex_printf("fcsel "); ppHRegARM64fp(i->ARM64in.VFCSel.dst); @@ -2616,6 +2619,21 @@ void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return ARM64Instr_MovI(to, from); + case HRcFlt64: + return ARM64Instr_VMov(8, to, from); + case HRcVec128: + return ARM64Instr_VMov(16, to, from); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM64: unimplemented regclass"); + } +} + /* Emit an instruction into buf and return the number of bytes used. 
Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index bbc211d453..840e0aabc9 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -74,7 +74,7 @@ ST_IN HReg hregARM64_X9 ( void ) { return mkHReg(False, HRcInt64, 9, 27); } ST_IN HReg hregARM64_X21 ( void ) { return mkHReg(False, HRcInt64, 21, 28); } #undef ST_IN -extern void ppHRegARM64 ( HReg ); +extern UInt ppHRegARM64 ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM64_N_ARGREGS 8 /* x0 .. x7 */ @@ -1007,6 +1007,7 @@ extern void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM64 ( void ); diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index a986f37877..e0e6bb2075 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -68,6 +68,7 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) /* Callee saves ones are listed first, since we prefer them if they're available. */ + ru->allocable_start[HRcInt32] = ru->size; ru->regs[ru->size++] = hregARM_R4(); ru->regs[ru->size++] = hregARM_R5(); ru->regs[ru->size++] = hregARM_R6(); @@ -80,24 +81,34 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) ru->regs[ru->size++] = hregARM_R2(); ru->regs[ru->size++] = hregARM_R3(); ru->regs[ru->size++] = hregARM_R9(); + ru->allocable_end[HRcInt32] = ru->size - 1; + /* FP registers. Note: these are all callee-save. Yay! Hence we don't need to mention them as trashed in getHRegUsage for ARMInstr_Call. 
*/ + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregARM_D8(); ru->regs[ru->size++] = hregARM_D9(); ru->regs[ru->size++] = hregARM_D10(); ru->regs[ru->size++] = hregARM_D11(); ru->regs[ru->size++] = hregARM_D12(); + ru->allocable_end[HRcFlt64] = ru->size - 1; + + ru->allocable_start[HRcFlt32] = ru->size; ru->regs[ru->size++] = hregARM_S26(); ru->regs[ru->size++] = hregARM_S27(); ru->regs[ru->size++] = hregARM_S28(); ru->regs[ru->size++] = hregARM_S29(); ru->regs[ru->size++] = hregARM_S30(); + ru->allocable_end[HRcFlt32] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregARM_Q8(); ru->regs[ru->size++] = hregARM_Q9(); ru->regs[ru->size++] = hregARM_Q10(); ru->regs[ru->size++] = hregARM_Q11(); ru->regs[ru->size++] = hregARM_Q12(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -140,35 +151,30 @@ const RRegUniverse* getRRegUniverse_ARM ( void ) } -void ppHRegARM ( HReg reg ) { +UInt ppHRegARM ( HReg reg ) { Int r; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("r%d", r); - return; + return vex_printf("r%d", r); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("d%d", r); - return; + return vex_printf("d%d", r); case HRcFlt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("s%d", r); - return; + return vex_printf("s%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 16); - vex_printf("q%d", r); - return; + return vex_printf("q%d", r); default: vpanic("ppHRegARM"); } @@ -2772,6 +2778,22 @@ void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +ARMInstr* genMove_ARM(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + return ARMInstr_Mov(to, ARMRI84_R(from)); + case HRcFlt32: + return ARMInstr_VUnaryS(ARMvfpu_COPY, to, from); + case HRcFlt64: + return ARMInstr_VUnaryD(ARMvfpu_COPY, to, from); + case HRcVec128: + return ARMInstr_NUnary(ARMneon_COPY, to, from, 4, False); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_ARM: unimplemented regclass"); + } +} /* Emit an instruction into buf and return the number of bytes used. 
Note that buf is not the insn's final place, and therefore it is diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index 19c429993f..ec6358ee15 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -81,7 +81,7 @@ ST_IN HReg hregARM_Q14 ( void ) { return mkHReg(False, HRcVec128, 14, 32); } ST_IN HReg hregARM_Q15 ( void ) { return mkHReg(False, HRcVec128, 15, 33); } #undef ST_IN -extern void ppHRegARM ( HReg ); +extern UInt ppHRegARM ( HReg ); /* Number of registers used arg passing in function calls */ #define ARM_N_ARGREGS 4 /* r0, r1, r2, r3 */ @@ -1070,6 +1070,7 @@ extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); +extern ARMInstr* genMove_ARM(HReg from, HReg to, Bool); extern const RRegUniverse* getRRegUniverse_ARM ( void ); diff --git a/VEX/priv/host_generic_reg_alloc2.c b/VEX/priv/host_generic_reg_alloc2.c deleted file mode 100644 index 2294a9bcc9..0000000000 --- a/VEX/priv/host_generic_reg_alloc2.c +++ /dev/null @@ -1,1613 +0,0 @@ - -/*---------------------------------------------------------------*/ -/*--- begin host_reg_alloc2.c ---*/ -/*---------------------------------------------------------------*/ - -/* - This file is part of Valgrind, a dynamic binary instrumentation - framework. - - Copyright (C) 2004-2017 OpenWorks LLP - info@open-works.net - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. - - The GNU General Public License is contained in the file COPYING. - - Neither the names of the U.S. Department of Energy nor the - University of California nor the names of its contributors may be - used to endorse or promote products derived from this software - without prior written permission. -*/ - -#include "libvex_basictypes.h" -#include "libvex.h" - -#include "main_util.h" -#include "host_generic_regs.h" - -/* Set to 1 for lots of debugging output. */ -#define DEBUG_REGALLOC 0 - - -/* TODO 27 Oct 04: - - Better consistency checking from what isMove tells us. - - We can possibly do V-V coalescing even when the src is spilled, - providing we can arrange for the dst to have the same spill slot. - - Note that state[].hreg is the same as the available real regs. - - Generally rationalise data structures. */ - - -/* Records information on virtual register live ranges. Computed once - and remains unchanged after that. */ -typedef - struct { - /* Becomes live for the first time after this insn ... */ - Short live_after; - /* Becomes dead for the last time before this insn ... */ - Short dead_before; - /* The "home" spill slot, if needed. Never changes. */ - Short spill_offset; - Short spill_size; - /* What kind of register this is. */ - HRegClass reg_class; - } - VRegLR; - - -/* Records information on real-register live ranges. Computed once - and remains unchanged after that. */ -typedef - struct { - HReg rreg; - /* Becomes live after this insn ... */ - Short live_after; - /* Becomes dead before this insn ... */ - Short dead_before; - } - RRegLR; - - -/* An array of the following structs (rreg_state) comprises the - running state of the allocator. It indicates what the current - disposition of each allocatable real register is. 
The array gets - updated as the allocator processes instructions. The identity of - the register is not recorded here, because the index of this - structure in doRegisterAllocation()'s |rreg_state| is the index - number of the register, and the register itself can be extracted - from the RRegUniverse supplied to doRegisterAllocation(). */ -typedef - struct { - /* ------ FIELDS WHICH DO NOT CHANGE ------ */ - /* Is this involved in any HLRs? (only an optimisation hint) */ - Bool has_hlrs; - /* ------ FIELDS WHICH DO CHANGE ------ */ - /* 6 May 07: rearranged fields below so the whole struct fits - into 16 bytes on both x86 and amd64. */ - /* Used when .disp == Bound and we are looking for vregs to - spill. */ - Bool is_spill_cand; - /* Optimisation: used when .disp == Bound. Indicates when the - rreg has the same value as the spill slot for the associated - vreg. Is safely left at False, and becomes True after a - spill store or reload for this rreg. */ - Bool eq_spill_slot; - /* What's it's current disposition? */ - enum { Free, /* available for use */ - Unavail, /* in a real-reg live range */ - Bound /* in use (holding value of some vreg) */ - } - disp; - /* If .disp == Bound, what vreg is it bound to? */ - HReg vreg; - } - RRegState; - - -/* The allocator also maintains a redundant array of indexes - (vreg_state) from vreg numbers back to entries in rreg_state. It - is redundant because iff vreg_state[i] == j then - hregNumber(rreg_state[j].vreg) == i -- that is, the two entries - point at each other. The purpose of this is to speed up activities - which involve looking for a particular vreg: there is no need to - scan the rreg_state looking for it, just index directly into - vreg_state. The FAQ "does this vreg already have an associated - rreg" is the main beneficiary. - - To indicate, in vreg_state[i], that a given vreg is not currently - associated with any rreg, that entry can be set to INVALID_RREG_NO. 
- - Because the vreg_state entries are signed Shorts, the max number - of vregs that can be handed by regalloc is 32767. -*/ - -#define INVALID_RREG_NO ((Short)(-1)) - -#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs) -#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs) - - -/* Search forward from some given point in the incoming instruction - sequence. Point is to select a virtual register to spill, by - finding the vreg which is mentioned as far ahead as possible, in - the hope that this will minimise the number of consequent reloads. - - Only do the search for vregs which are Bound in the running state, - and for which the .is_spill_cand field is set. This allows the - caller to arbitrarily restrict the set of spill candidates to be - considered. - - To do this we don't actually need to see the incoming instruction - stream. Rather, what we need us the HRegUsage records for the - incoming instruction stream. Hence that is passed in. - - Returns an index into the state array indicating the (v,r) pair to - spill, or -1 if none was found. */ -static -Int findMostDistantlyMentionedVReg ( - HRegUsage* reg_usages_in, - Int search_from_instr, - Int num_instrs, - RRegState* state, - Int n_state -) -{ - Int k, m; - Int furthest_k = -1; - Int furthest = -1; - vassert(search_from_instr >= 0); - for (k = 0; k < n_state; k++) { - if (!state[k].is_spill_cand) - continue; - vassert(state[k].disp == Bound); - for (m = search_from_instr; m < num_instrs; m++) { - if (HRegUsage__contains(®_usages_in[m], state[k].vreg)) - break; - } - if (m > furthest) { - furthest = m; - furthest_k = k; - } - } - return furthest_k; -} - - -/* Check that this vreg has been assigned a sane spill offset. 
*/ -inline -static void sanity_check_spill_offset ( VRegLR* vreg ) -{ - switch (vreg->reg_class) { - case HRcVec128: case HRcFlt64: - vassert(0 == ((UShort)vreg->spill_offset % 16)); break; - default: - vassert(0 == ((UShort)vreg->spill_offset % 8)); break; - } -} - - -/* Double the size of the real-reg live-range array, if needed. */ -__attribute__((noinline)) -static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used ) -{ - Int k; - RRegLR* arr2; - if (0) - vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size); - vassert(used == *size); - arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR)); - for (k = 0; k < *size; k++) - arr2[k] = (*info)[k]; - *size *= 2; - *info = arr2; -} -inline -static void ensureRRLRspace ( RRegLR** info, Int* size, Int used ) -{ - if (LIKELY(used < *size)) return; - ensureRRLRspace_SLOW(info, size, used); -} - - -/* Sort an array of RRegLR entries by either the .live_after or - .dead_before fields. This is performance-critical. */ -static void sortRRLRarray ( RRegLR* arr, - Int size, Bool by_live_after ) -{ - Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 }; - Int lo = 0; - Int hi = size-1; - Int i, j, h, bigN, hp; - RRegLR v; - - vassert(size >= 0); - if (size == 0) - return; - - bigN = hi - lo + 1; if (bigN < 2) return; - hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--; - - if (by_live_after) { - - for ( ; hp >= 0; hp--) { - h = incs[hp]; - for (i = lo + h; i <= hi; i++) { - v = arr[i]; - j = i; - while (arr[j-h].live_after > v.live_after) { - arr[j] = arr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - arr[j] = v; - } - } - - } else { - - for ( ; hp >= 0; hp--) { - h = incs[hp]; - for (i = lo + h; i <= hi; i++) { - v = arr[i]; - j = i; - while (arr[j-h].dead_before > v.dead_before) { - arr[j] = arr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - arr[j] = v; - } - } - - } -} - - -/* Compute the index of the highest and lowest 1 in a ULong, 
- respectively. Results are undefined if the argument is zero. - Don't pass it zero :) */ -static inline UInt ULong__maxIndex ( ULong w64 ) { - return 63 - __builtin_clzll(w64); -} - -static inline UInt ULong__minIndex ( ULong w64 ) { - return __builtin_ctzll(w64); -} - - -/* Vectorised memset, copied from Valgrind's m_libcbase.c. */ -static void* local_memset ( void *destV, Int c, SizeT sz ) -{ -# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3))) - - UInt c4; - UChar* d = destV; - UChar uc = c; - - while ((!IS_4_ALIGNED(d)) && sz >= 1) { - d[0] = uc; - d++; - sz--; - } - if (sz == 0) - return destV; - c4 = uc; - c4 |= (c4 << 8); - c4 |= (c4 << 16); - while (sz >= 16) { - ((UInt*)d)[0] = c4; - ((UInt*)d)[1] = c4; - ((UInt*)d)[2] = c4; - ((UInt*)d)[3] = c4; - d += 16; - sz -= 16; - } - while (sz >= 4) { - ((UInt*)d)[0] = c4; - d += 4; - sz -= 4; - } - while (sz >= 1) { - d[0] = c; - d++; - sz--; - } - return destV; - -# undef IS_4_ALIGNED -} - - -/* A target-independent register allocator. Requires various - functions which it uses to deal abstractly with instructions and - registers, since it cannot have any target-specific knowledge. - - Returns a new list of instructions, which, as a result of the - behaviour of mapRegs, will be in-place modifications of the - original instructions. - - Requires that the incoming code has been generated using - vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside - that range is a checked run-time error. - - Takes an expandable array of pointers to unallocated insns. - Returns an expandable array of pointers to allocated insns. -*/ -HInstrArray* doRegisterAllocation ( - - /* Incoming virtual-registerised code. */ - HInstrArray* instrs_in, - - /* The real-register universe to use. This contains facts about - real registers, one of which is the set of registers available - for allocation. 
*/ - const RRegUniverse* univ, - - /* Return True iff the given insn is a reg-reg move, in which - case also return the src and dst regs. */ - Bool (*isMove) ( const HInstr*, HReg*, HReg* ), - - /* Get info about register usage in this insn. */ - void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ), - - /* Apply a reg-reg mapping to an insn. */ - void (*mapRegs) ( HRegRemap*, HInstr*, Bool ), - - /* Return one, or, if we're unlucky, two insn(s) to spill/restore a - real reg to a spill slot byte offset. The two leading HInstr** - args are out parameters, through which the generated insns are - returned. Also (optionally) a 'directReload' function, which - attempts to replace a given instruction by one which reads - directly from a specified spill slot. May be NULL, in which - case the optimisation is not attempted. */ - void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), - void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), - HInstr* (*directReload) ( HInstr*, HReg, Short ), - Int guest_sizeB, - - /* For debug printing only. */ - void (*ppInstr) ( const HInstr*, Bool ), - void (*ppReg) ( HReg ), - - /* 32/64bit mode */ - Bool mode64 -) -{ -# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) - - const Bool eq_spill_opt = True; - - /* Info on vregs and rregs. Computed once and remains - unchanged. */ - Int n_vregs; - VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */ - - /* We keep two copies of the real-reg live range info, one sorted - by .live_after and the other by .dead_before. First the - unsorted info is created in the _la variant is copied into the - _db variant. Once that's done both of them are sorted. - We also need two integer cursors which record the next - location in the two arrays to consider. */ - RRegLR* rreg_lrs_la; - RRegLR* rreg_lrs_db; - Int rreg_lrs_size; - Int rreg_lrs_used; - Int rreg_lrs_la_next; - Int rreg_lrs_db_next; - - /* Info on register usage in the incoming instruction array. 
- Computed once and remains unchanged, more or less; updated - sometimes by the direct-reload optimisation. */ - HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */ - - /* Used when constructing vreg_lrs (for allocating stack - slots). */ - Short ss_busy_until_before[N_SPILL64S]; - - /* Used when constructing rreg_lrs. */ - Int* rreg_live_after; - Int* rreg_dead_before; - - /* Running state of the core allocation algorithm. */ - RRegState* rreg_state; /* [0 .. n_rregs-1] */ - Int n_rregs; - - /* .. and the redundant backward map */ - /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO. - This implies n_rregs must be <= 32768. */ - Short* vreg_state; /* [0 .. n_vregs-1] */ - - /* The vreg -> rreg map constructed and then applied to each - instr. */ - HRegRemap remap; - - /* The output array of instructions. */ - HInstrArray* instrs_out; - - /* Sanity checks are expensive. They are only done periodically, - not at each insn processed. */ - Bool do_sanity_check; - - vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN)); - vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN)); - vassert(0 == (N_SPILL64S % 2)); - - /* The live range numbers are signed shorts, and so limiting the - number of insns to 15000 comfortably guards against them - overflowing 32k. 
*/ - vassert(instrs_in->arr_used <= 15000); - -# define INVALID_INSTRNO (-2) - -# define EMIT_INSTR(_instr) \ - do { \ - HInstr* _tmp = (_instr); \ - if (DEBUG_REGALLOC) { \ - vex_printf("** "); \ - (*ppInstr)(_tmp, mode64); \ - vex_printf("\n\n"); \ - } \ - addHInstr ( instrs_out, _tmp ); \ - } while (0) - -# define PRINT_STATE \ - do { \ - Int z, q; \ - for (z = 0; z < n_rregs; z++) { \ - vex_printf(" rreg_state[%2d] = ", z); \ - (*ppReg)(univ->regs[z]); \ - vex_printf(" \t"); \ - switch (rreg_state[z].disp) { \ - case Free: vex_printf("Free\n"); break; \ - case Unavail: vex_printf("Unavail\n"); break; \ - case Bound: vex_printf("BoundTo "); \ - (*ppReg)(rreg_state[z].vreg); \ - vex_printf("\n"); break; \ - } \ - } \ - vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \ - q = 0; \ - for (z = 0; z < n_vregs; z++) { \ - if (vreg_state[z] == INVALID_RREG_NO) \ - continue; \ - vex_printf("[%d] -> %d ", z, vreg_state[z]); \ - q++; \ - if (q > 0 && (q % 6) == 0) \ - vex_printf("\n "); \ - } \ - vex_printf("\n"); \ - } while (0) - - - /* --------- Stage 0: set up output array --------- */ - /* --------- and allocate/initialise running state. --------- */ - - instrs_out = newHInstrArray(); - - /* ... and initialise running state. */ - /* n_rregs is no more than a short name for n_available_real_regs. */ - n_rregs = univ->allocable; - n_vregs = instrs_in->n_vregs; - - /* If this is not so, vreg_state entries will overflow. */ - vassert(n_vregs < 32767); - - /* If this is not so, the universe we have is nonsensical. 
*/ - vassert(n_rregs > 0); - - rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState)); - vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short)); - - for (Int j = 0; j < n_rregs; j++) { - rreg_state[j].has_hlrs = False; - rreg_state[j].disp = Free; - rreg_state[j].vreg = INVALID_HREG; - rreg_state[j].is_spill_cand = False; - rreg_state[j].eq_spill_slot = False; - } - - for (Int j = 0; j < n_vregs; j++) - vreg_state[j] = INVALID_RREG_NO; - - - /* --------- Stage 1: compute vreg live ranges. --------- */ - /* --------- Stage 2: compute rreg live ranges. --------- */ - - /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */ - - /* This is relatively simple, because (1) we only seek the complete - end-to-end live range of each vreg, and are not interested in - any holes in it, and (2) the vregs are conveniently numbered 0 - .. n_vregs-1, so we can just dump the results in a - pre-allocated array. */ - - vreg_lrs = NULL; - if (n_vregs > 0) - vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs); - - for (Int j = 0; j < n_vregs; j++) { - vreg_lrs[j].live_after = INVALID_INSTRNO; - vreg_lrs[j].dead_before = INVALID_INSTRNO; - vreg_lrs[j].spill_offset = 0; - vreg_lrs[j].spill_size = 0; - vreg_lrs[j].reg_class = HRcINVALID; - } - - /* An array to hold the reg-usage info for the incoming - instructions. */ - reg_usage_arr = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used); - - /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */ - - /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */ - - /* This is more complex than Stage 1, because we need to compute - exactly all the live ranges of all the allocatable real regs, - and we don't know in advance how many there will be. */ - - rreg_lrs_used = 0; - rreg_lrs_size = 4; - rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR)); - rreg_lrs_db = NULL; /* we'll create this later */ - - /* We'll need to track live range start/end points seperately for - each rreg. Sigh. 
*/ - vassert(n_rregs > 0); - rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); - rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int)); - - for (Int j = 0; j < n_rregs; j++) { - rreg_live_after[j] = - rreg_dead_before[j] = INVALID_INSTRNO; - } - - /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */ - - /* ------ start of ITERATE OVER INSNS ------ */ - - for (Int ii = 0; ii < instrs_in->arr_used; ii++) { - - (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 ); - - if (0) { - vex_printf("\n%d stage1: ", ii); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n"); - ppHRegUsage(univ, &reg_usage_arr[ii]); - } - - /* ------ start of DEAL WITH VREG LIVE RANGES ------ */ - - /* for each virtual reg mentioned in the insn ... */ - for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { - - HReg vreg = reg_usage_arr[ii].vRegs[j]; - vassert(hregIsVirtual(vreg)); - - Int k = hregIndex(vreg); - if (k < 0 || k >= n_vregs) { - vex_printf("\n"); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n"); - vex_printf("vreg %d, n_vregs %d\n", k, n_vregs); - vpanic("doRegisterAllocation: out-of-range vreg"); - } - - /* Take the opportunity to note its regclass. We'll need - that when allocating spill slots. */ - if (vreg_lrs[k].reg_class == HRcINVALID) { - /* First mention of this vreg. */ - vreg_lrs[k].reg_class = hregClass(vreg); - } else { - /* Seen it before, so check for consistency. */ - vassert(vreg_lrs[k].reg_class == hregClass(vreg)); - } - - /* Now consider live ranges. 
*/ - switch (reg_usage_arr[ii].vMode[j]) { - case HRmRead: - if (vreg_lrs[k].live_after == INVALID_INSTRNO) { - vex_printf("\n\nOFFENDING VREG = %d\n", k); - vpanic("doRegisterAllocation: " - "first event for vreg is Read"); - } - vreg_lrs[k].dead_before = toShort(ii + 1); - break; - case HRmWrite: - if (vreg_lrs[k].live_after == INVALID_INSTRNO) - vreg_lrs[k].live_after = toShort(ii); - vreg_lrs[k].dead_before = toShort(ii + 1); - break; - case HRmModify: - if (vreg_lrs[k].live_after == INVALID_INSTRNO) { - vex_printf("\n\nOFFENDING VREG = %d\n", k); - vpanic("doRegisterAllocation: " - "first event for vreg is Modify"); - } - vreg_lrs[k].dead_before = toShort(ii + 1); - break; - default: - vpanic("doRegisterAllocation(1)"); - } /* switch */ - - } /* iterate over virtual registers */ - - /* ------ end of DEAL WITH VREG LIVE RANGES ------ */ - - /* ------ start of DEAL WITH RREG LIVE RANGES ------ */ - - /* If this doesn't hold, the following iteration over real registers - will fail miserably. */ - vassert(N_RREGUNIVERSE_REGS == 64); - - const ULong rRead = reg_usage_arr[ii].rRead; - const ULong rWritten = reg_usage_arr[ii].rWritten; - const ULong rMentioned = rRead | rWritten; - - UInt rReg_minIndex; - UInt rReg_maxIndex; - if (rMentioned == 0) { - /* There are no real register uses in this insn. Set - rReg_{min,max}Index so that the following loop doesn't iterate - at all, so as to avoid wasting time. */ - rReg_minIndex = 1; - rReg_maxIndex = 0; - } else { - rReg_minIndex = ULong__minIndex(rMentioned); - rReg_maxIndex = ULong__maxIndex(rMentioned); - /* Don't bother to look at registers which are not available - to the allocator. We asserted above that n_rregs > 0, so - n_rregs-1 is safe. */ - if (rReg_maxIndex >= n_rregs) - rReg_maxIndex = n_rregs-1; - } - - /* for each allocator-available real reg mentioned in the insn ... */ - /* Note. We are allocating only over the real regs available to - the allocator. 
Others, eg the stack or baseblock pointers, - are unavailable to allocation and so we never visit them. - Hence the iteration is cut off at n_rregs-1, since n_rregs == - univ->allocable. */ - for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) { - - const ULong jMask = 1ULL << j; - if (LIKELY((rMentioned & jMask) == 0)) - continue; - - const Bool isR = (rRead & jMask) != 0; - const Bool isW = (rWritten & jMask) != 0; - - /* Dummy initialisations of flush_la and flush_db to avoid - possible bogus uninit-var warnings from gcc. */ - Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO; - Bool flush = False; - - if (isW && !isR) { - flush_la = rreg_live_after[j]; - flush_db = rreg_dead_before[j]; - if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO) - flush = True; - rreg_live_after[j] = ii; - rreg_dead_before[j] = ii+1; - } else if (!isW && isR) { - if (rreg_live_after[j] == INVALID_INSTRNO) { - vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); - vex_printf("\n"); - vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n"); - vpanic("doRegisterAllocation: " - "first event for rreg is Read"); - } - rreg_dead_before[j] = ii+1; - } else { - vassert(isR && isW); - if (rreg_live_after[j] == INVALID_INSTRNO) { - vex_printf("\nOFFENDING RREG = "); - (*ppReg)(univ->regs[j]); - vex_printf("\n"); - vex_printf("\nOFFENDING instr = "); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n"); - vpanic("doRegisterAllocation: " - "first event for rreg is Modify"); - } - rreg_dead_before[j] = ii+1; - } - - if (flush) { - vassert(flush_la != INVALID_INSTRNO); - vassert(flush_db != INVALID_INSTRNO); - ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); - if (0) - vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db); - rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; - rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la); - rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db); - 
rreg_lrs_used++; - } - - } /* iterate over rregs in the instr */ - - /* ------ end of DEAL WITH RREG LIVE RANGES ------ */ - - } /* iterate over insns */ - - /* ------ end of ITERATE OVER INSNS ------ */ - - /* ------ start of FINALISE RREG LIVE RANGES ------ */ - - /* Now finish up any live ranges left over. */ - for (Int j = 0; j < n_rregs; j++) { - - if (0) { - vex_printf("residual %d: %d %d\n", j, rreg_live_after[j], - rreg_dead_before[j]); - } - vassert( (rreg_live_after[j] == INVALID_INSTRNO - && rreg_dead_before[j] == INVALID_INSTRNO) - || - (rreg_live_after[j] != INVALID_INSTRNO - && rreg_dead_before[j] != INVALID_INSTRNO) - ); - - if (rreg_live_after[j] == INVALID_INSTRNO) - continue; - - ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used); - if (0) - vex_printf("FLUSH 2 (%d,%d)\n", - rreg_live_after[j], rreg_dead_before[j]); - rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j]; - rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]); - rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]); - rreg_lrs_used++; - } - - /* Compute summary hints for choosing real regs. If a real reg is - involved in a hard live range, record that fact in the fixed - part of the running rreg_state. Later, when offered a choice between - rregs, it's better to choose one which is not marked as having - any HLRs, since ones with HLRs may need to be spilled around - their HLRs. Correctness of final assignment is unaffected by - this mechanism -- it is only an optimisation. */ - - for (Int j = 0; j < rreg_lrs_used; j++) { - HReg rreg = rreg_lrs_la[j].rreg; - vassert(!hregIsVirtual(rreg)); - /* rreg is involved in a HLR. Record this info in the array, if - there is space. 
*/ - UInt ix = hregIndex(rreg); - vassert(ix < n_rregs); - rreg_state[ix].has_hlrs = True; - } - if (0) { - for (Int j = 0; j < n_rregs; j++) { - if (!rreg_state[j].has_hlrs) - continue; - ppReg(univ->regs[j]); - vex_printf(" hinted\n"); - } - } - - /* Finally, copy the _la variant into the _db variant and - sort both by their respective fields. */ - rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR)); - for (Int j = 0; j < rreg_lrs_used; j++) - rreg_lrs_db[j] = rreg_lrs_la[j]; - - sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ ); - sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ ); - - /* And set up the cursors. */ - rreg_lrs_la_next = 0; - rreg_lrs_db_next = 0; - - for (Int j = 1; j < rreg_lrs_used; j++) { - vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after); - vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before); - } - - /* ------ end of FINALISE RREG LIVE RANGES ------ */ - - if (DEBUG_REGALLOC) { - for (Int j = 0; j < n_vregs; j++) { - vex_printf("vreg %d: la = %d, db = %d\n", - j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before ); - } - } - - if (DEBUG_REGALLOC) { - vex_printf("RRegLRs by LA:\n"); - for (Int j = 0; j < rreg_lrs_used; j++) { - vex_printf(" "); - (*ppReg)(rreg_lrs_la[j].rreg); - vex_printf(" la = %d, db = %d\n", - rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before ); - } - vex_printf("RRegLRs by DB:\n"); - for (Int j = 0; j < rreg_lrs_used; j++) { - vex_printf(" "); - (*ppReg)(rreg_lrs_db[j].rreg); - vex_printf(" la = %d, db = %d\n", - rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before ); - } - } - - /* --------- Stage 3: allocate spill slots. --------- */ - - /* Each spill slot is 8 bytes long. For vregs which take more than - 64 bits to spill (classes Flt64 and Vec128), we have to allocate - two consecutive spill slots. For 256 bit registers (class - Vec256), we have to allocate four consecutive spill slots. 
- - For Vec128-class on PowerPC, the spill slot's actual address - must be 16-byte aligned. Since the spill slot's address is - computed as an offset from the guest state pointer, and since - the user of the generated code must set that pointer to a - 32-aligned value, we have the residual obligation here of - choosing a 16-aligned spill slot offset for Vec128-class values. - Since each spill slot is 8 bytes long, that means for - Vec128-class values we must allocated a spill slot number which - is zero mod 2. - - Similarly, for Vec256 class on amd64, find a spill slot number - which is zero mod 4. This guarantees it will be 32 byte - aligned, which isn't actually necessary on amd64 (we use movUpd - etc to spill), but seems like good practice. - - Do a rank-based allocation of vregs to spill slot numbers. We - put as few values as possible in spill slots, but nevertheless - need to have a spill slot available for all vregs, just in case. - */ - /* Int max_ss_no = -1; */ - - local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before)); - - for (Int j = 0; j < n_vregs; j++) { - - /* True iff this vreg is unused. In which case we also expect - that the reg_class field for it has not been set. */ - if (vreg_lrs[j].live_after == INVALID_INSTRNO) { - vassert(vreg_lrs[j].reg_class == HRcINVALID); - continue; - } - - /* The spill slots are 64 bits in size. As per the comment on - definition of HRegClass in host_generic_regs.h, that means, - to spill a vreg of class Flt64 or Vec128, we'll need to find - two adjacent spill slots to use. For Vec256, we'll need to - find four adjacent slots to use. Note, this logic needs to - kept in sync with the size info on the definition of - HRegClass. */ - Int ss_no = -1; - switch (vreg_lrs[j].reg_class) { - - case HRcVec128: case HRcFlt64: - /* Find two adjacent free slots in which between them - provide up to 128 bits in which to spill the vreg. - Since we are trying to find an even:odd pair, move - along in steps of 2 (slots). 
*/ - for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2) - if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after - && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after) - break; - if (ss_no >= N_SPILL64S-1) { - vpanic("LibVEX_N_SPILL_BYTES is too low. " - "Increase and recompile."); - } - ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before; - ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before; - break; - - default: - /* The ordinary case -- just find a single spill slot. */ - /* Find the lowest-numbered spill slot which is available - at the start point of this interval, and assign the - interval to it. */ - for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) - if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after) - break; - if (ss_no == N_SPILL64S) { - vpanic("LibVEX_N_SPILL_BYTES is too low. " - "Increase and recompile."); - } - ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before; - break; - - } /* switch (vreg_lrs[j].reg_class) */ - - /* This reflects LibVEX's hard-wired knowledge of the baseBlock - layout: the guest state, then two equal sized areas following - it for two sets of shadow state, and then the spill area. */ - vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8); - - /* Independent check that we've made a sane choice of slot */ - sanity_check_spill_offset( &vreg_lrs[j] ); - /* if (j > max_ss_no) */ - /* max_ss_no = j; */ - } - - if (0) { - vex_printf("\n\n"); - for (Int j = 0; j < n_vregs; j++) - vex_printf("vreg %d --> spill offset %d\n", - j, vreg_lrs[j].spill_offset); - } - - /* --------- Stage 4: establish rreg preferences --------- */ - - /* It may be advantageous to allocating certain vregs to specific - rregs, as a way of avoiding reg-reg moves later. Here we - establish which, if any, rreg each vreg would prefer to be in. - Note that this constrains the allocator -- ideally we end up - with as few as possible vregs expressing a preference. 
- - This is an optimisation: if the .preferred_rreg field is never - set to anything different from INVALID_HREG, the allocator still - works. */ - - /* 30 Dec 04: removed this mechanism as it does not seem to - help. */ - - /* --------- Stage 5: process instructions --------- */ - - /* This is the main loop of the allocator. First, we need to - correctly set up our running state, which tracks the status of - each real register. */ - - /* ------ BEGIN: Process each insn in turn. ------ */ - - for (Int ii = 0; ii < instrs_in->arr_used; ii++) { - - if (DEBUG_REGALLOC) { - vex_printf("\n====----====---- Insn %d ----====----====\n", ii); - vex_printf("---- "); - (*ppInstr)(instrs_in->arr[ii], mode64); - vex_printf("\n\nInitial state:\n"); - PRINT_STATE; - vex_printf("\n"); - } - - /* ------------ Sanity checks ------------ */ - - /* Sanity checks are expensive. So they are done only once - every 17 instructions, and just before the last - instruction. */ - do_sanity_check - = toBool( - False /* Set to True for sanity checking of all insns. */ - || ii == instrs_in->arr_used-1 - || (ii > 0 && (ii % 17) == 0) - ); - - if (do_sanity_check) { - - /* Sanity check 1: all rregs with a hard live range crossing - this insn must be marked as unavailable in the running - state. */ - for (Int j = 0; j < rreg_lrs_used; j++) { - if (rreg_lrs_la[j].live_after < ii - && ii < rreg_lrs_la[j].dead_before) { - /* ii is the middle of a hard live range for some real - reg. Check it's marked as such in the running - state. */ - HReg reg = rreg_lrs_la[j].rreg; - - if (0) { - vex_printf("considering la %d .. 
db %d reg = ", - rreg_lrs_la[j].live_after, - rreg_lrs_la[j].dead_before); - (*ppReg)(reg); - vex_printf("\n"); - } - - /* assert that this rreg is marked as unavailable */ - vassert(!hregIsVirtual(reg)); - vassert(rreg_state[hregIndex(reg)].disp == Unavail); - } - } - - /* Sanity check 2: conversely, all rregs marked as - unavailable in the running rreg_state must have a - corresponding hard live range entry in the rreg_lrs - array. */ - for (Int j = 0; j < n_rregs; j++) { - vassert(rreg_state[j].disp == Bound - || rreg_state[j].disp == Free - || rreg_state[j].disp == Unavail); - if (rreg_state[j].disp != Unavail) - continue; - Int k; - for (k = 0; k < rreg_lrs_used; k++) { - HReg reg = rreg_lrs_la[k].rreg; - vassert(!hregIsVirtual(reg)); - if (hregIndex(reg) == j - && rreg_lrs_la[k].live_after < ii - && ii < rreg_lrs_la[k].dead_before) - break; - } - /* If this vassertion fails, we couldn't find a - corresponding HLR. */ - vassert(k < rreg_lrs_used); - } - - /* Sanity check 3: all vreg-rreg bindings must bind registers - of the same class. */ - for (Int j = 0; j < n_rregs; j++) { - if (rreg_state[j].disp != Bound) { - vassert(rreg_state[j].eq_spill_slot == False); - continue; - } - vassert(hregClass(univ->regs[j]) - == hregClass(rreg_state[j].vreg)); - vassert( hregIsVirtual(rreg_state[j].vreg)); - } - - /* Sanity check 4: the vreg_state and rreg_state - mutually-redundant mappings are consistent. If - rreg_state[j].vreg points at some vreg_state entry then - that vreg_state entry should point back at - rreg_state[j]. 
*/ - for (Int j = 0; j < n_rregs; j++) { - if (rreg_state[j].disp != Bound) - continue; - Int k = hregIndex(rreg_state[j].vreg); - vassert(IS_VALID_VREGNO(k)); - vassert(vreg_state[k] == j); - } - for (Int j = 0; j < n_vregs; j++) { - Int k = vreg_state[j]; - if (k == INVALID_RREG_NO) - continue; - vassert(IS_VALID_RREGNO(k)); - vassert(rreg_state[k].disp == Bound); - vassert(hregIndex(rreg_state[k].vreg) == j); - } - - } /* if (do_sanity_check) */ - - /* ------------ end of Sanity checks ------------ */ - - /* Do various optimisations pertaining to register coalescing - and preferencing: - MOV v <-> v coalescing (done here). - MOV v <-> r coalescing (not yet, if ever) - */ - /* If doing a reg-reg move between two vregs, and the src's live - range ends here and the dst's live range starts here, bind - the dst to the src's rreg, and that's all. */ - HReg vregS = INVALID_HREG; - HReg vregD = INVALID_HREG; - if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) { - if (!hregIsVirtual(vregS)) goto cannot_coalesce; - if (!hregIsVirtual(vregD)) goto cannot_coalesce; - /* Check that *isMove is not telling us a bunch of lies ... */ - vassert(hregClass(vregS) == hregClass(vregD)); - Int k = hregIndex(vregS); - Int m = hregIndex(vregD); - vassert(IS_VALID_VREGNO(k)); - vassert(IS_VALID_VREGNO(m)); - if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce; - if (vreg_lrs[m].live_after != ii) goto cannot_coalesce; - if (DEBUG_REGALLOC) { - vex_printf("COALESCE "); - (*ppReg)(vregS); - vex_printf(" -> "); - (*ppReg)(vregD); - vex_printf("\n\n"); - } - /* Find the state entry for vregS. */ - Int n = vreg_state[k]; /* k is the index of vregS */ - if (n == INVALID_RREG_NO) { - /* vregS is not currently in a real register. So we can't - do the coalescing. Give up. */ - goto cannot_coalesce; - } - vassert(IS_VALID_RREGNO(n)); - - /* Finally, we can do the coalescing. It's trivial -- merely - claim vregS's register for vregD. 
*/ - rreg_state[n].vreg = vregD; - vassert(IS_VALID_VREGNO(hregIndex(vregD))); - vassert(IS_VALID_VREGNO(hregIndex(vregS))); - vreg_state[hregIndex(vregD)] = toShort(n); - vreg_state[hregIndex(vregS)] = INVALID_RREG_NO; - - /* This rreg has become associated with a different vreg and - hence with a different spill slot. Play safe. */ - rreg_state[n].eq_spill_slot = False; - - /* Move on to the next insn. We skip the post-insn stuff for - fixed registers, since this move should not interact with - them in any way. */ - continue; - } - cannot_coalesce: - - /* ------ Free up rregs bound to dead vregs ------ */ - - /* Look for vregs whose live range has just ended, and - mark the associated rreg as free. */ - - for (Int j = 0; j < n_rregs; j++) { - if (rreg_state[j].disp != Bound) - continue; - UInt vregno = hregIndex(rreg_state[j].vreg); - vassert(IS_VALID_VREGNO(vregno)); - if (vreg_lrs[vregno].dead_before <= ii) { - rreg_state[j].disp = Free; - rreg_state[j].eq_spill_slot = False; - Int m = hregIndex(rreg_state[j].vreg); - vassert(IS_VALID_VREGNO(m)); - vreg_state[m] = INVALID_RREG_NO; - if (DEBUG_REGALLOC) { - vex_printf("free up "); - (*ppReg)(univ->regs[j]); - vex_printf("\n"); - } - } - } - - /* ------ Pre-instruction actions for fixed rreg uses ------ */ - - /* Now we have to deal with rregs which are about to be made - live by this instruction -- in other words, are entering into - one of their live ranges. If any such rreg holds a vreg, we - will have to free up the rreg. The simplest solution which - is correct is to spill the rreg. - - Note we could do better: - * Could move it into some other free rreg, if one is available - - Do this efficiently, by incrementally stepping along an array - of rreg HLRs that are known to be sorted by start point - (their .live_after field). 
- */ - while (True) { - vassert(rreg_lrs_la_next >= 0); - vassert(rreg_lrs_la_next <= rreg_lrs_used); - if (rreg_lrs_la_next == rreg_lrs_used) - break; /* no more real reg live ranges to consider */ - if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after) - break; /* next live range does not yet start */ - vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after); - /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up. - Find the associated rreg_state entry. */ - /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after. - Real register live ranges are guaranteed to be well-formed - in that they start with a write to the register -- Stage 2 - rejects any code not satisfying this. So the correct - question to ask is whether - rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is, - whether the reg becomes live after this insn -- rather - than before it. */ - if (DEBUG_REGALLOC) { - vex_printf("need to free up rreg: "); - (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg); - vex_printf("\n\n"); - } - Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg); - - /* If this fails, we don't have an entry for this rreg. - Which we should. */ - vassert(IS_VALID_RREGNO(k)); - Int m = hregIndex(rreg_state[k].vreg); - if (rreg_state[k].disp == Bound) { - /* Yes, there is an associated vreg. Spill it if it's - still live. 
*/ - vassert(IS_VALID_VREGNO(m)); - vreg_state[m] = INVALID_RREG_NO; - if (vreg_lrs[m].dead_before > ii) { - vassert(vreg_lrs[m].reg_class != HRcINVALID); - if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) { - HInstr* spill1 = NULL; - HInstr* spill2 = NULL; - (*genSpill)( &spill1, &spill2, univ->regs[k], - vreg_lrs[m].spill_offset, mode64 ); - vassert(spill1 || spill2); /* can't both be NULL */ - if (spill1) - EMIT_INSTR(spill1); - if (spill2) - EMIT_INSTR(spill2); - } - rreg_state[k].eq_spill_slot = True; - } - } - rreg_state[k].disp = Unavail; - rreg_state[k].vreg = INVALID_HREG; - rreg_state[k].eq_spill_slot = False; - - /* check for further rregs entering HLRs at this point */ - rreg_lrs_la_next++; - } - - if (DEBUG_REGALLOC) { - vex_printf("After pre-insn actions for fixed regs:\n"); - PRINT_STATE; - vex_printf("\n"); - } - - /* ------ Deal with the current instruction. ------ */ - - /* Finally we can begin the processing of this instruction - itself. The aim is to free up enough rregs for this insn. - This may generate spill stores since we may have to evict - some vregs currently in rregs. Also generates spill loads. - We also build up the final vreg->rreg mapping to be applied - to the insn. */ - - initHRegRemap(&remap); - - /* ------------ BEGIN directReload optimisation ----------- */ - - /* If the instruction reads exactly one vreg which is currently - in a spill slot, and this is last use of that vreg, see if we - can convert the instruction into one that reads directly from - the spill slot. This is clearly only possible for x86 and - amd64 targets, since ppc and arm are load-store - architectures. If successful, replace instrs_in->arr[ii] - with this new instruction, and recompute its reg usage, so - that the change is invisible to the standard-case handling - that follows. 
*/ - - if (directReload && reg_usage_arr[ii].n_vRegs <= 2) { - Bool debug_direct_reload = False; - HReg cand = INVALID_HREG; - Bool nreads = 0; - Short spilloff = 0; - - for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { - - HReg vreg = reg_usage_arr[ii].vRegs[j]; - vassert(hregIsVirtual(vreg)); - - if (reg_usage_arr[ii].vMode[j] == HRmRead) { - nreads++; - Int m = hregIndex(vreg); - vassert(IS_VALID_VREGNO(m)); - Int k = vreg_state[m]; - if (!IS_VALID_RREGNO(k)) { - /* ok, it is spilled. Now, is this its last use? */ - vassert(vreg_lrs[m].dead_before >= ii+1); - if (vreg_lrs[m].dead_before == ii+1 - && hregIsInvalid(cand)) { - spilloff = vreg_lrs[m].spill_offset; - cand = vreg; - } - } - } - } - - if (nreads == 1 && ! hregIsInvalid(cand)) { - HInstr* reloaded; - if (reg_usage_arr[ii].n_vRegs == 2) - vassert(! sameHReg(reg_usage_arr[ii].vRegs[0], - reg_usage_arr[ii].vRegs[1])); - - reloaded = directReload ( instrs_in->arr[ii], cand, spilloff ); - if (debug_direct_reload && !reloaded) { - vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" "); - ppInstr(instrs_in->arr[ii], mode64); - } - if (reloaded) { - /* Update info about the insn, so it looks as if it had - been in this form all along. */ - instrs_in->arr[ii] = reloaded; - (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 ); - if (debug_direct_reload && !reloaded) { - vex_printf(" --> "); - ppInstr(reloaded, mode64); - } - } - - if (debug_direct_reload && !reloaded) - vex_printf("\n"); - } - - } - - /* ------------ END directReload optimisation ------------ */ - - /* for each virtual reg mentioned in the insn ... */ - for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) { - - HReg vreg = reg_usage_arr[ii].vRegs[j]; - vassert(hregIsVirtual(vreg)); - - if (0) { - vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n"); - } - - /* Now we're trying to find a rreg for "vreg". First of all, - if it already has an rreg assigned, we don't need to do - anything more. 
Inspect the current state to find out. */ - Int m = hregIndex(vreg); - vassert(IS_VALID_VREGNO(m)); - Int n = vreg_state[m]; - if (IS_VALID_RREGNO(n)) { - vassert(rreg_state[n].disp == Bound); - addToHRegRemap(&remap, vreg, univ->regs[n]); - /* If this rreg is written or modified, mark it as different - from any spill slot value. */ - if (reg_usage_arr[ii].vMode[j] != HRmRead) - rreg_state[n].eq_spill_slot = False; - continue; - } else { - vassert(n == INVALID_RREG_NO); - } - - /* No luck. The next thing to do is see if there is a - currently free rreg available, of the correct class. If - so, bag it. NOTE, we could improve this by selecting an - rreg for which the next live-range event is as far ahead - as possible. */ - Int k_suboptimal = -1; - Int k; - for (k = 0; k < n_rregs; k++) { - if (rreg_state[k].disp != Free - || hregClass(univ->regs[k]) != hregClass(vreg)) - continue; - if (rreg_state[k].has_hlrs) { - /* Well, at least we can use k_suboptimal if we really - have to. Keep on looking for a better candidate. */ - k_suboptimal = k; - } else { - /* Found a preferable reg. Use it. */ - k_suboptimal = -1; - break; - } - } - if (k_suboptimal >= 0) - k = k_suboptimal; - - if (k < n_rregs) { - rreg_state[k].disp = Bound; - rreg_state[k].vreg = vreg; - Int p = hregIndex(vreg); - vassert(IS_VALID_VREGNO(p)); - vreg_state[p] = toShort(k); - addToHRegRemap(&remap, vreg, univ->regs[k]); - /* Generate a reload if needed. This only creates needed - reloads because the live range builder for vregs will - guarantee that the first event for a vreg is a write. - Hence, if this reference is not a write, it cannot be - the first reference for this vreg, and so a reload is - indeed needed. 
*/ - if (reg_usage_arr[ii].vMode[j] != HRmWrite) { - vassert(vreg_lrs[p].reg_class != HRcINVALID); - HInstr* reload1 = NULL; - HInstr* reload2 = NULL; - (*genReload)( &reload1, &reload2, univ->regs[k], - vreg_lrs[p].spill_offset, mode64 ); - vassert(reload1 || reload2); /* can't both be NULL */ - if (reload1) - EMIT_INSTR(reload1); - if (reload2) - EMIT_INSTR(reload2); - /* This rreg is read or modified by the instruction. - If it's merely read we can claim it now equals the - spill slot, but not so if it is modified. */ - if (reg_usage_arr[ii].vMode[j] == HRmRead) { - rreg_state[k].eq_spill_slot = True; - } else { - vassert(reg_usage_arr[ii].vMode[j] == HRmModify); - rreg_state[k].eq_spill_slot = False; - } - } else { - rreg_state[k].eq_spill_slot = False; - } - - continue; - } - - /* Well, now we have no option but to spill a vreg. It's - important to make a good choice of vreg to spill, and of - course we need to be careful not to spill a vreg which is - needed by this insn. */ - - /* First, mark in the rreg_state, those rregs which are not spill - candidates, due to holding a vreg mentioned by this - instruction. Or being of the wrong class. */ - for (k = 0; k < n_rregs; k++) { - rreg_state[k].is_spill_cand = False; - if (rreg_state[k].disp != Bound) - continue; - if (hregClass(univ->regs[k]) != hregClass(vreg)) - continue; - rreg_state[k].is_spill_cand = True; - /* Note, the following loop visits only the virtual regs - mentioned by the instruction. */ - for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) { - if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) { - rreg_state[k].is_spill_cand = False; - break; - } - } - } - - /* We can choose to spill any rreg satisfying - rreg_state[r].is_spill_cand (so to speak). Choose r so that - the next use of its associated vreg is as far ahead as - possible, in the hope that this will minimise the number - of consequent reloads required. 
*/ - Int spillee - = findMostDistantlyMentionedVReg ( - reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs ); - - if (spillee == -1) { - /* Hmmmmm. There don't appear to be any spill candidates. - We're hosed. */ - vex_printf("reg_alloc: can't find a register in class: "); - ppHRegClass(hregClass(vreg)); - vex_printf("\n"); - vpanic("reg_alloc: can't create a free register."); - } - - /* Right. So we're going to spill rreg_state[spillee]. */ - vassert(IS_VALID_RREGNO(spillee)); - vassert(rreg_state[spillee].disp == Bound); - /* check it's the right class */ - vassert(hregClass(univ->regs[spillee]) == hregClass(vreg)); - /* check we're not ejecting the vreg for which we are trying - to free up a register. */ - vassert(! sameHReg(rreg_state[spillee].vreg, vreg)); - - m = hregIndex(rreg_state[spillee].vreg); - vassert(IS_VALID_VREGNO(m)); - - /* So here's the spill store. Assert that we're spilling a - live vreg. */ - vassert(vreg_lrs[m].dead_before > ii); - vassert(vreg_lrs[m].reg_class != HRcINVALID); - if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) { - HInstr* spill1 = NULL; - HInstr* spill2 = NULL; - (*genSpill)( &spill1, &spill2, univ->regs[spillee], - vreg_lrs[m].spill_offset, mode64 ); - vassert(spill1 || spill2); /* can't both be NULL */ - if (spill1) - EMIT_INSTR(spill1); - if (spill2) - EMIT_INSTR(spill2); - } - - /* Update the rreg_state to reflect the new assignment for this - rreg. */ - rreg_state[spillee].vreg = vreg; - vreg_state[m] = INVALID_RREG_NO; - - rreg_state[spillee].eq_spill_slot = False; /* be safe */ - - m = hregIndex(vreg); - vassert(IS_VALID_VREGNO(m)); - vreg_state[m] = toShort(spillee); - - /* Now, if this vreg is being read or modified (as opposed to - written), we have to generate a reload for it. 
*/ - if (reg_usage_arr[ii].vMode[j] != HRmWrite) { - vassert(vreg_lrs[m].reg_class != HRcINVALID); - HInstr* reload1 = NULL; - HInstr* reload2 = NULL; - (*genReload)( &reload1, &reload2, univ->regs[spillee], - vreg_lrs[m].spill_offset, mode64 ); - vassert(reload1 || reload2); /* can't both be NULL */ - if (reload1) - EMIT_INSTR(reload1); - if (reload2) - EMIT_INSTR(reload2); - /* This rreg is read or modified by the instruction. - If it's merely read we can claim it now equals the - spill slot, but not so if it is modified. */ - if (reg_usage_arr[ii].vMode[j] == HRmRead) { - rreg_state[spillee].eq_spill_slot = True; - } else { - vassert(reg_usage_arr[ii].vMode[j] == HRmModify); - rreg_state[spillee].eq_spill_slot = False; - } - } - - /* So after much twisting and turning, we have vreg mapped to - rreg_state[spillee].rreg. Note that in the map. */ - addToHRegRemap(&remap, vreg, univ->regs[spillee]); - - } /* iterate over virtual registers in this instruction. */ - - /* We've finished clowning around with registers in this instruction. - Three results: - - the running rreg_state[] has been updated - - a suitable vreg->rreg mapping for this instruction has been - constructed - - spill and reload instructions may have been emitted. - - The final step is to apply the mapping to the instruction, - and emit that. - */ - - /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */ - (*mapRegs)( &remap, instrs_in->arr[ii], mode64 ); - EMIT_INSTR( instrs_in->arr[ii] ); - - if (DEBUG_REGALLOC) { - vex_printf("After dealing with current insn:\n"); - PRINT_STATE; - vex_printf("\n"); - } - - /* ------ Post-instruction actions for fixed rreg uses ------ */ - - /* Now we need to check for rregs exiting fixed live ranges - after this instruction, and if so mark them as free. 
*/ - while (True) { - vassert(rreg_lrs_db_next >= 0); - vassert(rreg_lrs_db_next <= rreg_lrs_used); - if (rreg_lrs_db_next == rreg_lrs_used) - break; /* no more real reg live ranges to consider */ - if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before) - break; /* next live range does not yet start */ - vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before); - /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live - range. Mark it as such in the main rreg_state array. */ - HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg; - vassert(!hregIsVirtual(reg)); - Int k = hregIndex(reg); - vassert(IS_VALID_RREGNO(k)); - vassert(rreg_state[k].disp == Unavail); - rreg_state[k].disp = Free; - rreg_state[k].vreg = INVALID_HREG; - rreg_state[k].eq_spill_slot = False; - - /* check for further rregs leaving HLRs at this point */ - rreg_lrs_db_next++; - } - - if (DEBUG_REGALLOC) { - vex_printf("After post-insn actions for fixed regs:\n"); - PRINT_STATE; - vex_printf("\n"); - } - - } /* iterate over insns */ - - /* ------ END: Process each insn in turn. 
------ */ - - /* free(rreg_state); */ - /* free(rreg_lrs); */ - /* if (vreg_lrs) free(vreg_lrs); */ - - /* Paranoia */ - vassert(rreg_lrs_la_next == rreg_lrs_used); - vassert(rreg_lrs_db_next == rreg_lrs_used); - - return instrs_out; - -# undef INVALID_INSTRNO -# undef EMIT_INSTR -# undef PRINT_STATE -} - - - -/*---------------------------------------------------------------*/ -/*--- host_reg_alloc2.c ---*/ -/*---------------------------------------------------------------*/ diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c new file mode 100644 index 0000000000..a1c64ee9a3 --- /dev/null +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -0,0 +1,1143 @@ +/*----------------------------------------------------------------------------*/ +/*--- begin host_generic_reg_alloc3.c ---*/ +/*----------------------------------------------------------------------------*/ + +/* + This file is part of Valgrind, a dynamic binary instrumentation framework. + + Copyright (C) 2017-2017 Ivo Raisr + ivosh@ivosh.net + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +#include "libvex_basictypes.h" +#include "libvex.h" + +#include "main_util.h" +#include "host_generic_regs.h" + +/* Set to 1 for lots of debugging output. 
*/ +#define DEBUG_REGALLOC 0 + +/* Set to 1 for sanity checking at every instruction. + Set to 0 for sanity checking only every 17th one and the last one. */ +#define SANITY_CHECKS_EVERY_INSTR 0 + + +#define INVALID_INSTRNO (-2) + +/* Register allocator state is kept in an array of VRegState's. + There is an element for every virtual register (vreg). + Elements are indexed [0 .. n_vregs-1]. + Records information about vreg live range and its state. */ +typedef + struct { + /* Live range, register class and spill offset are computed during the + first register allocator pass and remain unchanged after that. */ + + /* This vreg becomes live with this instruction (inclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short live_after; + /* This vreg becomes dead before this instruction (exclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short dead_before; + /* What kind of register this is. */ + HRegClass reg_class; + + /* What is its current disposition? */ + enum { Unallocated, /* Neither spilled nor assigned to a real reg. */ + Assigned, /* Assigned to a real register, viz rreg. */ + Spilled /* Spilled to the spill slot. */ + } disp; + + /* If .disp == Assigned, what rreg is it bound to? */ + HReg rreg; + + /* The "home" spill slot. The offset is relative to the beginning of + the guest state. */ + UShort spill_offset; + } + VRegState; + +/* Records information on a real-register live range, associated with + a particular real register. Computed once; does not change. */ +typedef + struct { + /* This rreg becomes live with this instruction (inclusive). Contains + either an instruction number or INVALID_INSTRNO. */ + Short live_after; + /* This rreg becomes dead before this instruction (exclusive). Contains + either an instruction number or INVALID_INSTRNO. 
*/ + Short dead_before; + } + RRegLR; + +/* The allocator also maintains a redundant array of indexes (rreg_state) from + rreg numbers back to entries in vreg_state. It is redundant because iff + rreg_state[r] == v then hregNumber(vreg_state[v].rreg) == r -- that is, the + two entries point at each other. The purpose of this is to speed up + activities which involve looking for a particular rreg: there is no need to + scan the vreg_state looking for it, just index directly into rreg_state. + The FAQ "does this rreg already have an associated vreg" is the main + beneficiary. + The identity of the real register is not recorded here, because the index + of this structure in |rreg_state| is the index number of the register, and + the register itself can be extracted from the RRegUniverse (univ). */ +typedef + struct { + /* What is its current disposition? */ + enum { Free, /* Not bound to any vreg. */ + Bound, /* Bound to a vreg, viz vreg. */ + Reserved /* Reserved for an instruction. */ + } disp; + + /* If .disp == Bound, what vreg is it bound to? */ + HReg vreg; + + /* Live ranges. Computed during the first register allocator pass and + remain unchanged after that. */ + RRegLR* lrs; + UInt lrs_size; + UInt lrs_used; + + /* Live range corresponding to the currently processed instruction. + Index into |lrs| array. */ + UInt lr_current; + } + RRegState; + +#define IS_VALID_VREGNO(v) ((v) >= 0 && (v) < n_vregs) +#define IS_VALID_RREGNO(r) ((r) >= 0 && (r) < n_rregs) + +/* Compute the index of the highest and lowest 1 in a ULong, respectively. + Results are undefined if the argument is zero. 
Don't pass it zero :) */ +static inline UInt ULong__maxIndex ( ULong w64 ) { + return 63 - __builtin_clzll(w64); +} + +static inline UInt ULong__minIndex ( ULong w64 ) { + return __builtin_ctzll(w64); +} + +static inline void enlarge_rreg_lrs(RRegState* rreg) +{ + vassert(rreg->lrs_used == rreg->lrs_size); + + RRegLR* lr2 = LibVEX_Alloc_inline(2 * rreg->lrs_used * sizeof(RRegLR)); + for (UInt l = 0; l < rreg->lrs_used; l++) { + lr2[l] = rreg->lrs[l]; + } + + rreg->lrs = lr2; + rreg->lrs_size = 2 * rreg->lrs_used; +} + +static inline void print_state( + const RegAllocControl* con, + VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, + UShort current_ii) +{ + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + const VRegState* vreg = &vreg_state[v_idx]; + + if (vreg->live_after == INVALID_INSTRNO) { + continue; /* This is a dead vreg. Never comes into live. */ + } + vex_printf("vreg_state[%3u] \t", v_idx); + + UInt written; + switch (vreg->disp) { + case Unallocated: + written = vex_printf("unallocated"); + break; + case Assigned: + written = vex_printf("assigned to "); + written += con->ppReg(vreg->rreg); + break; + case Spilled: + written = vex_printf("spilled at offset %u", vreg->spill_offset); + break; + default: + vassert(0); + } + + for (Int w = 30 - written; w > 0; w--) { + vex_printf(" "); + } + + if (vreg->live_after > (Short) current_ii) { + vex_printf("[not live yet]\n"); + } else if ((Short) current_ii >= vreg->dead_before) { + vex_printf("[now dead]\n"); + } else { + vex_printf("[live]\n"); + } + } + + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + const RRegState* rreg = &rreg_state[r_idx]; + vex_printf("rreg_state[%2u] = ", r_idx); + UInt written = con->ppReg(con->univ->regs[r_idx]); + for (Int w = 10 - written; w > 0; w--) { + vex_printf(" "); + } + + switch (rreg->disp) { + case Free: + vex_printf("free\n"); + break; + case Bound: + vex_printf("bound for "); + con->ppReg(rreg->vreg); + vex_printf("\n"); + break; + case 
Reserved: + vex_printf("reserved - live range [%d, %d)\n", + rreg->lrs[rreg->lr_current].live_after, + rreg->lrs[rreg->lr_current].dead_before); + break; + } + } +} + +static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out, + const RegAllocControl* con, const HChar* why) +{ + if (DEBUG_REGALLOC) { + vex_printf("** "); + con->ppInstr(instr, con->mode64); + if (why != NULL) { + vex_printf(" (%s)", why); + } + vex_printf("\n\n"); + } + + addHInstr(instrs_out, instr); +} + +/* Spills a vreg assigned to some rreg. + The vreg is spilled and the rreg is freed. + Returns rreg's index. */ +static inline UInt spill_vreg( + HReg vreg, UInt v_idx, UInt current_ii, VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, HInstrArray* instrs_out, + const RegAllocControl* con) +{ + /* Check some invariants first. */ + vassert(IS_VALID_VREGNO((v_idx))); + vassert(vreg_state[v_idx].disp == Assigned); + HReg rreg = vreg_state[v_idx].rreg; + UInt r_idx = hregIndex(rreg); + vassert(IS_VALID_RREGNO(r_idx)); + vassert(hregClass(con->univ->regs[r_idx]) == hregClass(vreg)); + vassert(vreg_state[v_idx].dead_before > (Short) current_ii); + vassert(vreg_state[v_idx].reg_class != HRcINVALID); + + /* Generate spill. */ + HInstr* spill1 = NULL; + HInstr* spill2 = NULL; + con->genSpill(&spill1, &spill2, rreg, vreg_state[v_idx].spill_offset, + con->mode64); + vassert(spill1 != NULL || spill2 != NULL); /* cannot be both NULL */ + if (spill1 != NULL) { + emit_instr(spill1, instrs_out, con, "spill1"); + } + if (spill2 != NULL) { + emit_instr(spill2, instrs_out, con, "spill2"); + } + + /* Update register allocator state. */ + vreg_state[v_idx].disp = Spilled; + vreg_state[v_idx].rreg = INVALID_HREG; + rreg_state[r_idx].disp = Free; + rreg_state[r_idx].vreg = INVALID_HREG; + + return r_idx; +} + +/* Chooses a vreg to be spilled based on various criteria. + The vreg must not be from the instruction being processed, that is, it must + not be listed in reg_usage->vRegs. 
 */ +static inline HReg find_vreg_to_spill( + VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, + const HRegUsage* instr_regusage, HRegClass target_hregclass, + const HRegUsage* reg_usage, UInt scan_forward_from, UInt scan_forward_max, + const RegAllocControl* con) +{ + /* Scan forwards a few instructions to find the most distant mentioned + use of a vreg. We can scan in the range of (inclusive): + - reg_usage[scan_forward_from] + - reg_usage[scan_forward_end], where scan_forward_end + = MIN(scan_forward_max, scan_forward_from + FEW_INSTRUCTIONS). */ +# define FEW_INSTRUCTIONS 5 + UInt scan_forward_end + = (scan_forward_max <= scan_forward_from + FEW_INSTRUCTIONS) ? + scan_forward_max : scan_forward_from + FEW_INSTRUCTIONS; +# undef FEW_INSTRUCTIONS + + HReg vreg_found = INVALID_HREG; + UInt distance_so_far = 0; + + for (UInt r_idx = con->univ->allocable_start[target_hregclass]; + r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) { + if (rreg_state[r_idx].disp == Bound) { + HReg vreg = rreg_state[r_idx].vreg; + if (! HRegUsage__contains(instr_regusage, vreg)) { + UInt ii = scan_forward_from; + for ( ; ii <= scan_forward_end; ii++) { + if (HRegUsage__contains(&reg_usage[ii], vreg)) { + break; + } + } + + if (ii - scan_forward_from > distance_so_far) { + distance_so_far = ii - scan_forward_from; + vreg_found = vreg; + if (ii + distance_so_far == scan_forward_end) { + break; /* We are at the end. Nothing could be better. */ + } + } + } + } + } + + if (hregIsInvalid(vreg_found)) { + vex_printf("doRegisterAllocation_v3: cannot find a register in class: "); + ppHRegClass(target_hregclass); + vex_printf("\n"); + vpanic("doRegisterAllocation_v3: cannot find a register."); + } + + return vreg_found; +} + +/* Find a free rreg of the correct class. + Tries to find an rreg whose live range (if any) is as far ahead in the + incoming instruction stream as possible. 
An ideal rreg candidate is + a callee-save register because it won't be used for parameter passing + around helper function calls. */ +static Bool find_free_rreg( + VRegState* vreg_state, UInt n_vregs, + RRegState* rreg_state, UInt n_rregs, + UInt current_ii, HRegClass target_hregclass, + Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found) +{ + Bool found = False; + UInt distance_so_far = 0; /* running max for |live_after - current_ii| */ + + for (UInt r_idx = con->univ->allocable_start[target_hregclass]; + r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) { + const RRegState* rreg = &rreg_state[r_idx]; + if (rreg->disp == Free) { + if (rreg->lrs_used == 0) { + found = True; + *r_idx_found = r_idx; + break; /* There could be nothing better, so break now. */ + } else { + const RRegLR* lr = &rreg->lrs[rreg->lr_current]; + if (lr->live_after > (Short) current_ii) { + /* Not live, yet. */ + if ((lr->live_after - (Short) current_ii) > distance_so_far) { + distance_so_far = lr->live_after - (Short) current_ii; + found = True; + *r_idx_found = r_idx; + } + } else if ((Short) current_ii >= lr->dead_before) { + /* Now dead. Effectively as if there is no LR now. */ + found = True; + *r_idx_found = r_idx; + break; /* There could be nothing better, so break now. */ + } else { + /* Going live for this instruction. This could happen only when + rregs are being reserved en mass, for example before + a helper call. */ + vassert(reserve_phase); + } + } + } + } + + return found; +} + +/* A target-independent register allocator (v3). Requires various functions + which it uses to deal abstractly with instructions and registers, since it + cannot have any target-specific knowledge. + + Returns a new list of instructions, which, as a result of the behaviour of + mapRegs, will be in-place modifications of the original instructions. + + Requires that the incoming code has been generated using vreg numbers + 0, 1 .. n_vregs-1. 
Appearance of a vreg outside that range is a checked + run-time error. + + Takes unallocated instructions and returns allocated instructions. +*/ +HInstrArray* doRegisterAllocation( + /* Incoming virtual-registerised code. */ + HInstrArray* instrs_in, + + /* Register allocator controls to use. */ + const RegAllocControl* con +) +{ + vassert((con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN) == 0); + + /* The main register allocator state. */ + UInt n_vregs = instrs_in->n_vregs; + VRegState* vreg_state = NULL; + if (n_vregs > 0) { + vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(VRegState)); + } + + /* Redundant rreg -> vreg state. */ + UInt n_rregs = con->univ->allocable; + RRegState* rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState)); + + /* If this is not so, the universe we have is nonsensical. */ + vassert(n_rregs > 0); + STATIC_ASSERT(N_RREGUNIVERSE_REGS == 64); + + /* Info on register usage in the incoming instruction array. Computed once + and remains unchanged, more or less; updated sometimes by the + direct-reload optimisation. */ + HRegUsage* reg_usage + = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used); + + /* The live range numbers are signed shorts, and so limiting the + number of instructions to 15000 comfortably guards against them + overflowing 32k. */ + vassert(instrs_in->arr_used <= 15000); + + /* The output array of instructions. 
 */ + HInstrArray* instrs_out = newHInstrArray(); + + +# define OFFENDING_VREG(_v_idx, _instr, _mode) \ + do { \ + vex_printf("\n\nOffending vreg = %u\n", (_v_idx)); \ + vex_printf("\nOffending instruction = "); \ + con->ppInstr((_instr), con->mode64); \ + vex_printf("\n"); \ + vpanic("doRegisterAllocation_v3: first event for vreg is #_mode# \ (should be Write)"); \ + } while (0) + +# define OFFENDING_RREG(_r_idx, _instr, _mode) \ + do { \ + vex_printf("\n\nOffending rreg = "); \ + con->ppReg(con->univ->regs[(_r_idx)]); \ + vex_printf("\nOffending instruction = "); \ + con->ppInstr((_instr), con->mode64); \ + vex_printf("\n"); \ + vpanic("doRegisterAllocation_v3: first event for rreg is #_mode# \ (should be Write)"); \ + } while (0) + + +/* Finds an rreg of the correct class. + If a free rreg is not found, then spills a vreg not used by the current + instruction and makes free the corresponding rreg. */ +# define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase) \ + ({ \ + UInt _r_free_idx = -1; \ + Bool free_rreg_found = find_free_rreg(vreg_state, n_vregs, \ + rreg_state, n_rregs, (_ii), \ + (_reg_class), (_reserve_phase), \ + con, &_r_free_idx); \ + if (!free_rreg_found) { \ + HReg vreg_to_spill = find_vreg_to_spill( \ + vreg_state, n_vregs, rreg_state, n_rregs, \ + &reg_usage[(_ii)], (_reg_class), \ + reg_usage, (_ii) + 1, \ + instrs_in->arr_used - 1, con); \ + _r_free_idx = spill_vreg(vreg_to_spill, hregIndex(vreg_to_spill), \ + (_ii), vreg_state, n_vregs, \ + rreg_state, n_rregs, \ + instrs_out, con); \ + } \ + \ + vassert(IS_VALID_RREGNO(_r_free_idx)); \ + \ + _r_free_idx; \ + }) + + + /* --- Stage 0. Initialize the state. 
 --- */ + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + vreg_state[v_idx].live_after = INVALID_INSTRNO; + vreg_state[v_idx].dead_before = INVALID_INSTRNO; + vreg_state[v_idx].reg_class = HRcINVALID; + vreg_state[v_idx].disp = Unallocated; + vreg_state[v_idx].rreg = INVALID_HREG; + vreg_state[v_idx].spill_offset = 0; + } + + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + RRegState* rreg = &rreg_state[r_idx]; + rreg->disp = Free; + rreg->vreg = INVALID_HREG; + rreg->lrs_size = 4; + rreg->lrs = LibVEX_Alloc_inline(rreg->lrs_size * sizeof(RRegLR)); + rreg->lrs_used = 0; + rreg->lr_current = 0; + } + + + /* --- Stage 1. Scan the incoming instructions. --- */ + for (UShort ii = 0; ii < instrs_in->arr_used; ii++) { + const HInstr* instr = instrs_in->arr[ii]; + + con->getRegUsage(&reg_usage[ii], instr, con->mode64); + + if (0) { + vex_printf("\n%u stage 1: ", ii); + con->ppInstr(instr, con->mode64); + vex_printf("\n"); + ppHRegUsage(con->univ, &reg_usage[ii]); + } + + /* Process virtual registers mentioned in the instruction. */ + for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) { + HReg vreg = reg_usage[ii].vRegs[j]; + vassert(hregIsVirtual(vreg)); + + UInt v_idx = hregIndex(vreg); + if (!IS_VALID_VREGNO(v_idx)) { + vex_printf("\n"); + con->ppInstr(instr, con->mode64); + vex_printf("\n"); + vex_printf("vreg %u (n_vregs %u)\n", v_idx, n_vregs); + vpanic("doRegisterAllocation_v3: out-of-range vreg"); + } + + /* Note the register class. */ + if (vreg_state[v_idx].reg_class == HRcINVALID) { + /* First mention of this vreg. */ + vreg_state[v_idx].reg_class = hregClass(vreg); + } else { + /* Seen it before, so check for consistency. */ + vassert(vreg_state[v_idx].reg_class == hregClass(vreg)); + } + + /* Consider live ranges. 
*/ + switch (reg_usage[ii].vMode[j]) { + case HRmRead: + if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { + OFFENDING_VREG(v_idx, instr, "Read"); + } + vreg_state[v_idx].dead_before = toShort(ii + 1); + break; + case HRmWrite: + if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { + vreg_state[v_idx].live_after = toShort(ii); + } + vreg_state[v_idx].dead_before = toShort(ii + 1); + break; + case HRmModify: + if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { + OFFENDING_VREG(v_idx, instr, "Modify"); + } + vreg_state[v_idx].dead_before = toShort(ii + 1); + break; + default: + vassert(0); + } + } + + /* Process real registers mentioned in the instruction. */ + const ULong rRead = reg_usage[ii].rRead; + const ULong rWritten = reg_usage[ii].rWritten; + const ULong rMentioned = rRead | rWritten; + + if (rMentioned != 0) { + UInt rReg_minIndex = ULong__minIndex(rMentioned); + UInt rReg_maxIndex = ULong__maxIndex(rMentioned); + /* Don't bother to look at registers which are not available + to the allocator such as the stack or guest state pointers. These + are unavailable to the register allocator and so we never visit + them. We asserted above that n_rregs > 0, so (n_rregs - 1) is + safe. 
*/ + if (rReg_maxIndex >= n_rregs) { + rReg_maxIndex = n_rregs - 1; + } + + for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) { + const ULong jMask = 1ULL << r_idx; + + if (LIKELY((rMentioned & jMask) == 0)) { + continue; + } + + RRegState* rreg = &rreg_state[r_idx]; + const Bool isR = (rRead & jMask) != 0; + const Bool isW = (rWritten & jMask) != 0; + + if (isW && !isR) { + if (rreg->lrs_used == rreg->lrs_size) { + enlarge_rreg_lrs(rreg); + } + + rreg->lrs[rreg->lrs_used].live_after = ii; + rreg->lrs[rreg->lrs_used].dead_before = ii + 1; + rreg->lrs_used += 1; + } else if (!isW && isR) { + if ((rreg->lrs_used == 0) + || (rreg->lrs[rreg->lrs_used - 1].live_after + == INVALID_INSTRNO)) { + OFFENDING_RREG(r_idx, instr, "Read"); + } + rreg->lrs[rreg->lrs_used - 1].dead_before = ii + 1; + } else { + vassert(isR && isW); + if ((rreg->lrs_used == 0) + || (rreg->lrs[rreg->lrs_used - 1].live_after + == INVALID_INSTRNO)) { + OFFENDING_RREG(r_idx, instr, "Modify"); + } + rreg->lrs[rreg->lrs_used - 1].dead_before = ii + 1; + } + } + } + } + + if (DEBUG_REGALLOC) { + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + vex_printf("vreg %3u: [%3d, %3d)\n", + v_idx, vreg_state[v_idx].live_after, + vreg_state[v_idx].dead_before); + } + + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + const RRegState* rreg = &rreg_state[r_idx]; + vex_printf("rreg %2u (", r_idx); + UInt written = con->ppReg(con->univ->regs[r_idx]); + vex_printf("):"); + for (Int t = 15 - written; t > 0; t--) { + vex_printf(" "); + } + + for (UInt l = 0; l < rreg->lrs_used; l++) { + vex_printf("[%3d, %3d) ", + rreg->lrs[l].live_after, rreg->lrs[l].dead_before); + } + vex_printf("\n"); + } + } + + /* --- Stage 2. Allocate spill slots. --- */ + + /* Each spill slot is 8 bytes long. For vregs which take more than 64 bits + to spill (for example classes Flt64 and Vec128), we have to allocate two + consecutive spill slots. 
For 256 bit registers (class Vec256), we have to + allocate four consecutive spill slots. + + For Vec128-class on PowerPC, the spill slot's actual address must be + 16-byte aligned. Since the spill slot's address is computed as an offset + from the guest state pointer, and since the user of the generated code + must set that pointer to a 32-byte aligned value, we have the residual + obligation here of choosing a 16-byte aligned spill slot offset for + Vec128-class values. Since each spill slot is 8 bytes long, that means for + Vec128-class values we must allocate a spill slot number which is + zero mod 2. + + Similarly, for Vec256 class on amd64, find a spill slot number which is + zero mod 4. This guarantees it will be 32-byte aligned, which isn't + actually necessary on amd64 (we use movUpd etc to spill), but seems like + a good practice. + + Do a rank-based allocation of vregs to spill slot numbers. We put as few + values as possible in spill slots, but nevertheless need to have a spill + slot available for all vregs, just in case. */ + +# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) + STATIC_ASSERT((N_SPILL64S % 2) == 0); + STATIC_ASSERT((LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN) == 0); + + Short ss_busy_until_before[N_SPILL64S]; + vex_bzero(&ss_busy_until_before, sizeof(ss_busy_until_before)); + + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + /* True iff this vreg is unused. In which case we also expect that the + reg_class field for it has not been set. */ + if (vreg_state[v_idx].live_after == INVALID_INSTRNO) { + vassert(vreg_state[v_idx].reg_class == HRcINVALID); + continue; + } + + /* The spill slots are 64 bits in size. As per the comment on definition + of HRegClass in host_generic_regs.h, that means, to spill a vreg of + class Flt64 or Vec128, we'll need to find two adjacent spill slots to + use. For Vec256, we'll need to find four adjacent slots to use. 
Note, + this logic needs to be kept in sync with the size info on the + definition of HRegClass. */ + UInt ss_no; + switch (vreg_state[v_idx].reg_class) { + case HRcFlt64: + case HRcVec128: + /* Find two adjacent free slots which provide up to 128 bits to + spill the vreg. Since we are trying to find an even:odd pair, + move along in steps of 2 (slots). */ + for (ss_no = 0; ss_no < N_SPILL64S - 1; ss_no += 2) + if (ss_busy_until_before[ss_no + 0] <= vreg_state[v_idx].live_after + && ss_busy_until_before[ss_no + 1] <= vreg_state[v_idx].live_after) + break; + if (ss_no >= N_SPILL64S - 1) { + vpanic("N_SPILL64S is too low in VEX. Increase and recompile."); + } + ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before; + ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before; + break; + default: + /* The ordinary case -- just find a single lowest-numbered spill + slot which is available at the start point of this interval, + and assign the interval to it. */ + for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) { + if (ss_busy_until_before[ss_no] <= vreg_state[v_idx].live_after) + break; + } + if (ss_no == N_SPILL64S) { + vpanic("N_SPILL64S is too low in VEX. Increase and recompile."); + } + ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before; + break; + } + + /* This reflects VEX's hard-wired knowledge of the guest state layout: + the guest state itself, then two equal sized areas following it for two + sets of shadow state, and then the spill area. */ + vreg_state[v_idx].spill_offset + = toShort(con->guest_sizeB * 3 + ss_no * 8); + + /* Independent check that we've made a sane choice of the slot. 
*/ + switch (vreg_state[v_idx].reg_class) { + case HRcVec128: case HRcFlt64: + vassert((vreg_state[v_idx].spill_offset % 16) == 0); + break; + default: + vassert((vreg_state[v_idx].spill_offset % 8) == 0); + break; + } + } + + if (0) { + vex_printf("\n\n"); + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) + vex_printf("vreg %3u --> spill offset %u\n", + v_idx, vreg_state[v_idx].spill_offset); + } + + + /* --- State 3. Process instructions. --- */ + for (UShort ii = 0; ii < instrs_in->arr_used; ii++) { + HInstr* instr = instrs_in->arr[ii]; + + if (DEBUG_REGALLOC) { + vex_printf("\n====----====---- Instr %d ----====----====\n", ii); + vex_printf("---- "); + con->ppInstr(instrs_in->arr[ii], con->mode64); + vex_printf("\n\nInitial state:\n"); + print_state(con, vreg_state, n_vregs, rreg_state, n_rregs, ii); + vex_printf("\n"); + } + + /* ------------ Sanity checks ------------ */ + + /* Sanity checks are relatively expensive. So they are done only once + every 17 instructions, and just before the last instruction. */ + Bool do_sanity_check + = toBool( + SANITY_CHECKS_EVERY_INSTR + || ii == instrs_in->arr_used - 1 + || (ii > 0 && (ii % 17) == 0) + ); + + if (do_sanity_check) { + /* Sanity check: the vreg_state and rreg_state mutually-redundant + mappings are consistent. If vreg_state[v].rreg points at some + rreg_state entry then that rreg_state entry should point back at + vreg_state[v]. 
*/ + for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) { + if (vreg_state[v_idx].disp == Assigned) { + vassert(!hregIsVirtual(vreg_state[v_idx].rreg)); + + UInt r_idx = hregIndex(vreg_state[v_idx].rreg); + vassert(IS_VALID_RREGNO(r_idx)); + vassert(rreg_state[r_idx].disp == Bound); + vassert(hregIndex(rreg_state[r_idx].vreg) == v_idx); + + vassert(hregClass(vreg_state[v_idx].rreg) + == hregClass(con->univ->regs[r_idx])); + } + } + + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + if (rreg_state[r_idx].disp == Bound) { + vassert(hregIsVirtual(rreg_state[r_idx].vreg)); + + UInt v_idx = hregIndex(rreg_state[r_idx].vreg); + vassert(IS_VALID_VREGNO(v_idx)); + vassert(vreg_state[v_idx].disp == Assigned); + vassert(hregIndex(vreg_state[v_idx].rreg) == r_idx); + } + } + + /* Sanity check: if rreg has been marked as Reserved, there must be + a corresponding hard live range for it. */ + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + const RRegState* rreg = &rreg_state[r_idx]; + if (rreg->disp == Reserved) { + vassert(rreg->lrs_used > 0); + vassert(rreg->lr_current < rreg->lrs_used); + vassert(rreg->lrs[rreg->lr_current].live_after <= (Short) ii); + vassert((Short) ii < rreg->lrs[rreg->lr_current].dead_before); + } + } + } + + + /* --- MOV coalescing --- */ + /* Optimise register coalescing: + MOV v <-> v coalescing (done here). + MOV v <-> r coalescing (TODO: not yet). */ + /* If doing a reg-reg move between two vregs, and the src's live + range ends here and the dst's live range starts here, bind the dst + to the src's rreg, and that's all. */ + HReg vregS = INVALID_HREG; + HReg vregD = INVALID_HREG; + if (con->isMove(instr, &vregS, &vregD)) { + if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) { + /* Check that |isMove| is not telling us a bunch of lies ... 
*/ + vassert(hregClass(vregS) == hregClass(vregD)); + UInt vs_idx = hregIndex(vregS); + UInt vd_idx = hregIndex(vregD); + vassert(IS_VALID_VREGNO(vs_idx)); + vassert(IS_VALID_VREGNO(vd_idx)); + + if ((vreg_state[vs_idx].dead_before == ii + 1) + && (vreg_state[vd_idx].live_after == ii) + && (vreg_state[vs_idx].disp == Assigned)) { + + /* Live ranges are adjacent and source vreg is bound. + Finally we can do the coalescing. */ + HReg rreg = vreg_state[vs_idx].rreg; + vreg_state[vd_idx].disp = Assigned; + vreg_state[vd_idx].rreg = rreg; + vreg_state[vs_idx].disp = Unallocated; + vreg_state[vs_idx].rreg = INVALID_HREG; + + UInt r_idx = hregIndex(rreg); + vassert(rreg_state[r_idx].disp == Bound); + rreg_state[r_idx].vreg = vregD; + + if (DEBUG_REGALLOC) { + vex_printf("coalesced: "); + con->ppReg(vregS); + vex_printf(" -> "); + con->ppReg(vregD); + vex_printf("\n\n"); + } + + /* In rare cases it can happen that vregD's live range ends + here. Check and eventually free the vreg and rreg. + This effectively means that either the translated program + contained dead code (but VEX iropt passes are pretty good + at eliminating it) or the VEX backend generated dead code. */ + if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) { + vreg_state[vd_idx].disp = Unallocated; + vreg_state[vd_idx].rreg = INVALID_HREG; + rreg_state[r_idx].disp = Free; + rreg_state[r_idx].vreg = INVALID_HREG; + } + + /* Move on to the next instruction. We skip the post-instruction + stuff because all required house-keeping was done here. */ + continue; + } + } + } + + + /* --- Reserve and free rregs if needed. --- */ + /* If the rreg enters its hard live range and is not free: + 1. If the corresponding vreg is not used by the instruction, spill it. + 2. If the corresponding vreg is used by the instruction, then: + 2a. If there are no free rregs, spill a vreg not used by this + instruction. + 2b. Move the corresponding vreg to a free rreg. 
This is better than + spilling it and immediately reloading it. + */ + const ULong rRead = reg_usage[ii].rRead; + const ULong rWritten = reg_usage[ii].rWritten; + const ULong rMentioned = rRead | rWritten; + + if (rMentioned != 0) { + UInt rReg_minIndex = ULong__minIndex(rMentioned); + UInt rReg_maxIndex = ULong__maxIndex(rMentioned); + if (rReg_maxIndex >= n_rregs) { + rReg_maxIndex = n_rregs - 1; + } + + for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) { + const ULong jMask = 1ULL << r_idx; + + if (LIKELY((rMentioned & jMask) == 0)) { + continue; + } + + RRegState* rreg = &rreg_state[r_idx]; + if (LIKELY(rreg->lrs_used == 0)) { + continue; + } + if (rreg->disp == Reserved) { + continue; + } + + if ((rreg->lrs[rreg->lr_current].live_after <= (Short) ii) + && ((Short) ii < rreg->lrs[rreg->lr_current].dead_before)) { + + if (rreg->disp == Bound) { + /* Yes, there is an associated vreg. We need to deal with + it now somehow. */ + HReg vreg = rreg->vreg; + UInt v_idx = hregIndex(vreg); + + if (! HRegUsage__contains(&reg_usage[ii], vreg)) { + /* Spill the vreg. It is not used by this instruction. */ + spill_vreg(vreg, v_idx, ii, vreg_state, n_vregs, + rreg_state, n_rregs, instrs_out, con); + } else { + /* Find or make a free rreg where to move this vreg to. */ + UInt r_free_idx = FIND_OR_MAKE_FREE_RREG( + ii, v_idx, vreg_state[v_idx].reg_class, True); + + /* Generate "move" between real registers. */ + HInstr* move = con->genMove(con->univ->regs[r_idx], + con->univ->regs[r_free_idx], con->mode64); + vassert(move != NULL); + emit_instr(move, instrs_out, con, "move"); + + /* Update the register allocator state. */ + vassert(vreg_state[v_idx].disp == Assigned); + vreg_state[v_idx].rreg = con->univ->regs[r_free_idx]; + rreg_state[r_free_idx].disp = Bound; + rreg_state[r_free_idx].vreg = vreg; + rreg->disp = Free; + rreg->vreg = INVALID_HREG; + } + } + + /* Finally claim the rreg as reserved. 
*/ + rreg->disp = Reserved; + + if (DEBUG_REGALLOC) { + vex_printf("rreg has been reserved: "); + con->ppReg(con->univ->regs[r_idx]); + vex_printf("\n\n"); + } + } + } + } + + + /* --- Direct reload optimisation. --- */ + /* If the instruction reads exactly one vreg which is currently spilled, + and this is the last use of that vreg, see if we can convert + the instruction into one that reads directly from the spill slot. + This is clearly only possible for x86 and amd64 targets, since ppc and + arm are load-store architectures. If successful, replace + instrs_in->arr[ii] with this new instruction, and recompute + its reg_usage, so that the change is invisible to the standard-case + handling that follows. */ + if ((con->directReload != NULL) && (reg_usage[ii].n_vRegs <= 2)) { + Bool debug_direct_reload = False; + UInt nreads = 0; + HReg vreg_found = INVALID_HREG; + Short spill_offset = 0; + + for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) { + HReg vreg = reg_usage[ii].vRegs[j]; + vassert(hregIsVirtual(vreg)); + + if (reg_usage[ii].vMode[j] == HRmRead) { + nreads++; + UInt v_idx = hregIndex(vreg); + vassert(IS_VALID_VREGNO(v_idx)); + if (vreg_state[v_idx].disp == Spilled) { + /* Is this its last use? */ + vassert(vreg_state[v_idx].dead_before >= (Short) (ii + 1)); + if ((vreg_state[v_idx].dead_before == (Short) (ii + 1)) + && hregIsInvalid(vreg_found)) { + vreg_found = vreg; + spill_offset = vreg_state[v_idx].spill_offset; + } + } + } + } + + if (!hregIsInvalid(vreg_found) && (nreads == 1)) { + if (reg_usage[ii].n_vRegs == 2) { + vassert(! 
sameHReg(reg_usage[ii].vRegs[0], + reg_usage[ii].vRegs[1])); + } + + HInstr* reloaded = con->directReload(instrs_in->arr[ii], + vreg_found, spill_offset); + if (debug_direct_reload && (reloaded != NULL)) { + vex_printf("[%3d] ", spill_offset); + ppHReg(vreg_found); + vex_printf(": "); + con->ppInstr(instr, con->mode64); + } + if (reloaded != NULL) { + /* Update info about the instruction, so it looks as if it had + been in this form all along. */ + instr = reloaded; + instrs_in->arr[ii] = reloaded; + con->getRegUsage(&reg_usage[ii], instr, con->mode64); + if (debug_direct_reload) { + vex_printf(" --> "); + con->ppInstr(reloaded, con->mode64); + } + } + + if (debug_direct_reload && (reloaded != NULL)) { + vex_printf("\n"); + } + } + } + + + /* The vreg -> rreg map constructed and then applied to each + instruction. */ + HRegRemap remap; + initHRegRemap(&remap); + + /* --- Allocate vregs used by the instruction. --- */ + /* Vregs used by the instruction can be in the following states: + - Unallocated: vreg is entering its live range. Find a free rreg. + - Assigned: we do nothing; rreg has been allocated previously. + - Spilled: Find a free rreg and reload vreg into it. + Naturally, finding a free rreg may involve spilling a vreg not used by + the instruction. */ + for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) { + HReg vreg = reg_usage[ii].vRegs[j]; + vassert(hregIsVirtual(vreg)); + + if (0) { + vex_printf("considering "); con->ppReg(vreg); vex_printf("\n"); + } + + UInt v_idx = hregIndex(vreg); + vassert(IS_VALID_VREGNO(v_idx)); + HReg rreg = vreg_state[v_idx].rreg; + if (vreg_state[v_idx].disp == Assigned) { + UInt r_idx = hregIndex(rreg); + vassert(rreg_state[r_idx].disp == Bound); + addToHRegRemap(&remap, vreg, rreg); + } else { + vassert(hregIsInvalid(rreg)); + + /* Find or make a free rreg of the correct class. 
*/ + UInt r_idx = FIND_OR_MAKE_FREE_RREG( + ii, v_idx, vreg_state[v_idx].reg_class, False); + rreg = con->univ->regs[r_idx]; + + /* Generate reload only if the vreg is spilled and is about to be + read or modified. If it is merely written then reloading it first + would be pointless. */ + if ((vreg_state[v_idx].disp == Spilled) + && (reg_usage[ii].vMode[j] != HRmWrite)) { + + HInstr* reload1 = NULL; + HInstr* reload2 = NULL; + con->genReload(&reload1, &reload2, rreg, + vreg_state[v_idx].spill_offset, con->mode64); + vassert(reload1 != NULL || reload2 != NULL); + if (reload1 != NULL) { + emit_instr(reload1, instrs_out, con, "reload1"); + } + if (reload2 != NULL) { + emit_instr(reload2, instrs_out, con, "reload2"); + } + } + + rreg_state[r_idx].disp = Bound; + rreg_state[r_idx].vreg = vreg; + vreg_state[v_idx].disp = Assigned; + vreg_state[v_idx].rreg = rreg; + addToHRegRemap(&remap, vreg, rreg); + } + } + + con->mapRegs(&remap, instr, con->mode64); + emit_instr(instr, instrs_out, con, NULL); + + if (DEBUG_REGALLOC) { + vex_printf("After dealing with current instruction:\n"); + print_state(con, vreg_state, n_vregs, rreg_state, n_rregs, ii); + vex_printf("\n"); + } + + /* ------ Post-instruction actions. ------ */ + /* Free rregs which: + - Have been reserved and whose hard live range ended. + - Have been bound to vregs whose live range ended. */ + for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { + RRegState* rreg = &rreg_state[r_idx]; + switch (rreg->disp) { + case Free: + break; + case Reserved: + if (rreg->lrs_used > 0) { + /* Consider "dead before" the next instruction. */ + if (rreg->lrs[rreg->lr_current].dead_before <= (Short) ii + 1) { + rreg_state[r_idx].disp = Free; + rreg_state[r_idx].vreg = INVALID_HREG; + if (rreg->lr_current < rreg->lrs_used - 1) { + rreg->lr_current += 1; + } + } + } + break; + case Bound: { + UInt v_idx = hregIndex(rreg->vreg); + /* Consider "dead before" the next instruction. 
*/ + if (vreg_state[v_idx].dead_before <= (Short) ii + 1) { + vreg_state[v_idx].disp = Unallocated; + vreg_state[v_idx].rreg = INVALID_HREG; + rreg_state[r_idx].disp = Free; + rreg_state[r_idx].vreg = INVALID_HREG; + } + break; + } + default: + vassert(0); + } + } + } + + return instrs_out; +} + +/*----------------------------------------------------------------------------*/ +/*--- host_generic_reg_alloc3.c ---*/ +/*----------------------------------------------------------------------------*/ diff --git a/VEX/priv/host_generic_regs.c b/VEX/priv/host_generic_regs.c index befabbad18..675b8d0277 100644 --- a/VEX/priv/host_generic_regs.c +++ b/VEX/priv/host_generic_regs.c @@ -58,11 +58,10 @@ void ppHRegClass ( HRegClass hrc ) } /* Generic printing for registers. */ -void ppHReg ( HReg r ) +UInt ppHReg ( HReg r ) { if (hregIsInvalid(r)) { - vex_printf("HReg_INVALID"); - return; + return vex_printf("HReg_INVALID"); } const Bool isV = hregIsVirtual(r); const HChar* maybe_v = isV ? "v" : ""; @@ -71,12 +70,12 @@ void ppHReg ( HReg r ) always zero for virtual registers, so that's pointless -- hence show the index number instead. 
*/ switch (hregClass(r)) { - case HRcInt32: vex_printf("%%%sr%u", maybe_v, regNN); return; - case HRcInt64: vex_printf("%%%sR%u", maybe_v, regNN); return; - case HRcFlt32: vex_printf("%%%sF%u", maybe_v, regNN); return; - case HRcFlt64: vex_printf("%%%sD%u", maybe_v, regNN); return; - case HRcVec64: vex_printf("%%%sv%u", maybe_v, regNN); return; - case HRcVec128: vex_printf("%%%sV%u", maybe_v, regNN); return; + case HRcInt32: return vex_printf("%%%sr%u", maybe_v, regNN); + case HRcInt64: return vex_printf("%%%sR%u", maybe_v, regNN); + case HRcFlt32: return vex_printf("%%%sF%u", maybe_v, regNN); + case HRcFlt64: return vex_printf("%%%sD%u", maybe_v, regNN); + case HRcVec64: return vex_printf("%%%sv%u", maybe_v, regNN); + case HRcVec128: return vex_printf("%%%sV%u", maybe_v, regNN); default: vpanic("ppHReg"); } } @@ -94,6 +93,11 @@ void RRegUniverse__init ( /*OUT*/RRegUniverse* univ ) for (UInt i = 0; i < N_RREGUNIVERSE_REGS; i++) { univ->regs[i] = INVALID_HREG; } + + for (UInt i = 0; i <= HrcLAST; i++) { + univ->allocable_start[i] = N_RREGUNIVERSE_REGS; + univ->allocable_end[i] = N_RREGUNIVERSE_REGS; + } } void RRegUniverse__check_is_sane ( const RRegUniverse* univ ) @@ -113,6 +117,33 @@ void RRegUniverse__check_is_sane ( const RRegUniverse* univ ) HReg reg = univ->regs[i]; vassert(hregIsInvalid(reg)); } + + /* Determine register classes used and if they form contiguous range. 
*/ + Bool regclass_used[HrcLAST + 1]; + for (UInt i = 0; i <= HrcLAST; i++) { + regclass_used[i] = False; + } + + for (UInt i = 0; i < univ->allocable; i++) { + HReg reg = univ->regs[i]; + HRegClass regclass = hregClass(reg); + if (!regclass_used[regclass]) { + regclass_used[regclass] = True; + } + } + + UInt regs_visited = 0; + for (UInt i = 0; i <= HrcLAST; i++) { + if (regclass_used[i]) { + for (UInt j = univ->allocable_start[i]; + j <= univ->allocable_end[i]; j++) { + vassert(hregClass(univ->regs[j]) == i); + regs_visited += 1; + } + } + } + + vassert(regs_visited == univ->allocable); } diff --git a/VEX/priv/host_generic_regs.h b/VEX/priv/host_generic_regs.h index 944cc1bed6..8694922b1f 100644 --- a/VEX/priv/host_generic_regs.h +++ b/VEX/priv/host_generic_regs.h @@ -93,7 +93,7 @@ typedef struct { UInt u32; } HReg; available on any specific host. For example on x86, the available classes are: Int32, Flt64, Vec128 only. - IMPORTANT NOTE: host_generic_reg_alloc2.c needs how much space is + IMPORTANT NOTE: host_generic_reg_alloc*.c needs to know how much space is needed to spill each class of register. It allocates the following amount of space: @@ -106,7 +106,7 @@ typedef struct { UInt u32; } HReg; HRcVec128 128 bits If you add another regclass, you must remember to update - host_generic_reg_alloc2.c accordingly. + host_generic_reg_alloc*.c and RRegUniverse accordingly. When adding entries to enum HRegClass, do not use any value > 14 or < 1. */ @@ -118,15 +118,17 @@ typedef HRcFlt32=5, /* 32-bit float */ HRcFlt64=6, /* 64-bit float */ HRcVec64=7, /* 64-bit SIMD */ - HRcVec128=8 /* 128-bit SIMD */ + HRcVec128=8, /* 128-bit SIMD */ + HrcLAST=HRcVec128 } HRegClass; extern void ppHRegClass ( HRegClass ); -/* Print an HReg in a generic (non-target-specific) way. */ -extern void ppHReg ( HReg ); +/* Print an HReg in a generic (non-target-specific) way. + Returns number of HChar's written. */ +extern UInt ppHReg ( HReg ); /* Construct. 
The goal here is that compiler can fold this down to a constant in the case where the four arguments are constants, which @@ -149,7 +151,7 @@ static inline HReg mkHReg ( Bool virtual, HRegClass rc, UInt enc, UInt ix ) static inline HRegClass hregClass ( HReg r ) { HRegClass rc = (HRegClass)((r.u32 >> 27) & 0xF); - vassert(rc >= HRcInt32 && rc <= HRcVec128); + vassert(rc >= HRcInt32 && rc <= HrcLAST); return rc; } @@ -221,6 +223,25 @@ typedef index here, since this is the only place where we map index numbers to actual registers. */ HReg regs[N_RREGUNIVERSE_REGS]; + + /* Ranges for groups of allocable registers. Used to quickly address only + a group of allocable registers belonging to the same register class. + Indexes into |allocable_{start,end}| are HRcClass entries, such as + HRcInt64. Values in |allocable_{start,end}| give a valid range into + |regs| where registers corresponding to the given register class are + found. + + For example, let's say allocable_start[HRcInt64] == 10 and + allocable_end[HRcInt64] == 14. Then regs[10], regs[11], regs[12], + regs[13], and regs[14] give all registers of register class HRcInt64. + + If a register class is not present, then values of the corresponding + |allocable_{start,end}| elements are equal to N_RREGUNIVERSE_REGS. + + Naturally registers in |regs| must form contiguous groups. This is + checked by RRegUniverse__check_is_sane(). */ + UInt allocable_start[HrcLAST + 1]; + UInt allocable_end[HrcLAST + 1]; } RRegUniverse; @@ -305,7 +326,7 @@ extern Bool HRegUsage__contains ( const HRegUsage*, HReg ); /*---------------------------------------------------------*/ /* Note that such maps can only map virtual regs to real regs. - addToHRegRenap will barf if given a pair not of that form. As a + addToHRegRemap will barf if given a pair not of that form. As a result, no valid HRegRemap will bind a real reg to anything, and so if lookupHRegMap is given a real reg, it returns it unchanged. 
This is precisely the behaviour that the register allocator needs @@ -486,48 +507,51 @@ static inline Bool is_RetLoc_INVALID ( RetLoc rl ) { /*--- Reg alloc: TODO: move somewhere else ---*/ /*---------------------------------------------------------*/ -extern -HInstrSB* doRegisterAllocation ( - - /* Incoming virtual-registerised code. */ - HInstrSB* sb_in, - - /* The real-register universe to use. This contains facts about - real registers, one of which is the set of registers available - for allocation. */ - const RRegUniverse* univ, - - /* Return True iff the given insn is a reg-reg move, in which - case also return the src and dst regs. */ - Bool (*isMove) (const HInstr*, HReg*, HReg*), - - /* Get info about register usage in this insn. */ - void (*getRegUsage) (HRegUsage*, const HInstr*, Bool), - - /* Apply a reg-reg mapping to an insn. */ - void (*mapRegs) (HRegRemap*, HInstr*, Bool), - - /* Is this instruction actually HInstrIfThenElse? Returns pointer to - HInstrIfThenElse if yes, NULL otherwise. */ - HInstrIfThenElse* (*isIfThenElse) (const HInstr*), - - /* Return insn(s) to spill/restore a real reg to a spill slot - offset. And optionally a function to do direct reloads. */ - void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ), - void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ), - HInstr* (*directReload) ( HInstr*, HReg, Short ), - Int guest_sizeB, - - /* For debug printing only. */ - void (*ppInstr) ( const HInstr*, Bool ), - void (*ppCondCode)(HCondCode), - void (*ppReg) ( HReg ), +/* Control of the VEX register allocator. */ +typedef + struct { + /* The real-register universe to use. This contains facts about real + registers, one of which is the set of registers available for + allocation. */ + const RRegUniverse* univ; + + /* Return True iff the given insn is a reg-reg move, in which case also + return the src and dst regs. */ + Bool (*isMove)(const HInstr*, HReg*, HReg*); + + /* Get info about register usage in this insn. 
*/ + void (*getRegUsage)(HRegUsage*, const HInstr*, Bool); + + /* Apply a reg-reg mapping to an insn. */ + void (*mapRegs)(HRegRemap*, HInstr*, Bool); + + /* Is this instruction actually HInstrIfThenElse? Returns pointer to + HInstrIfThenElse if yes, NULL otherwise. */ + HInstrIfThenElse* (*isIfThenElse) (const HInstr*); + + /* Return insn(s) to spill/restore a real register to a spill slot offset. + Also a function to move between registers. + And optionally a function to do direct reloads. */ + void (*genSpill)(HInstr**, HInstr**, HReg, Int, Bool); + void (*genReload)(HInstr**, HInstr**, HReg, Int, Bool); + HInstr* (*genMove)(HReg from, HReg to, Bool); + HInstr* (*directReload)(HInstr*, HReg, Short); + UInt guest_sizeB; + + /* For debug printing only. */ + void (*ppInstr)(const HInstr*, Bool); + UInt (*ppReg)(HReg); + + /* 32/64bit mode */ + Bool mode64; + } + RegAllocControl; - /* 32/64bit mode */ - Bool mode64 +extern HInstrSB* doRegisterAllocation( + HInstrSB* instrs_in, + const RegAllocControl* con ); - #endif /* ndef __VEX_HOST_GENERIC_REGS_H */ /*---------------------------------------------------------------*/ diff --git a/VEX/priv/host_mips_defs.c b/VEX/priv/host_mips_defs.c index d6a3219751..4f98bf6593 100644 --- a/VEX/priv/host_mips_defs.c +++ b/VEX/priv/host_mips_defs.c @@ -63,6 +63,7 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. */ + ru->allocable_start[(mode64) ? 
HRcInt64 : HRcInt32] = ru->size; ru->regs[ru->size++] = hregMIPS_GPR16(mode64); ru->regs[ru->size++] = hregMIPS_GPR17(mode64); ru->regs[ru->size++] = hregMIPS_GPR18(mode64); @@ -76,7 +77,10 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) ru->regs[ru->size++] = hregMIPS_GPR14(mode64); ru->regs[ru->size++] = hregMIPS_GPR15(mode64); ru->regs[ru->size++] = hregMIPS_GPR24(mode64); + ru->allocable_end[(mode64) ? HRcInt64 : HRcInt32] = ru->size - 1; + /* s7 (=guest_state) */ + ru->allocable_start[(mode64) ? HRcFlt64 : HRcFlt32] = ru->size; ru->regs[ru->size++] = hregMIPS_F16(mode64); ru->regs[ru->size++] = hregMIPS_F18(mode64); ru->regs[ru->size++] = hregMIPS_F20(mode64); @@ -85,8 +89,11 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) ru->regs[ru->size++] = hregMIPS_F26(mode64); ru->regs[ru->size++] = hregMIPS_F28(mode64); ru->regs[ru->size++] = hregMIPS_F30(mode64); + ru->allocable_end[(mode64) ? HRcFlt64 : HRcFlt32] = ru->size - 1; + if (!mode64) { /* Fake double floating point */ + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregMIPS_D0(mode64); ru->regs[ru->size++] = hregMIPS_D1(mode64); ru->regs[ru->size++] = hregMIPS_D2(mode64); @@ -95,6 +102,7 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) ru->regs[ru->size++] = hregMIPS_D5(mode64); ru->regs[ru->size++] = hregMIPS_D6(mode64); ru->regs[ru->size++] = hregMIPS_D7(mode64); + ru->allocable_end[HRcFlt64] = ru->size - 1; } ru->allocable = ru->size; @@ -126,7 +134,7 @@ const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ) } -void ppHRegMIPS(HReg reg, Bool mode64) +UInt ppHRegMIPS(HReg reg, Bool mode64) { Int r; static const HChar *ireg32_names[35] @@ -151,8 +159,7 @@ void ppHRegMIPS(HReg reg, Bool mode64) /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ @@ -164,29 +171,23 @@ void ppHRegMIPS(HReg reg, Bool mode64) case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); case HRcInt64: r = hregEncoding (reg); vassert (r >= 0 && r < 32); - vex_printf ("%s", ireg32_names[r]); - return; + return vex_printf ("%s", ireg32_names[r]); case HRcFlt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", freg32_names[r]); - return; + return vex_printf("%s", freg32_names[r]); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", freg64_names[r]); - return; + return vex_printf("%s", freg64_names[r]); default: vpanic("ppHRegMIPS"); break; } - - return; } @@ -2029,6 +2030,18 @@ void genReload_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, } } +MIPSInstr* genMove_MIPS(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + case HRcInt64: + return MIPSInstr_Alu(Malu_OR, to, from, MIPSRH_Reg(from)); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_MIPS: unimplemented regclass"); + } +} + /* --------- The mips assembler --------- */ inline static UInt iregNo(HReg r, Bool mode64) diff --git a/VEX/priv/host_mips_defs.h b/VEX/priv/host_mips_defs.h index 11bea76c11..45fff16fd0 100644 --- a/VEX/priv/host_mips_defs.h +++ b/VEX/priv/host_mips_defs.h @@ -135,7 +135,7 @@ ST_IN HReg hregMIPS_GPR31 ( Bool mode64 ) { return GPR(mode64, 31, 37, 45); } # define MIPS_N_REGPARMS 8 #endif -extern void ppHRegMIPS ( HReg, Bool ); +extern UInt ppHRegMIPS ( HReg, Bool ); /* --------- Condition codes, Intel encoding. 
--------- */ @@ -700,6 +700,7 @@ extern void genSpill_MIPS ( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, Int offset, Bool); extern void genReload_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, Int offset, Bool); +extern MIPSInstr* genMove_MIPS(HReg from, HReg to, Bool mode64); extern const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 ); diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c index 6f7c009ede..31d6ca689f 100644 --- a/VEX/priv/host_ppc_defs.c +++ b/VEX/priv/host_ppc_defs.c @@ -68,6 +68,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) // GPR0 = scratch reg where poss. - some ops interpret as value zero // GPR1 = stack pointer // GPR2 = TOC pointer + ru->allocable_start[(mode64) ? HRcInt64 : HRcInt32] = ru->size; ru->regs[ru->size++] = hregPPC_GPR3(mode64); ru->regs[ru->size++] = hregPPC_GPR4(mode64); ru->regs[ru->size++] = hregPPC_GPR5(mode64); @@ -100,6 +101,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) ru->regs[ru->size++] = hregPPC_GPR26(mode64); ru->regs[ru->size++] = hregPPC_GPR27(mode64); ru->regs[ru->size++] = hregPPC_GPR28(mode64); + ru->allocable_end[(mode64) ? HRcInt64 : HRcInt32] = ru->size - 1; // GPR29 is reserved for the dispatcher // GPR30 is reserved as AltiVec spill reg temporary // GPR31 is reserved for the GuestStatePtr @@ -109,6 +111,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) the occasional extra spill instead. */ /* For both ppc32-linux and ppc64-linux, f14-f31 are callee save. So use them. 
*/ + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregPPC_FPR14(mode64); ru->regs[ru->size++] = hregPPC_FPR15(mode64); ru->regs[ru->size++] = hregPPC_FPR16(mode64); @@ -117,11 +120,13 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) ru->regs[ru->size++] = hregPPC_FPR19(mode64); ru->regs[ru->size++] = hregPPC_FPR20(mode64); ru->regs[ru->size++] = hregPPC_FPR21(mode64); + ru->allocable_end[HRcFlt64] = ru->size - 1; /* Same deal re Altivec */ /* For both ppc32-linux and ppc64-linux, v20-v31 are callee save. So use them. */ /* NB, vr29 is used as a scratch temporary -- do not allocate */ + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregPPC_VR20(mode64); ru->regs[ru->size++] = hregPPC_VR21(mode64); ru->regs[ru->size++] = hregPPC_VR22(mode64); @@ -130,6 +135,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) ru->regs[ru->size++] = hregPPC_VR25(mode64); ru->regs[ru->size++] = hregPPC_VR26(mode64); ru->regs[ru->size++] = hregPPC_VR27(mode64); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; /* And other regs, not available to the allocator. */ @@ -146,7 +152,7 @@ const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ) } -void ppHRegPPC ( HReg reg ) +UInt ppHRegPPC ( HReg reg ) { Int r; static const HChar* ireg32_names[32] @@ -160,31 +166,26 @@ void ppHRegPPC ( HReg reg ) "%r28", "%r29", "%r30", "%r31" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%%fr%d", r); - return; + return vex_printf("%%fr%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 32); - vex_printf("%%v%d", r); - return; + return vex_printf("%%v%d", r); default: vpanic("ppHRegPPC"); } @@ -3210,6 +3211,20 @@ void genReload_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +PPCInstr* genMove_PPC(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + case HRcInt64: + return PPCInstr_Alu(Palu_OR, to, from, PPCRH_Reg(from)); + case HRcFlt64: + return PPCInstr_FpUnary(Pfp_MOV, to, from); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_PPC: unimplemented regclass"); + } +} + /* --------- The ppc assembler (bleh.) 
--------- */ diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h index 9a9187ddf1..5cc9a85e92 100644 --- a/VEX/priv/host_ppc_defs.h +++ b/VEX/priv/host_ppc_defs.h @@ -122,7 +122,7 @@ ST_IN HReg hregPPC_VR29 ( Bool mode64 ) { return VR (mode64, 29, 43, 45); } /* Num registers used for function calls */ #define PPC_N_REGPARMS 8 -extern void ppHRegPPC ( HReg ); +extern UInt ppHRegPPC ( HReg ); /* --------- Condition codes --------- */ @@ -1215,6 +1215,7 @@ extern void genSpill_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offsetB, Bool mode64 ); extern void genReload_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offsetB, Bool mode64 ); +extern PPCInstr* genMove_PPC(HReg from, HReg to, Bool mode64); extern const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 ); diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 91f08e5e9f..16acf3ee79 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -366,10 +366,10 @@ ppS390Instr(const s390_insn *insn, Bool mode64) vex_printf("%s", s390_insn_as_string(insn)); } -void +UInt ppHRegS390(HReg reg) { - vex_printf("%s", s390_hreg_as_string(reg)); + return vex_printf("%s", s390_hreg_as_string(reg)); } /*------------------------------------------------------------*/ @@ -402,15 +402,19 @@ getRRegUniverse_S390(void) FPR12 - FPR15 are also used as register pairs for 128-bit floating point operations */ - UInt regno; - for (regno = 1; regno <= 11; ++regno) { + ru->allocable_start[HRcInt64] = ru->size; + for (UInt regno = 1; regno <= 11; ++regno) { gpr_index[regno] = ru->size; ru->regs[ru->size++] = s390_hreg_gpr(regno); } - for (regno = 0; regno <= 15; ++regno) { + ru->allocable_end[HRcInt64] = ru->size - 1; + + ru->allocable_start[HRcFlt64] = ru->size; + for (UInt regno = 0; regno <= 15; ++regno) { fpr_index[regno] = ru->size; ru->regs[ru->size++] = s390_hreg_fpr(regno); } + ru->allocable_end[HRcFlt64] = ru->size - 1; ru->allocable = ru->size; /* Add the 
registers that are not available for allocation. @@ -516,6 +520,17 @@ genReload_S390(HInstr **i1, HInstr **i2, HReg rreg, Int offsetB, Bool mode64) } } +s390_insn* genMove_S390(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt64: + return s390_insn_move(sizeofIRType(Ity_I64), to, from); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_S390: unimplemented regclass"); + } +} + /* Helper function for s390_insn_get_reg_usage */ static void s390_opnd_RMI_get_reg_usage(HRegUsage *u, s390_opnd_RMI op) diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 75a0a4484c..41b6ecd672 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -736,7 +736,7 @@ const HChar *s390_insn_as_string(const s390_insn *); void ppS390AMode(const s390_amode *); void ppS390Instr(const s390_insn *, Bool mode64); -void ppHRegS390(HReg); +UInt ppHRegS390(HReg); /* Some functions that insulate the register allocator from details of the underlying instruction set. */ @@ -749,6 +749,7 @@ Int emit_S390Instr ( Bool *, UChar *, Int, const s390_insn *, Bool, const RRegUniverse *getRRegUniverse_S390( void ); void genSpill_S390 ( HInstr **, HInstr **, HReg , Int , Bool ); void genReload_S390 ( HInstr **, HInstr **, HReg , Int , Bool ); +extern s390_insn* genMove_S390(HReg from, HReg to, Bool mode64); HInstrSB *iselSB_S390 ( const IRSB *, VexArch, const VexArchInfo *, const VexAbiInfo *, Int, Int, Bool, Bool, Addr); diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index ac2225c6dc..5e28ec91ee 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -63,18 +63,25 @@ const RRegUniverse* getRRegUniverse_X86 ( void ) /* Add the registers. The initial segment of this array must be those available for allocation by reg-alloc, and those that follow are not available for allocation. 
*/ + ru->allocable_start[HRcInt32] = ru->size; ru->regs[ru->size++] = hregX86_EAX(); ru->regs[ru->size++] = hregX86_EBX(); ru->regs[ru->size++] = hregX86_ECX(); ru->regs[ru->size++] = hregX86_EDX(); ru->regs[ru->size++] = hregX86_ESI(); ru->regs[ru->size++] = hregX86_EDI(); + ru->allocable_end[HRcInt32] = ru->size - 1; + + ru->allocable_start[HRcFlt64] = ru->size; ru->regs[ru->size++] = hregX86_FAKE0(); ru->regs[ru->size++] = hregX86_FAKE1(); ru->regs[ru->size++] = hregX86_FAKE2(); ru->regs[ru->size++] = hregX86_FAKE3(); ru->regs[ru->size++] = hregX86_FAKE4(); ru->regs[ru->size++] = hregX86_FAKE5(); + ru->allocable_end[HRcFlt64] = ru->size - 1; + + ru->allocable_start[HRcVec128] = ru->size; ru->regs[ru->size++] = hregX86_XMM0(); ru->regs[ru->size++] = hregX86_XMM1(); ru->regs[ru->size++] = hregX86_XMM2(); @@ -83,7 +90,9 @@ const RRegUniverse* getRRegUniverse_X86 ( void ) ru->regs[ru->size++] = hregX86_XMM5(); ru->regs[ru->size++] = hregX86_XMM6(); ru->regs[ru->size++] = hregX86_XMM7(); + ru->allocable_end[HRcVec128] = ru->size - 1; ru->allocable = ru->size; + /* And other regs, not available to the allocator. */ ru->regs[ru->size++] = hregX86_ESP(); ru->regs[ru->size++] = hregX86_EBP(); @@ -95,33 +104,29 @@ const RRegUniverse* getRRegUniverse_X86 ( void ) } -void ppHRegX86 ( HReg reg ) +UInt ppHRegX86 ( HReg reg ) { Int r; static const HChar* ireg32_names[8] = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" }; /* Be generic for all virtual regs. */ if (hregIsVirtual(reg)) { - ppHReg(reg); - return; + return ppHReg(reg); } /* But specific for real regs. 
*/ switch (hregClass(reg)) { case HRcInt32: r = hregEncoding(reg); vassert(r >= 0 && r < 8); - vex_printf("%s", ireg32_names[r]); - return; + return vex_printf("%s", ireg32_names[r]); case HRcFlt64: r = hregEncoding(reg); vassert(r >= 0 && r < 6); - vex_printf("%%fake%d", r); - return; + return vex_printf("%%fake%d", r); case HRcVec128: r = hregEncoding(reg); vassert(r >= 0 && r < 8); - vex_printf("%%xmm%d", r); - return; + return vex_printf("%%xmm%d", r); default: vpanic("ppHRegX86"); } @@ -1775,6 +1780,19 @@ void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, } } +X86Instr* genMove_X86(HReg from, HReg to, Bool mode64) +{ + switch (hregClass(from)) { + case HRcInt32: + return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(from), to); + case HRcVec128: + return X86Instr_SseReRg(Xsse_MOV, from, to); + default: + ppHRegClass(hregClass(from)); + vpanic("genMove_X86: unimplemented regclass"); + } +} + /* The given instruction reads the specified vreg exactly once, and that vreg is currently located at the given spill offset. If possible, return a variant of the instruction to one which instead diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h index d6deb963f5..d32ff9862d 100644 --- a/VEX/priv/host_x86_defs.h +++ b/VEX/priv/host_x86_defs.h @@ -74,7 +74,7 @@ ST_IN HReg hregX86_ESP ( void ) { return mkHReg(False, HRcInt32, 4, 20); } ST_IN HReg hregX86_EBP ( void ) { return mkHReg(False, HRcInt32, 5, 21); } #undef ST_IN -extern void ppHRegX86 ( HReg ); +extern UInt ppHRegX86 ( HReg ); /* --------- Condition codes, Intel encoding. 
--------- */ @@ -737,7 +737,7 @@ extern void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); extern void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2, HReg rreg, Int offset, Bool ); - +extern X86Instr* genMove_X86(HReg from, HReg to, Bool); extern X86Instr* directReload_X86 ( X86Instr* i, HReg vreg, Short spill_off ); extern const RRegUniverse* getRRegUniverse_X86 ( void ); diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index 87f8a2d45b..98c7936085 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -714,10 +714,11 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, HInstrIfThenElse* (*isIfThenElse)(const HInstr*); void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ); void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ); + HInstr* (*genMove) ( HReg, HReg, Bool ); HInstr* (*directReload) ( HInstr*, HReg, Short ); void (*ppInstr) ( const HInstr*, Bool ); void (*ppCondCode) ( HCondCode ); - void (*ppReg) ( HReg ); + UInt (*ppReg) ( HReg ); HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*, const VexAbiInfo*, Int, Int, Bool, Bool, Addr ); @@ -744,6 +745,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = NULL; genSpill = NULL; genReload = NULL; + genMove = NULL; directReload = NULL; ppInstr = NULL; ppCondCode = NULL; @@ -864,6 +866,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, isIfThenElse = CAST_TO_TYPEOF(isIfThenElse) X86FN(isIfThenElse_X86Instr); genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86); genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86); + genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86); directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86); ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr); ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode); @@ -882,6 +885,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) 
AMD64FN(mapRegs_AMD64Instr); genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64); genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64); + genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64); directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64); ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr); ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64); @@ -899,6 +903,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr); genSpill = CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC); genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC); + genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC); ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr); ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC); iselSB = PPC32FN(iselSB_PPC); @@ -915,6 +920,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr); genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC); genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC); + genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC); ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr); ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC); iselSB = PPC64FN(iselSB_PPC); @@ -932,6 +938,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr); genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390); genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390); + genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390); // fixs390: consider implementing directReload_S390 ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr); ppReg = CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390); @@ -949,6 +956,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr); genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM); genReload = 
CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM); + genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM); ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr); ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM); iselSB = ARMFN(iselSB_ARM); @@ -965,6 +973,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr); genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64); genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64); + genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64); ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr); ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64); iselSB = ARM64FN(iselSB_ARM64); @@ -981,6 +990,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr); genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS); genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS); + genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS); ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr); ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS); iselSB = MIPS32FN(iselSB_MIPS); @@ -998,6 +1008,7 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr); genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS); genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS); + genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS); ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr); ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS); iselSB = MIPS64FN(iselSB_MIPS); @@ -1078,11 +1089,14 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta, } /* Register allocate. 
*/ - rcode = doRegisterAllocation ( vcode, rRegUniv, - isMove, getRegUsage, mapRegs, isIfThenElse, - genSpill, genReload, directReload, - guest_sizeB, - ppInstr, ppCondCode, ppReg, mode64 ); + RegAllocControl con = { + .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage, + .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill, + .genReload = genReload, .genMove = genMove, .directReload = directReload, + .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppReg = ppReg, + .mode64 = mode64}; + + rcode = doRegisterAllocation(vcode, &con); vexAllocSanityCheck(); diff --git a/VEX/priv/main_util.c b/VEX/priv/main_util.c index 2da5066bd1..4ed886619c 100644 --- a/VEX/priv/main_util.c +++ b/VEX/priv/main_util.c @@ -285,13 +285,40 @@ Bool vex_streq ( const HChar* s1, const HChar* s2 ) } } +/* Vectorised memset, copied from Valgrind's m_libcbase.c. */ void vex_bzero ( void* sV, SizeT n ) { - SizeT i; - UChar* s = (UChar*)sV; - /* No laughing, please. Just don't call this too often. Thank you - for your attention. */ - for (i = 0; i < n; i++) s[i] = 0; +# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3))) + + UChar* d = sV; + + while ((!IS_4_ALIGNED(d)) && n >= 1) { + d[0] = 0; + d++; + n--; + } + if (n == 0) + return; + while (n >= 16) { + ((UInt*)d)[0] = 0; + ((UInt*)d)[1] = 0; + ((UInt*)d)[2] = 0; + ((UInt*)d)[3] = 0; + d += 16; + n -= 16; + } + while (n >= 4) { + ((UInt*)d)[0] = 0; + d += 4; + n -= 4; + } + while (n >= 1) { + d[0] = 0; + d++; + n--; + } + return; +# undef IS_4_ALIGNED }