priv/host_generic_simd128.c \
priv/host_generic_simd256.c \
priv/host_generic_maddf.c \
- priv/host_generic_reg_alloc2.c \
+ priv/host_generic_reg_alloc3.c \
priv/host_x86_defs.c \
priv/host_x86_isel.c
# TODO-JIT: other architectures disabled for now
381272 ppc64 doesn't compile test_isa_2_06_partx.c without VSX support
381289 epoll_pwait can have a NULL sigmask
381274 powerpc too chatty even with --sigill-diagnostics=no
+381553 VEX register allocator v3
381769 Use ucontext_t instead of struct ucontext
381805 arm32 needs ld.so index hardwire for new glibc security fixes
382256 gz compiler flag test doesn't work for gold
/* Add the registers. The initial segment of this array must be
those available for allocation by reg-alloc, and those that
follow are not available for allocation. */
+ ru->allocable_start[HRcInt64] = ru->size;
ru->regs[ru->size++] = hregAMD64_RSI();
ru->regs[ru->size++] = hregAMD64_RDI();
ru->regs[ru->size++] = hregAMD64_R8();
ru->regs[ru->size++] = hregAMD64_R14();
ru->regs[ru->size++] = hregAMD64_R15();
ru->regs[ru->size++] = hregAMD64_RBX();
+ ru->regs[ru->size++] = hregAMD64_R10();
+ ru->allocable_end[HRcInt64] = ru->size - 1;
+
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregAMD64_XMM3();
ru->regs[ru->size++] = hregAMD64_XMM4();
ru->regs[ru->size++] = hregAMD64_XMM5();
ru->regs[ru->size++] = hregAMD64_XMM10();
ru->regs[ru->size++] = hregAMD64_XMM11();
ru->regs[ru->size++] = hregAMD64_XMM12();
- ru->regs[ru->size++] = hregAMD64_R10();
+ ru->allocable_end[HRcVec128] = ru->size - 1;
ru->allocable = ru->size;
+
/* And other regs, not available to the allocator. */
ru->regs[ru->size++] = hregAMD64_RAX();
ru->regs[ru->size++] = hregAMD64_RCX();
}
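
The per-class allocable_start[]/allocable_end[] bounds recorded above let the
v3 allocator confine a search to registers of the class it actually needs,
rather than scanning the whole universe and filtering on hregClass. A minimal
sketch of the kind of lookup this enables; the helper name findFreeRReg is
illustrative (not part of this patch), and the RRegState/disp conventions are
those of the old allocator shown further below:

   static Int findFreeRReg ( const RRegUniverse* univ,
                             const RRegState* state, HRegClass hrc )
   {
      /* Walk only this class's slice of the universe.  The bounds
         are inclusive, per the setup above. */
      for (UInt r = univ->allocable_start[hrc];
           r <= univ->allocable_end[hrc]; r++) {
         if (state[r].disp == Free)
            return (Int)r;   /* index into univ->regs[] */
      }
      return -1;             /* no free rreg of this class */
   }
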
-void ppHRegAMD64 ( HReg reg )
+UInt ppHRegAMD64 ( HReg reg )
{
Int r;
   static const HChar* ireg64_names[16]
     = { "%rax", "%rcx", "%rdx", "%rbx", "%rsp", "%rbp", "%rsi", "%rdi",
         "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("%s", ireg64_names[r]);
- return;
+ return vex_printf("%s", ireg64_names[r]);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("%%xmm%d", r);
- return;
+ return vex_printf("%%xmm%d", r);
default:
vpanic("ppHRegAMD64");
}
}
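
The pretty-printers now return the number of characters written, mirroring
vex_printf; presumably this is so the new allocator's debug traces can be
column-aligned. A hedged sketch of such a use (ppRegPadded is illustrative
only, not part of this patch):

   static void ppRegPadded ( HReg reg, UInt width )
   {
      /* Left-justify the register name in a fixed-width column,
         using the character count the printer now returns. */
      UInt written = ppHRegAMD64(reg);
      while (written++ < width)
         vex_printf(" ");
   }
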
-static void ppHRegAMD64_lo32 ( HReg reg )
+static UInt ppHRegAMD64_lo32 ( HReg reg )
{
Int r;
   static const HChar* ireg32_names[16]
     = { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi",
         "%r8d", "%r9d", "%r10d", "%r11d", "%r12d", "%r13d", "%r14d", "%r15d" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- vex_printf("d");
- return;
+ UInt written = ppHReg(reg);
+ written += vex_printf("d");
+ return written;
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
default:
vpanic("ppHRegAMD64_lo32: invalid regclass");
}
}
}
+AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt64:
+ return AMD64Instr_Alu64R(Aalu_MOV, AMD64RMI_Reg(from), to);
+ case HRcVec128:
+ return AMD64Instr_SseReRg(Asse_MOV, from, to);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_AMD64: unimplemented regclass");
+ }
+}
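
genMove_AMD64, like the per-architecture genMove_* siblings added below, gives
the allocator a target-independent way to synthesise a plain
register-to-register copy, e.g. to evacuate a vreg from an rreg whose hard
live range is about to begin instead of round-tripping the value through a
spill slot. A sketch of how a caller might drive such a hook; the
function-pointer plumbing is assumed here, by analogy with the existing
genSpill/genReload parameters:

   static void evacuateViaMove ( HInstrArray* instrs_out,
                                 HInstr* (*genMove)(HReg, HReg, Bool),
                                 HReg from, HReg to, Bool mode64 )
   {
      /* Copy the live value into the free rreg 'to' directly,
         avoiding a spill store and later reload. */
      HInstr* move = genMove(from, to, mode64);
      vassert(move != NULL);
      addHInstr(instrs_out, move);
   }
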
+
AMD64Instr* directReload_AMD64( AMD64Instr* i, HReg vreg, Short spill_off )
{
vassert(spill_off >= 0 && spill_off < 10000); /* let's say */
ST_IN HReg hregAMD64_R14 ( void ) { return mkHReg(False, HRcInt64, 14, 6); }
ST_IN HReg hregAMD64_R15 ( void ) { return mkHReg(False, HRcInt64, 15, 7); }
ST_IN HReg hregAMD64_RBX ( void ) { return mkHReg(False, HRcInt64, 3, 8); }
-
-ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 9); }
-ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 10); }
-ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 11); }
-ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 12); }
-ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 13); }
-ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 14); }
-ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 15); }
-ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 16); }
-ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 17); }
-ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 18); }
-
-ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 19); }
+ST_IN HReg hregAMD64_R10 ( void ) { return mkHReg(False, HRcInt64, 10, 9); }
+
+ST_IN HReg hregAMD64_XMM3 ( void ) { return mkHReg(False, HRcVec128, 3, 10); }
+ST_IN HReg hregAMD64_XMM4 ( void ) { return mkHReg(False, HRcVec128, 4, 11); }
+ST_IN HReg hregAMD64_XMM5 ( void ) { return mkHReg(False, HRcVec128, 5, 12); }
+ST_IN HReg hregAMD64_XMM6 ( void ) { return mkHReg(False, HRcVec128, 6, 13); }
+ST_IN HReg hregAMD64_XMM7 ( void ) { return mkHReg(False, HRcVec128, 7, 14); }
+ST_IN HReg hregAMD64_XMM8 ( void ) { return mkHReg(False, HRcVec128, 8, 15); }
+ST_IN HReg hregAMD64_XMM9 ( void ) { return mkHReg(False, HRcVec128, 9, 16); }
+ST_IN HReg hregAMD64_XMM10 ( void ) { return mkHReg(False, HRcVec128, 10, 17); }
+ST_IN HReg hregAMD64_XMM11 ( void ) { return mkHReg(False, HRcVec128, 11, 18); }
+ST_IN HReg hregAMD64_XMM12 ( void ) { return mkHReg(False, HRcVec128, 12, 19); }
ST_IN HReg hregAMD64_RAX ( void ) { return mkHReg(False, HRcInt64, 0, 20); }
ST_IN HReg hregAMD64_RCX ( void ) { return mkHReg(False, HRcInt64, 1, 21); }
ST_IN HReg hregAMD64_XMM1 ( void ) { return mkHReg(False, HRcVec128, 1, 27); }
#undef ST_IN
-extern void ppHRegAMD64 ( HReg );
+extern UInt ppHRegAMD64 ( HReg );
/* --------- Condition codes, AMD encoding. --------- */
HReg rreg, Int offset, Bool );
extern void genReload_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
-
+extern AMD64Instr* genMove_AMD64(HReg from, HReg to, Bool);
extern AMD64Instr* directReload_AMD64 ( AMD64Instr* i,
HReg vreg, Short spill_off );
/* Add the registers. The initial segment of this array must be
those available for allocation by reg-alloc, and those that
follow are not available for allocation. */
-
+ ru->allocable_start[HRcInt64] = ru->size;
ru->regs[ru->size++] = hregARM64_X22();
ru->regs[ru->size++] = hregARM64_X23();
ru->regs[ru->size++] = hregARM64_X24();
ru->regs[ru->size++] = hregARM64_X5();
ru->regs[ru->size++] = hregARM64_X6();
ru->regs[ru->size++] = hregARM64_X7();
+ ru->allocable_end[HRcInt64] = ru->size - 1;
// X8 is used as a ProfInc temporary, not available to regalloc.
// X9 is a chaining/spill temporary, not available to regalloc.
// X21 is the guest state pointer, not available to regalloc.
// vector regs. Unfortunately not callee-saved.
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregARM64_Q16();
ru->regs[ru->size++] = hregARM64_Q17();
ru->regs[ru->size++] = hregARM64_Q18();
ru->regs[ru->size++] = hregARM64_Q19();
ru->regs[ru->size++] = hregARM64_Q20();
+ ru->allocable_end[HRcVec128] = ru->size - 1;
// F64 regs, all of which are callee-saved
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregARM64_D8();
ru->regs[ru->size++] = hregARM64_D9();
ru->regs[ru->size++] = hregARM64_D10();
ru->regs[ru->size++] = hregARM64_D11();
ru->regs[ru->size++] = hregARM64_D12();
ru->regs[ru->size++] = hregARM64_D13();
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
ru->allocable = ru->size;
/* And other regs, not available to the allocator. */
}
-void ppHRegARM64 ( HReg reg ) {
+UInt ppHRegARM64 ( HReg reg ) {
Int r;
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 31);
- vex_printf("x%d", r);
- return;
+ return vex_printf("x%d", r);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("d%d", r);
- return;
+ return vex_printf("d%d", r);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("q%d", r);
- return;
+ return vex_printf("q%d", r);
default:
vpanic("ppHRegARM64");
}
}
-static void ppHRegARM64asSreg ( HReg reg ) {
- ppHRegARM64(reg);
- vex_printf("(S-reg)");
+static UInt ppHRegARM64asSreg ( HReg reg ) {
+ UInt written = ppHRegARM64(reg);
+ written += vex_printf("(S-reg)");
+ return written;
}
-static void ppHRegARM64asHreg ( HReg reg ) {
- ppHRegARM64(reg);
- vex_printf("(H-reg)");
+static UInt ppHRegARM64asHreg ( HReg reg ) {
+ UInt written = ppHRegARM64(reg);
+ written += vex_printf("(H-reg)");
+ return written;
}
ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
return;
case ARM64in_VFCSel: {
- void (*ppHRegARM64fp)(HReg)
+ UInt (*ppHRegARM64fp)(HReg)
= (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
vex_printf("fcsel ");
ppHRegARM64fp(i->ARM64in.VFCSel.dst);
}
}
+ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt64:
+ return ARM64Instr_MovI(to, from);
+ case HRcFlt64:
+ return ARM64Instr_VMov(8, to, from);
+ case HRcVec128:
+ return ARM64Instr_VMov(16, to, from);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_ARM64: unimplemented regclass");
+ }
+}
+
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
ST_IN HReg hregARM64_X21 ( void ) { return mkHReg(False, HRcInt64, 21, 28); }
#undef ST_IN
-extern void ppHRegARM64 ( HReg );
+extern UInt ppHRegARM64 ( HReg );
/* Number of registers used arg passing in function calls */
#define ARM64_N_ARGREGS 8 /* x0 .. x7 */
HReg rreg, Int offset, Bool );
extern void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
+extern ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool);
extern const RRegUniverse* getRRegUniverse_ARM64 ( void );
 /* Callee-saved ones are listed first, since we prefer them
if they're available. */
+ ru->allocable_start[HRcInt32] = ru->size;
ru->regs[ru->size++] = hregARM_R4();
ru->regs[ru->size++] = hregARM_R5();
ru->regs[ru->size++] = hregARM_R6();
ru->regs[ru->size++] = hregARM_R2();
ru->regs[ru->size++] = hregARM_R3();
ru->regs[ru->size++] = hregARM_R9();
+ ru->allocable_end[HRcInt32] = ru->size - 1;
+
/* FP registers. Note: these are all callee-save. Yay! Hence we
don't need to mention them as trashed in getHRegUsage for
ARMInstr_Call. */
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregARM_D8();
ru->regs[ru->size++] = hregARM_D9();
ru->regs[ru->size++] = hregARM_D10();
ru->regs[ru->size++] = hregARM_D11();
ru->regs[ru->size++] = hregARM_D12();
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
+
+ ru->allocable_start[HRcFlt32] = ru->size;
ru->regs[ru->size++] = hregARM_S26();
ru->regs[ru->size++] = hregARM_S27();
ru->regs[ru->size++] = hregARM_S28();
ru->regs[ru->size++] = hregARM_S29();
ru->regs[ru->size++] = hregARM_S30();
+ ru->allocable_end[HRcFlt32] = ru->size - 1;
+
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregARM_Q8();
ru->regs[ru->size++] = hregARM_Q9();
ru->regs[ru->size++] = hregARM_Q10();
ru->regs[ru->size++] = hregARM_Q11();
ru->regs[ru->size++] = hregARM_Q12();
+ ru->allocable_end[HRcVec128] = ru->size - 1;
ru->allocable = ru->size;
/* And other regs, not available to the allocator. */
}
-void ppHRegARM ( HReg reg ) {
+UInt ppHRegARM ( HReg reg ) {
Int r;
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("r%d", r);
- return;
+ return vex_printf("r%d", r);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("d%d", r);
- return;
+ return vex_printf("d%d", r);
case HRcFlt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("s%d", r);
- return;
+ return vex_printf("s%d", r);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 16);
- vex_printf("q%d", r);
- return;
+ return vex_printf("q%d", r);
default:
vpanic("ppHRegARM");
}
}
}
+ARMInstr* genMove_ARM(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt32:
+ return ARMInstr_Mov(to, ARMRI84_R(from));
+ case HRcFlt32:
+ return ARMInstr_VUnaryS(ARMvfpu_COPY, to, from);
+ case HRcFlt64:
+ return ARMInstr_VUnaryD(ARMvfpu_COPY, to, from);
+ case HRcVec128:
+ return ARMInstr_NUnary(ARMneon_COPY, to, from, 4, False);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_ARM: unimplemented regclass");
+ }
+}
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
ST_IN HReg hregARM_Q15 ( void ) { return mkHReg(False, HRcVec128, 15, 33); }
#undef ST_IN
-extern void ppHRegARM ( HReg );
+extern UInt ppHRegARM ( HReg );
/* Number of registers used arg passing in function calls */
#define ARM_N_ARGREGS 4 /* r0, r1, r2, r3 */
HReg rreg, Int offset, Bool );
extern void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
+extern ARMInstr* genMove_ARM(HReg from, HReg to, Bool);
extern const RRegUniverse* getRRegUniverse_ARM ( void );
+++ /dev/null
-
-/*---------------------------------------------------------------*/
-/*--- begin host_reg_alloc2.c ---*/
-/*---------------------------------------------------------------*/
-
-/*
- This file is part of Valgrind, a dynamic binary instrumentation
- framework.
-
- Copyright (C) 2004-2017 OpenWorks LLP
- info@open-works.net
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA.
-
- The GNU General Public License is contained in the file COPYING.
-
- Neither the names of the U.S. Department of Energy nor the
- University of California nor the names of its contributors may be
- used to endorse or promote products derived from this software
- without prior written permission.
-*/
-
-#include "libvex_basictypes.h"
-#include "libvex.h"
-
-#include "main_util.h"
-#include "host_generic_regs.h"
-
-/* Set to 1 for lots of debugging output. */
-#define DEBUG_REGALLOC 0
-
-
-/* TODO 27 Oct 04:
-
- Better consistency checking from what isMove tells us.
-
- We can possibly do V-V coalescing even when the src is spilled,
- providing we can arrange for the dst to have the same spill slot.
-
- Note that state[].hreg is the same as the available real regs.
-
- Generally rationalise data structures. */
-
-
-/* Records information on virtual register live ranges. Computed once
- and remains unchanged after that. */
-typedef
- struct {
- /* Becomes live for the first time after this insn ... */
- Short live_after;
- /* Becomes dead for the last time before this insn ... */
- Short dead_before;
- /* The "home" spill slot, if needed. Never changes. */
- Short spill_offset;
- Short spill_size;
- /* What kind of register this is. */
- HRegClass reg_class;
- }
- VRegLR;
-
-
-/* Records information on real-register live ranges. Computed once
- and remains unchanged after that. */
-typedef
- struct {
- HReg rreg;
- /* Becomes live after this insn ... */
- Short live_after;
- /* Becomes dead before this insn ... */
- Short dead_before;
- }
- RRegLR;
-
-
-/* An array of the following structs (rreg_state) comprises the
- running state of the allocator. It indicates what the current
- disposition of each allocatable real register is. The array gets
- updated as the allocator processes instructions. The identity of
- the register is not recorded here, because the index of this
- structure in doRegisterAllocation()'s |rreg_state| is the index
- number of the register, and the register itself can be extracted
- from the RRegUniverse supplied to doRegisterAllocation(). */
-typedef
- struct {
- /* ------ FIELDS WHICH DO NOT CHANGE ------ */
- /* Is this involved in any HLRs? (only an optimisation hint) */
- Bool has_hlrs;
- /* ------ FIELDS WHICH DO CHANGE ------ */
- /* 6 May 07: rearranged fields below so the whole struct fits
- into 16 bytes on both x86 and amd64. */
- /* Used when .disp == Bound and we are looking for vregs to
- spill. */
- Bool is_spill_cand;
- /* Optimisation: used when .disp == Bound. Indicates when the
- rreg has the same value as the spill slot for the associated
- vreg. Is safely left at False, and becomes True after a
- spill store or reload for this rreg. */
- Bool eq_spill_slot;
- /* What's its current disposition? */
- enum { Free, /* available for use */
- Unavail, /* in a real-reg live range */
- Bound /* in use (holding value of some vreg) */
- }
- disp;
- /* If .disp == Bound, what vreg is it bound to? */
- HReg vreg;
- }
- RRegState;
-
-
-/* The allocator also maintains a redundant array of indexes
- (vreg_state) from vreg numbers back to entries in rreg_state. It
- is redundant because iff vreg_state[i] == j then
- hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
- point at each other. The purpose of this is to speed up activities
- which involve looking for a particular vreg: there is no need to
- scan the rreg_state looking for it, just index directly into
- vreg_state. The FAQ "does this vreg already have an associated
- rreg" is the main beneficiary.
-
- To indicate, in vreg_state[i], that a given vreg is not currently
- associated with any rreg, that entry can be set to INVALID_RREG_NO.
-
- Because the vreg_state entries are signed Shorts, the max number
- of vregs that can be handled by regalloc is 32767.
-*/
-
-#define INVALID_RREG_NO ((Short)(-1))
-
-#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
-#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
-
-
-/* Search forward from some given point in the incoming instruction
- sequence. Point is to select a virtual register to spill, by
- finding the vreg which is mentioned as far ahead as possible, in
- the hope that this will minimise the number of consequent reloads.
-
- Only do the search for vregs which are Bound in the running state,
- and for which the .is_spill_cand field is set. This allows the
- caller to arbitrarily restrict the set of spill candidates to be
- considered.
-
- To do this we don't actually need to see the incoming instruction
- stream. Rather, what we need is the HRegUsage records for the
- incoming instruction stream. Hence that is passed in.
-
- Returns an index into the state array indicating the (v,r) pair to
- spill, or -1 if none was found. */
-static
-Int findMostDistantlyMentionedVReg (
- HRegUsage* reg_usages_in,
- Int search_from_instr,
- Int num_instrs,
- RRegState* state,
- Int n_state
-)
-{
- Int k, m;
- Int furthest_k = -1;
- Int furthest = -1;
- vassert(search_from_instr >= 0);
- for (k = 0; k < n_state; k++) {
- if (!state[k].is_spill_cand)
- continue;
- vassert(state[k].disp == Bound);
- for (m = search_from_instr; m < num_instrs; m++) {
- if (HRegUsage__contains(&reg_usages_in[m], state[k].vreg))
- break;
- }
- if (m > furthest) {
- furthest = m;
- furthest_k = k;
- }
- }
- return furthest_k;
-}
-
-
-/* Check that this vreg has been assigned a sane spill offset. */
-inline
-static void sanity_check_spill_offset ( VRegLR* vreg )
-{
- switch (vreg->reg_class) {
- case HRcVec128: case HRcFlt64:
- vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
- default:
- vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
- }
-}
-
-
-/* Double the size of the real-reg live-range array, if needed. */
-__attribute__((noinline))
-static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
-{
- Int k;
- RRegLR* arr2;
- if (0)
- vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
- vassert(used == *size);
- arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
- for (k = 0; k < *size; k++)
- arr2[k] = (*info)[k];
- *size *= 2;
- *info = arr2;
-}
-inline
-static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
-{
- if (LIKELY(used < *size)) return;
- ensureRRLRspace_SLOW(info, size, used);
-}
-
-
-/* Sort an array of RRegLR entries by either the .live_after or
- .dead_before fields. This is performance-critical. */
-static void sortRRLRarray ( RRegLR* arr,
- Int size, Bool by_live_after )
-{
- Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
- 9841, 29524, 88573, 265720,
- 797161, 2391484 };
- Int lo = 0;
- Int hi = size-1;
- Int i, j, h, bigN, hp;
- RRegLR v;
-
- vassert(size >= 0);
- if (size == 0)
- return;
-
- bigN = hi - lo + 1; if (bigN < 2) return;
- hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
-
- if (by_live_after) {
-
- for ( ; hp >= 0; hp--) {
- h = incs[hp];
- for (i = lo + h; i <= hi; i++) {
- v = arr[i];
- j = i;
- while (arr[j-h].live_after > v.live_after) {
- arr[j] = arr[j-h];
- j = j - h;
- if (j <= (lo + h - 1)) break;
- }
- arr[j] = v;
- }
- }
-
- } else {
-
- for ( ; hp >= 0; hp--) {
- h = incs[hp];
- for (i = lo + h; i <= hi; i++) {
- v = arr[i];
- j = i;
- while (arr[j-h].dead_before > v.dead_before) {
- arr[j] = arr[j-h];
- j = j - h;
- if (j <= (lo + h - 1)) break;
- }
- arr[j] = v;
- }
- }
-
- }
-}
-
-
-/* Compute the index of the highest and lowest 1 in a ULong,
- respectively. Results are undefined if the argument is zero.
- Don't pass it zero :) */
-static inline UInt ULong__maxIndex ( ULong w64 ) {
- return 63 - __builtin_clzll(w64);
-}
-
-static inline UInt ULong__minIndex ( ULong w64 ) {
- return __builtin_ctzll(w64);
-}
-
-
-/* Vectorised memset, copied from Valgrind's m_libcbase.c. */
-static void* local_memset ( void *destV, Int c, SizeT sz )
-{
-# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))
-
- UInt c4;
- UChar* d = destV;
- UChar uc = c;
-
- while ((!IS_4_ALIGNED(d)) && sz >= 1) {
- d[0] = uc;
- d++;
- sz--;
- }
- if (sz == 0)
- return destV;
- c4 = uc;
- c4 |= (c4 << 8);
- c4 |= (c4 << 16);
- while (sz >= 16) {
- ((UInt*)d)[0] = c4;
- ((UInt*)d)[1] = c4;
- ((UInt*)d)[2] = c4;
- ((UInt*)d)[3] = c4;
- d += 16;
- sz -= 16;
- }
- while (sz >= 4) {
- ((UInt*)d)[0] = c4;
- d += 4;
- sz -= 4;
- }
- while (sz >= 1) {
- d[0] = c;
- d++;
- sz--;
- }
- return destV;
-
-# undef IS_4_ALIGNED
-}
-
-
-/* A target-independent register allocator. Requires various
- functions which it uses to deal abstractly with instructions and
- registers, since it cannot have any target-specific knowledge.
-
- Returns a new list of instructions, which, as a result of the
- behaviour of mapRegs, will be in-place modifications of the
- original instructions.
-
- Requires that the incoming code has been generated using
- vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside
- that range is a checked run-time error.
-
- Takes an expandable array of pointers to unallocated insns.
- Returns an expandable array of pointers to allocated insns.
-*/
-HInstrArray* doRegisterAllocation (
-
- /* Incoming virtual-registerised code. */
- HInstrArray* instrs_in,
-
- /* The real-register universe to use. This contains facts about
- real registers, one of which is the set of registers available
- for allocation. */
- const RRegUniverse* univ,
-
- /* Return True iff the given insn is a reg-reg move, in which
- case also return the src and dst regs. */
- Bool (*isMove) ( const HInstr*, HReg*, HReg* ),
-
- /* Get info about register usage in this insn. */
- void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ),
-
- /* Apply a reg-reg mapping to an insn. */
- void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
-
- /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
- real reg to a spill slot byte offset. The two leading HInstr**
- args are out parameters, through which the generated insns are
- returned. Also (optionally) a 'directReload' function, which
- attempts to replace a given instruction by one which reads
- directly from a specified spill slot. May be NULL, in which
- case the optimisation is not attempted. */
- void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
- void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
- HInstr* (*directReload) ( HInstr*, HReg, Short ),
- Int guest_sizeB,
-
- /* For debug printing only. */
- void (*ppInstr) ( const HInstr*, Bool ),
- void (*ppReg) ( HReg ),
-
- /* 32/64bit mode */
- Bool mode64
-)
-{
-# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
-
- const Bool eq_spill_opt = True;
-
- /* Info on vregs and rregs. Computed once and remains
- unchanged. */
- Int n_vregs;
- VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
-
- /* We keep two copies of the real-reg live range info, one sorted
- by .live_after and the other by .dead_before. First the
- unsorted info is created in the _la variant and then copied into the
- _db variant. Once that's done both of them are sorted.
- We also need two integer cursors which record the next
- location in the two arrays to consider. */
- RRegLR* rreg_lrs_la;
- RRegLR* rreg_lrs_db;
- Int rreg_lrs_size;
- Int rreg_lrs_used;
- Int rreg_lrs_la_next;
- Int rreg_lrs_db_next;
-
- /* Info on register usage in the incoming instruction array.
- Computed once and remains unchanged, more or less; updated
- sometimes by the direct-reload optimisation. */
- HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */
-
- /* Used when constructing vreg_lrs (for allocating stack
- slots). */
- Short ss_busy_until_before[N_SPILL64S];
-
- /* Used when constructing rreg_lrs. */
- Int* rreg_live_after;
- Int* rreg_dead_before;
-
- /* Running state of the core allocation algorithm. */
- RRegState* rreg_state; /* [0 .. n_rregs-1] */
- Int n_rregs;
-
- /* .. and the redundant backward map */
- /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
- This implies n_rregs must be <= 32768. */
- Short* vreg_state; /* [0 .. n_vregs-1] */
-
- /* The vreg -> rreg map constructed and then applied to each
- instr. */
- HRegRemap remap;
-
- /* The output array of instructions. */
- HInstrArray* instrs_out;
-
- /* Sanity checks are expensive. They are only done periodically,
- not at each insn processed. */
- Bool do_sanity_check;
-
- vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
- vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
- vassert(0 == (N_SPILL64S % 2));
-
- /* The live range numbers are signed shorts, and so limiting the
- number of insns to 15000 comfortably guards against them
- overflowing 32k. */
- vassert(instrs_in->arr_used <= 15000);
-
-# define INVALID_INSTRNO (-2)
-
-# define EMIT_INSTR(_instr) \
- do { \
- HInstr* _tmp = (_instr); \
- if (DEBUG_REGALLOC) { \
- vex_printf("** "); \
- (*ppInstr)(_tmp, mode64); \
- vex_printf("\n\n"); \
- } \
- addHInstr ( instrs_out, _tmp ); \
- } while (0)
-
-# define PRINT_STATE \
- do { \
- Int z, q; \
- for (z = 0; z < n_rregs; z++) { \
- vex_printf(" rreg_state[%2d] = ", z); \
- (*ppReg)(univ->regs[z]); \
- vex_printf(" \t"); \
- switch (rreg_state[z].disp) { \
- case Free: vex_printf("Free\n"); break; \
- case Unavail: vex_printf("Unavail\n"); break; \
- case Bound: vex_printf("BoundTo "); \
- (*ppReg)(rreg_state[z].vreg); \
- vex_printf("\n"); break; \
- } \
- } \
- vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \
- q = 0; \
- for (z = 0; z < n_vregs; z++) { \
- if (vreg_state[z] == INVALID_RREG_NO) \
- continue; \
- vex_printf("[%d] -> %d ", z, vreg_state[z]); \
- q++; \
- if (q > 0 && (q % 6) == 0) \
- vex_printf("\n "); \
- } \
- vex_printf("\n"); \
- } while (0)
-
-
- /* --------- Stage 0: set up output array --------- */
- /* --------- and allocate/initialise running state. --------- */
-
- instrs_out = newHInstrArray();
-
- /* ... and initialise running state. */
- /* n_rregs is no more than a short name for n_available_real_regs. */
- n_rregs = univ->allocable;
- n_vregs = instrs_in->n_vregs;
-
- /* If this is not so, vreg_state entries will overflow. */
- vassert(n_vregs < 32767);
-
- /* If this is not so, the universe we have is nonsensical. */
- vassert(n_rregs > 0);
-
- rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
- vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));
-
- for (Int j = 0; j < n_rregs; j++) {
- rreg_state[j].has_hlrs = False;
- rreg_state[j].disp = Free;
- rreg_state[j].vreg = INVALID_HREG;
- rreg_state[j].is_spill_cand = False;
- rreg_state[j].eq_spill_slot = False;
- }
-
- for (Int j = 0; j < n_vregs; j++)
- vreg_state[j] = INVALID_RREG_NO;
-
-
- /* --------- Stage 1: compute vreg live ranges. --------- */
- /* --------- Stage 2: compute rreg live ranges. --------- */
-
- /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
-
- /* This is relatively simple, because (1) we only seek the complete
- end-to-end live range of each vreg, and are not interested in
- any holes in it, and (2) the vregs are conveniently numbered 0
- .. n_vregs-1, so we can just dump the results in a
- pre-allocated array. */
-
- vreg_lrs = NULL;
- if (n_vregs > 0)
- vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);
-
- for (Int j = 0; j < n_vregs; j++) {
- vreg_lrs[j].live_after = INVALID_INSTRNO;
- vreg_lrs[j].dead_before = INVALID_INSTRNO;
- vreg_lrs[j].spill_offset = 0;
- vreg_lrs[j].spill_size = 0;
- vreg_lrs[j].reg_class = HRcINVALID;
- }
-
- /* An array to hold the reg-usage info for the incoming
- instructions. */
- reg_usage_arr = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
-
- /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
-
- /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
-
- /* This is more complex than Stage 1, because we need to compute
- exactly all the live ranges of all the allocatable real regs,
- and we don't know in advance how many there will be. */
-
- rreg_lrs_used = 0;
- rreg_lrs_size = 4;
- rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
- rreg_lrs_db = NULL; /* we'll create this later */
-
- /* We'll need to track live range start/end points separately for
- each rreg. Sigh. */
- vassert(n_rregs > 0);
- rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
- rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
-
- for (Int j = 0; j < n_rregs; j++) {
- rreg_live_after[j] =
- rreg_dead_before[j] = INVALID_INSTRNO;
- }
-
- /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
-
- /* ------ start of ITERATE OVER INSNS ------ */
-
- for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
-
- (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
-
- if (0) {
- vex_printf("\n%d stage1: ", ii);
- (*ppInstr)(instrs_in->arr[ii], mode64);
- vex_printf("\n");
- ppHRegUsage(univ, &reg_usage_arr[ii]);
- }
-
- /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
-
- /* for each virtual reg mentioned in the insn ... */
- for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
-
- HReg vreg = reg_usage_arr[ii].vRegs[j];
- vassert(hregIsVirtual(vreg));
-
- Int k = hregIndex(vreg);
- if (k < 0 || k >= n_vregs) {
- vex_printf("\n");
- (*ppInstr)(instrs_in->arr[ii], mode64);
- vex_printf("\n");
- vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
- vpanic("doRegisterAllocation: out-of-range vreg");
- }
-
- /* Take the opportunity to note its regclass. We'll need
- that when allocating spill slots. */
- if (vreg_lrs[k].reg_class == HRcINVALID) {
- /* First mention of this vreg. */
- vreg_lrs[k].reg_class = hregClass(vreg);
- } else {
- /* Seen it before, so check for consistency. */
- vassert(vreg_lrs[k].reg_class == hregClass(vreg));
- }
-
- /* Now consider live ranges. */
- switch (reg_usage_arr[ii].vMode[j]) {
- case HRmRead:
- if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
- vex_printf("\n\nOFFENDING VREG = %d\n", k);
- vpanic("doRegisterAllocation: "
- "first event for vreg is Read");
- }
- vreg_lrs[k].dead_before = toShort(ii + 1);
- break;
- case HRmWrite:
- if (vreg_lrs[k].live_after == INVALID_INSTRNO)
- vreg_lrs[k].live_after = toShort(ii);
- vreg_lrs[k].dead_before = toShort(ii + 1);
- break;
- case HRmModify:
- if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
- vex_printf("\n\nOFFENDING VREG = %d\n", k);
- vpanic("doRegisterAllocation: "
- "first event for vreg is Modify");
- }
- vreg_lrs[k].dead_before = toShort(ii + 1);
- break;
- default:
- vpanic("doRegisterAllocation(1)");
- } /* switch */
-
- } /* iterate over virtual registers */
-
- /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
-
- /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
-
- /* If this doesn't hold, the following iteration over real registers
- will fail miserably. */
- vassert(N_RREGUNIVERSE_REGS == 64);
-
- const ULong rRead = reg_usage_arr[ii].rRead;
- const ULong rWritten = reg_usage_arr[ii].rWritten;
- const ULong rMentioned = rRead | rWritten;
-
- UInt rReg_minIndex;
- UInt rReg_maxIndex;
- if (rMentioned == 0) {
- /* There are no real register uses in this insn. Set
- rReg_{min,max}Index so that the following loop doesn't iterate
- at all, so as to avoid wasting time. */
- rReg_minIndex = 1;
- rReg_maxIndex = 0;
- } else {
- rReg_minIndex = ULong__minIndex(rMentioned);
- rReg_maxIndex = ULong__maxIndex(rMentioned);
- /* Don't bother to look at registers which are not available
- to the allocator. We asserted above that n_rregs > 0, so
- n_rregs-1 is safe. */
- if (rReg_maxIndex >= n_rregs)
- rReg_maxIndex = n_rregs-1;
- }
-
- /* for each allocator-available real reg mentioned in the insn ... */
- /* Note. We are allocating only over the real regs available to
- the allocator. Others, eg the stack or baseblock pointers,
- are unavailable to allocation and so we never visit them.
- Hence the iteration is cut off at n_rregs-1, since n_rregs ==
- univ->allocable. */
- for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {
-
- const ULong jMask = 1ULL << j;
- if (LIKELY((rMentioned & jMask) == 0))
- continue;
-
- const Bool isR = (rRead & jMask) != 0;
- const Bool isW = (rWritten & jMask) != 0;
-
- /* Dummy initialisations of flush_la and flush_db to avoid
- possible bogus uninit-var warnings from gcc. */
- Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
- Bool flush = False;
-
- if (isW && !isR) {
- flush_la = rreg_live_after[j];
- flush_db = rreg_dead_before[j];
- if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
- flush = True;
- rreg_live_after[j] = ii;
- rreg_dead_before[j] = ii+1;
- } else if (!isW && isR) {
- if (rreg_live_after[j] == INVALID_INSTRNO) {
- vex_printf("\nOFFENDING RREG = ");
- (*ppReg)(univ->regs[j]);
- vex_printf("\n");
- vex_printf("\nOFFENDING instr = ");
- (*ppInstr)(instrs_in->arr[ii], mode64);
- vex_printf("\n");
- vpanic("doRegisterAllocation: "
- "first event for rreg is Read");
- }
- rreg_dead_before[j] = ii+1;
- } else {
- vassert(isR && isW);
- if (rreg_live_after[j] == INVALID_INSTRNO) {
- vex_printf("\nOFFENDING RREG = ");
- (*ppReg)(univ->regs[j]);
- vex_printf("\n");
- vex_printf("\nOFFENDING instr = ");
- (*ppInstr)(instrs_in->arr[ii], mode64);
- vex_printf("\n");
- vpanic("doRegisterAllocation: "
- "first event for rreg is Modify");
- }
- rreg_dead_before[j] = ii+1;
- }
-
- if (flush) {
- vassert(flush_la != INVALID_INSTRNO);
- vassert(flush_db != INVALID_INSTRNO);
- ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
- if (0)
- vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
- rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j];
- rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la);
- rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
- rreg_lrs_used++;
- }
-
- } /* iterate over rregs in the instr */
-
- /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
-
- } /* iterate over insns */
-
- /* ------ end of ITERATE OVER INSNS ------ */
-
- /* ------ start of FINALISE RREG LIVE RANGES ------ */
-
- /* Now finish up any live ranges left over. */
- for (Int j = 0; j < n_rregs; j++) {
-
- if (0) {
- vex_printf("residual %d: %d %d\n", j, rreg_live_after[j],
- rreg_dead_before[j]);
- }
- vassert( (rreg_live_after[j] == INVALID_INSTRNO
- && rreg_dead_before[j] == INVALID_INSTRNO)
- ||
- (rreg_live_after[j] != INVALID_INSTRNO
- && rreg_dead_before[j] != INVALID_INSTRNO)
- );
-
- if (rreg_live_after[j] == INVALID_INSTRNO)
- continue;
-
- ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
- if (0)
- vex_printf("FLUSH 2 (%d,%d)\n",
- rreg_live_after[j], rreg_dead_before[j]);
- rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j];
- rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]);
- rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
- rreg_lrs_used++;
- }
-
- /* Compute summary hints for choosing real regs. If a real reg is
- involved in a hard live range, record that fact in the fixed
- part of the running rreg_state. Later, when offered a choice between
- rregs, it's better to choose one which is not marked as having
- any HLRs, since ones with HLRs may need to be spilled around
- their HLRs. Correctness of final assignment is unaffected by
- this mechanism -- it is only an optimisation. */
-
- for (Int j = 0; j < rreg_lrs_used; j++) {
- HReg rreg = rreg_lrs_la[j].rreg;
- vassert(!hregIsVirtual(rreg));
- /* rreg is involved in a HLR. Record this info in the array, if
- there is space. */
- UInt ix = hregIndex(rreg);
- vassert(ix < n_rregs);
- rreg_state[ix].has_hlrs = True;
- }
- if (0) {
- for (Int j = 0; j < n_rregs; j++) {
- if (!rreg_state[j].has_hlrs)
- continue;
- ppReg(univ->regs[j]);
- vex_printf(" hinted\n");
- }
- }
-
- /* Finally, copy the _la variant into the _db variant and
- sort both by their respective fields. */
- rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
- for (Int j = 0; j < rreg_lrs_used; j++)
- rreg_lrs_db[j] = rreg_lrs_la[j];
-
- sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ );
- sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
-
- /* And set up the cursors. */
- rreg_lrs_la_next = 0;
- rreg_lrs_db_next = 0;
-
- for (Int j = 1; j < rreg_lrs_used; j++) {
- vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after);
- vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
- }
-
- /* ------ end of FINALISE RREG LIVE RANGES ------ */
-
- if (DEBUG_REGALLOC) {
- for (Int j = 0; j < n_vregs; j++) {
- vex_printf("vreg %d: la = %d, db = %d\n",
- j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
- }
- }
-
- if (DEBUG_REGALLOC) {
- vex_printf("RRegLRs by LA:\n");
- for (Int j = 0; j < rreg_lrs_used; j++) {
- vex_printf(" ");
- (*ppReg)(rreg_lrs_la[j].rreg);
- vex_printf(" la = %d, db = %d\n",
- rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
- }
- vex_printf("RRegLRs by DB:\n");
- for (Int j = 0; j < rreg_lrs_used; j++) {
- vex_printf(" ");
- (*ppReg)(rreg_lrs_db[j].rreg);
- vex_printf(" la = %d, db = %d\n",
- rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
- }
- }
-
- /* --------- Stage 3: allocate spill slots. --------- */
-
- /* Each spill slot is 8 bytes long. For vregs which take more than
- 64 bits to spill (classes Flt64 and Vec128), we have to allocate
- two consecutive spill slots. For 256 bit registers (class
- Vec256), we have to allocate four consecutive spill slots.
-
- For Vec128-class on PowerPC, the spill slot's actual address
- must be 16-byte aligned. Since the spill slot's address is
- computed as an offset from the guest state pointer, and since
- the user of the generated code must set that pointer to a
- 32-aligned value, we have the residual obligation here of
- choosing a 16-aligned spill slot offset for Vec128-class values.
- Since each spill slot is 8 bytes long, that means for
- Vec128-class values we must allocate a spill slot number which
- is zero mod 2.
-
- Similarly, for Vec256 class on amd64, find a spill slot number
- which is zero mod 4. This guarantees it will be 32 byte
- aligned, which isn't actually necessary on amd64 (we use movUpd
- etc to spill), but seems like good practice.
-
- Do a rank-based allocation of vregs to spill slot numbers. We
- put as few values as possible in spill slots, but nevertheless
- need to have a spill slot available for all vregs, just in case.
- */
- /* Int max_ss_no = -1; */
-
- local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before));
-
- for (Int j = 0; j < n_vregs; j++) {
-
- /* True iff this vreg is unused. In which case we also expect
- that the reg_class field for it has not been set. */
- if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
- vassert(vreg_lrs[j].reg_class == HRcINVALID);
- continue;
- }
-
- /* The spill slots are 64 bits in size. As per the comment on
- definition of HRegClass in host_generic_regs.h, that means,
- to spill a vreg of class Flt64 or Vec128, we'll need to find
- two adjacent spill slots to use. For Vec256, we'll need to
- find four adjacent slots to use. Note, this logic needs to
- kept in sync with the size info on the definition of
- HRegClass. */
- Int ss_no = -1;
- switch (vreg_lrs[j].reg_class) {
-
- case HRcVec128: case HRcFlt64:
- /* Find two adjacent free slots in which between them
- provide up to 128 bits in which to spill the vreg.
- Since we are trying to find an even:odd pair, move
- along in steps of 2 (slots). */
- for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
- if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
- && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
- break;
- if (ss_no >= N_SPILL64S-1) {
- vpanic("LibVEX_N_SPILL_BYTES is too low. "
- "Increase and recompile.");
- }
- ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
- ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
- break;
-
- default:
- /* The ordinary case -- just find a single spill slot. */
- /* Find the lowest-numbered spill slot which is available
- at the start point of this interval, and assign the
- interval to it. */
- for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
- if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
- break;
- if (ss_no == N_SPILL64S) {
- vpanic("LibVEX_N_SPILL_BYTES is too low. "
- "Increase and recompile.");
- }
- ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
- break;
-
- } /* switch (vreg_lrs[j].reg_class) */
-
- /* This reflects LibVEX's hard-wired knowledge of the baseBlock
- layout: the guest state, then two equal sized areas following
- it for two sets of shadow state, and then the spill area. */
- vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8);
-
- /* Independent check that we've made a sane choice of slot */
- sanity_check_spill_offset( &vreg_lrs[j] );
- /* if (j > max_ss_no) */
- /* max_ss_no = j; */
- }
-
- if (0) {
- vex_printf("\n\n");
- for (Int j = 0; j < n_vregs; j++)
- vex_printf("vreg %d --> spill offset %d\n",
- j, vreg_lrs[j].spill_offset);
- }
-
- /* --------- Stage 4: establish rreg preferences --------- */
-
- /* It may be advantageous to allocate certain vregs to specific
- rregs, as a way of avoiding reg-reg moves later. Here we
- establish which, if any, rreg each vreg would prefer to be in.
- Note that this constrains the allocator -- ideally we end up
- with as few as possible vregs expressing a preference.
-
- This is an optimisation: if the .preferred_rreg field is never
- set to anything different from INVALID_HREG, the allocator still
- works. */
-
- /* 30 Dec 04: removed this mechanism as it does not seem to
- help. */
-
- /* --------- Stage 5: process instructions --------- */
-
- /* This is the main loop of the allocator. First, we need to
- correctly set up our running state, which tracks the status of
- each real register. */
-
- /* ------ BEGIN: Process each insn in turn. ------ */
-
- for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
-
- if (DEBUG_REGALLOC) {
- vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
- vex_printf("---- ");
- (*ppInstr)(instrs_in->arr[ii], mode64);
- vex_printf("\n\nInitial state:\n");
- PRINT_STATE;
- vex_printf("\n");
- }
-
- /* ------------ Sanity checks ------------ */
-
- /* Sanity checks are expensive. So they are done only once
- every 17 instructions, and just before the last
- instruction. */
- do_sanity_check
- = toBool(
- False /* Set to True for sanity checking of all insns. */
- || ii == instrs_in->arr_used-1
- || (ii > 0 && (ii % 17) == 0)
- );
-
- if (do_sanity_check) {
-
- /* Sanity check 1: all rregs with a hard live range crossing
- this insn must be marked as unavailable in the running
- state. */
- for (Int j = 0; j < rreg_lrs_used; j++) {
- if (rreg_lrs_la[j].live_after < ii
- && ii < rreg_lrs_la[j].dead_before) {
- /* ii is the middle of a hard live range for some real
- reg. Check it's marked as such in the running
- state. */
- HReg reg = rreg_lrs_la[j].rreg;
-
- if (0) {
- vex_printf("considering la %d .. db %d reg = ",
- rreg_lrs_la[j].live_after,
- rreg_lrs_la[j].dead_before);
- (*ppReg)(reg);
- vex_printf("\n");
- }
-
- /* assert that this rreg is marked as unavailable */
- vassert(!hregIsVirtual(reg));
- vassert(rreg_state[hregIndex(reg)].disp == Unavail);
- }
- }
-
- /* Sanity check 2: conversely, all rregs marked as
- unavailable in the running rreg_state must have a
- corresponding hard live range entry in the rreg_lrs
- array. */
- for (Int j = 0; j < n_rregs; j++) {
- vassert(rreg_state[j].disp == Bound
- || rreg_state[j].disp == Free
- || rreg_state[j].disp == Unavail);
- if (rreg_state[j].disp != Unavail)
- continue;
- Int k;
- for (k = 0; k < rreg_lrs_used; k++) {
- HReg reg = rreg_lrs_la[k].rreg;
- vassert(!hregIsVirtual(reg));
- if (hregIndex(reg) == j
- && rreg_lrs_la[k].live_after < ii
- && ii < rreg_lrs_la[k].dead_before)
- break;
- }
- /* If this vassertion fails, we couldn't find a
- corresponding HLR. */
- vassert(k < rreg_lrs_used);
- }
-
- /* Sanity check 3: all vreg-rreg bindings must bind registers
- of the same class. */
- for (Int j = 0; j < n_rregs; j++) {
- if (rreg_state[j].disp != Bound) {
- vassert(rreg_state[j].eq_spill_slot == False);
- continue;
- }
- vassert(hregClass(univ->regs[j])
- == hregClass(rreg_state[j].vreg));
- vassert( hregIsVirtual(rreg_state[j].vreg));
- }
-
- /* Sanity check 4: the vreg_state and rreg_state
- mutually-redundant mappings are consistent. If
- rreg_state[j].vreg points at some vreg_state entry then
- that vreg_state entry should point back at
- rreg_state[j]. */
- for (Int j = 0; j < n_rregs; j++) {
- if (rreg_state[j].disp != Bound)
- continue;
- Int k = hregIndex(rreg_state[j].vreg);
- vassert(IS_VALID_VREGNO(k));
- vassert(vreg_state[k] == j);
- }
- for (Int j = 0; j < n_vregs; j++) {
- Int k = vreg_state[j];
- if (k == INVALID_RREG_NO)
- continue;
- vassert(IS_VALID_RREGNO(k));
- vassert(rreg_state[k].disp == Bound);
- vassert(hregIndex(rreg_state[k].vreg) == j);
- }
-
- } /* if (do_sanity_check) */
-
- /* ------------ end of Sanity checks ------------ */
-
- /* Do various optimisations pertaining to register coalescing
- and preferencing:
- MOV v <-> v coalescing (done here).
- MOV v <-> r coalescing (not yet, if ever)
- */
- /* If doing a reg-reg move between two vregs, and the src's live
- range ends here and the dst's live range starts here, bind
- the dst to the src's rreg, and that's all. */
- HReg vregS = INVALID_HREG;
- HReg vregD = INVALID_HREG;
- if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
- if (!hregIsVirtual(vregS)) goto cannot_coalesce;
- if (!hregIsVirtual(vregD)) goto cannot_coalesce;
- /* Check that *isMove is not telling us a bunch of lies ... */
- vassert(hregClass(vregS) == hregClass(vregD));
- Int k = hregIndex(vregS);
- Int m = hregIndex(vregD);
- vassert(IS_VALID_VREGNO(k));
- vassert(IS_VALID_VREGNO(m));
- if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
- if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
- if (DEBUG_REGALLOC) {
- vex_printf("COALESCE ");
- (*ppReg)(vregS);
- vex_printf(" -> ");
- (*ppReg)(vregD);
- vex_printf("\n\n");
- }
- /* Find the state entry for vregS. */
- Int n = vreg_state[k]; /* k is the index of vregS */
- if (n == INVALID_RREG_NO) {
- /* vregS is not currently in a real register. So we can't
- do the coalescing. Give up. */
- goto cannot_coalesce;
- }
- vassert(IS_VALID_RREGNO(n));
-
- /* Finally, we can do the coalescing. It's trivial -- merely
- claim vregS's register for vregD. */
- rreg_state[n].vreg = vregD;
- vassert(IS_VALID_VREGNO(hregIndex(vregD)));
- vassert(IS_VALID_VREGNO(hregIndex(vregS)));
- vreg_state[hregIndex(vregD)] = toShort(n);
- vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;
-
- /* This rreg has become associated with a different vreg and
- hence with a different spill slot. Play safe. */
- rreg_state[n].eq_spill_slot = False;
-
- /* Move on to the next insn. We skip the post-insn stuff for
- fixed registers, since this move should not interact with
- them in any way. */
- continue;
- }
- cannot_coalesce:
-
- /* ------ Free up rregs bound to dead vregs ------ */
-
- /* Look for vregs whose live range has just ended, and
- mark the associated rreg as free. */
-
- for (Int j = 0; j < n_rregs; j++) {
- if (rreg_state[j].disp != Bound)
- continue;
- UInt vregno = hregIndex(rreg_state[j].vreg);
- vassert(IS_VALID_VREGNO(vregno));
- if (vreg_lrs[vregno].dead_before <= ii) {
- rreg_state[j].disp = Free;
- rreg_state[j].eq_spill_slot = False;
- Int m = hregIndex(rreg_state[j].vreg);
- vassert(IS_VALID_VREGNO(m));
- vreg_state[m] = INVALID_RREG_NO;
- if (DEBUG_REGALLOC) {
- vex_printf("free up ");
- (*ppReg)(univ->regs[j]);
- vex_printf("\n");
- }
- }
- }
-
- /* ------ Pre-instruction actions for fixed rreg uses ------ */
-
- /* Now we have to deal with rregs which are about to be made
- live by this instruction -- in other words, are entering into
- one of their live ranges. If any such rreg holds a vreg, we
- will have to free up the rreg. The simplest solution which
- is correct is to spill the rreg.
-
- Note we could do better:
- * Could move it into some other free rreg, if one is available
-
- Do this efficiently, by incrementally stepping along an array
- of rreg HLRs that are known to be sorted by start point
- (their .live_after field).
- */
- while (True) {
- vassert(rreg_lrs_la_next >= 0);
- vassert(rreg_lrs_la_next <= rreg_lrs_used);
- if (rreg_lrs_la_next == rreg_lrs_used)
- break; /* no more real reg live ranges to consider */
- if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
- break; /* next live range does not yet start */
- vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
- /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
- Find the associated rreg_state entry. */
- /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
- Real register live ranges are guaranteed to be well-formed
- in that they start with a write to the register -- Stage 2
- rejects any code not satisfying this. So the correct
- question to ask is whether
- rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
- whether the reg becomes live after this insn -- rather
- than before it. */
- if (DEBUG_REGALLOC) {
- vex_printf("need to free up rreg: ");
- (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
- vex_printf("\n\n");
- }
- Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);
-
- /* If this fails, we don't have an entry for this rreg.
- Which we should. */
- vassert(IS_VALID_RREGNO(k));
- Int m = hregIndex(rreg_state[k].vreg);
- if (rreg_state[k].disp == Bound) {
- /* Yes, there is an associated vreg. Spill it if it's
- still live. */
- vassert(IS_VALID_VREGNO(m));
- vreg_state[m] = INVALID_RREG_NO;
- if (vreg_lrs[m].dead_before > ii) {
- vassert(vreg_lrs[m].reg_class != HRcINVALID);
- if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
- HInstr* spill1 = NULL;
- HInstr* spill2 = NULL;
- (*genSpill)( &spill1, &spill2, univ->regs[k],
- vreg_lrs[m].spill_offset, mode64 );
- vassert(spill1 || spill2); /* can't both be NULL */
- if (spill1)
- EMIT_INSTR(spill1);
- if (spill2)
- EMIT_INSTR(spill2);
- }
- rreg_state[k].eq_spill_slot = True;
- }
- }
- rreg_state[k].disp = Unavail;
- rreg_state[k].vreg = INVALID_HREG;
- rreg_state[k].eq_spill_slot = False;
-
- /* check for further rregs entering HLRs at this point */
- rreg_lrs_la_next++;
- }
-
- if (DEBUG_REGALLOC) {
- vex_printf("After pre-insn actions for fixed regs:\n");
- PRINT_STATE;
- vex_printf("\n");
- }
-
- /* ------ Deal with the current instruction. ------ */
-
- /* Finally we can begin the processing of this instruction
- itself. The aim is to free up enough rregs for this insn.
- This may generate spill stores since we may have to evict
- some vregs currently in rregs. Also generates spill loads.
- We also build up the final vreg->rreg mapping to be applied
- to the insn. */
-
- initHRegRemap(&remap);
-
- /* ------------ BEGIN directReload optimisation ----------- */
-
- /* If the instruction reads exactly one vreg which is currently
- in a spill slot, and this is last use of that vreg, see if we
- can convert the instruction into one that reads directly from
- the spill slot. This is clearly only possible for x86 and
- amd64 targets, since ppc and arm are load-store
- architectures. If successful, replace instrs_in->arr[ii]
- with this new instruction, and recompute its reg usage, so
- that the change is invisible to the standard-case handling
- that follows. */
-
- if (directReload && reg_usage_arr[ii].n_vRegs <= 2) {
- Bool debug_direct_reload = False;
- HReg cand = INVALID_HREG;
- Int nreads = 0;
- Short spilloff = 0;
-
- for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
-
- HReg vreg = reg_usage_arr[ii].vRegs[j];
- vassert(hregIsVirtual(vreg));
-
- if (reg_usage_arr[ii].vMode[j] == HRmRead) {
- nreads++;
- Int m = hregIndex(vreg);
- vassert(IS_VALID_VREGNO(m));
- Int k = vreg_state[m];
- if (!IS_VALID_RREGNO(k)) {
- /* ok, it is spilled. Now, is this its last use? */
- vassert(vreg_lrs[m].dead_before >= ii+1);
- if (vreg_lrs[m].dead_before == ii+1
- && hregIsInvalid(cand)) {
- spilloff = vreg_lrs[m].spill_offset;
- cand = vreg;
- }
- }
- }
- }
-
- if (nreads == 1 && ! hregIsInvalid(cand)) {
- HInstr* reloaded;
- if (reg_usage_arr[ii].n_vRegs == 2)
- vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
- reg_usage_arr[ii].vRegs[1]));
-
- reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
- if (debug_direct_reload) {
- vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
- ppInstr(instrs_in->arr[ii], mode64);
- }
- if (reloaded) {
- /* Update info about the insn, so it looks as if it had
- been in this form all along. */
- instrs_in->arr[ii] = reloaded;
- (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
- if (debug_direct_reload) {
- vex_printf(" --> ");
- ppInstr(reloaded, mode64);
- }
- }
-
- if (debug_direct_reload)
- vex_printf("\n");
- }
-
- }
-
- /* ------------ END directReload optimisation ------------ */
-
- /* for each virtual reg mentioned in the insn ... */
- for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
-
- HReg vreg = reg_usage_arr[ii].vRegs[j];
- vassert(hregIsVirtual(vreg));
-
- if (0) {
- vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
- }
-
- /* Now we're trying to find a rreg for "vreg". First of all,
- if it already has an rreg assigned, we don't need to do
- anything more. Inspect the current state to find out. */
- Int m = hregIndex(vreg);
- vassert(IS_VALID_VREGNO(m));
- Int n = vreg_state[m];
- if (IS_VALID_RREGNO(n)) {
- vassert(rreg_state[n].disp == Bound);
- addToHRegRemap(&remap, vreg, univ->regs[n]);
- /* If this rreg is written or modified, mark it as different
- from any spill slot value. */
- if (reg_usage_arr[ii].vMode[j] != HRmRead)
- rreg_state[n].eq_spill_slot = False;
- continue;
- } else {
- vassert(n == INVALID_RREG_NO);
- }
-
- /* No luck. The next thing to do is see if there is a
- currently free rreg available, of the correct class. If
- so, bag it. NOTE, we could improve this by selecting an
- rreg for which the next live-range event is as far ahead
- as possible. */
- Int k_suboptimal = -1;
- Int k;
- for (k = 0; k < n_rregs; k++) {
- if (rreg_state[k].disp != Free
- || hregClass(univ->regs[k]) != hregClass(vreg))
- continue;
- if (rreg_state[k].has_hlrs) {
- /* Well, at least we can use k_suboptimal if we really
- have to. Keep on looking for a better candidate. */
- k_suboptimal = k;
- } else {
- /* Found a preferable reg. Use it. */
- k_suboptimal = -1;
- break;
- }
- }
- if (k_suboptimal >= 0)
- k = k_suboptimal;
-
- if (k < n_rregs) {
- rreg_state[k].disp = Bound;
- rreg_state[k].vreg = vreg;
- Int p = hregIndex(vreg);
- vassert(IS_VALID_VREGNO(p));
- vreg_state[p] = toShort(k);
- addToHRegRemap(&remap, vreg, univ->regs[k]);
- /* Generate a reload if needed. This only creates needed
- reloads because the live range builder for vregs will
- guarantee that the first event for a vreg is a write.
- Hence, if this reference is not a write, it cannot be
- the first reference for this vreg, and so a reload is
- indeed needed. */
- if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
- vassert(vreg_lrs[p].reg_class != HRcINVALID);
- HInstr* reload1 = NULL;
- HInstr* reload2 = NULL;
- (*genReload)( &reload1, &reload2, univ->regs[k],
- vreg_lrs[p].spill_offset, mode64 );
- vassert(reload1 || reload2); /* can't both be NULL */
- if (reload1)
- EMIT_INSTR(reload1);
- if (reload2)
- EMIT_INSTR(reload2);
- /* This rreg is read or modified by the instruction.
- If it's merely read we can claim it now equals the
- spill slot, but not so if it is modified. */
- if (reg_usage_arr[ii].vMode[j] == HRmRead) {
- rreg_state[k].eq_spill_slot = True;
- } else {
- vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
- rreg_state[k].eq_spill_slot = False;
- }
- } else {
- rreg_state[k].eq_spill_slot = False;
- }
-
- continue;
- }
-
- /* Well, now we have no option but to spill a vreg. It's
- important to make a good choice of vreg to spill, and of
- course we need to be careful not to spill a vreg which is
- needed by this insn. */
-
- /* First, mark in the rreg_state, those rregs which are not spill
- candidates, due to holding a vreg mentioned by this
- instruction. Or being of the wrong class. */
- for (k = 0; k < n_rregs; k++) {
- rreg_state[k].is_spill_cand = False;
- if (rreg_state[k].disp != Bound)
- continue;
- if (hregClass(univ->regs[k]) != hregClass(vreg))
- continue;
- rreg_state[k].is_spill_cand = True;
- /* Note, the following loop visits only the virtual regs
- mentioned by the instruction. */
- for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
- if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
- rreg_state[k].is_spill_cand = False;
- break;
- }
- }
- }
-
- /* We can choose to spill any rreg satisfying
- rreg_state[r].is_spill_cand (so to speak). Choose r so that
- the next use of its associated vreg is as far ahead as
- possible, in the hope that this will minimise the number
- of consequent reloads required. */
- Int spillee
- = findMostDistantlyMentionedVReg (
- reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs );
-
- if (spillee == -1) {
- /* Hmmmmm. There don't appear to be any spill candidates.
- We're hosed. */
- vex_printf("reg_alloc: can't find a register in class: ");
- ppHRegClass(hregClass(vreg));
- vex_printf("\n");
- vpanic("reg_alloc: can't create a free register.");
- }
-
- /* Right. So we're going to spill rreg_state[spillee]. */
- vassert(IS_VALID_RREGNO(spillee));
- vassert(rreg_state[spillee].disp == Bound);
- /* check it's the right class */
- vassert(hregClass(univ->regs[spillee]) == hregClass(vreg));
- /* check we're not ejecting the vreg for which we are trying
- to free up a register. */
- vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
-
- m = hregIndex(rreg_state[spillee].vreg);
- vassert(IS_VALID_VREGNO(m));
-
- /* So here's the spill store. Assert that we're spilling a
- live vreg. */
- vassert(vreg_lrs[m].dead_before > ii);
- vassert(vreg_lrs[m].reg_class != HRcINVALID);
- if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
- HInstr* spill1 = NULL;
- HInstr* spill2 = NULL;
- (*genSpill)( &spill1, &spill2, univ->regs[spillee],
- vreg_lrs[m].spill_offset, mode64 );
- vassert(spill1 || spill2); /* can't both be NULL */
- if (spill1)
- EMIT_INSTR(spill1);
- if (spill2)
- EMIT_INSTR(spill2);
- }
-
- /* Update the rreg_state to reflect the new assignment for this
- rreg. */
- rreg_state[spillee].vreg = vreg;
- vreg_state[m] = INVALID_RREG_NO;
-
- rreg_state[spillee].eq_spill_slot = False; /* be safe */
-
- m = hregIndex(vreg);
- vassert(IS_VALID_VREGNO(m));
- vreg_state[m] = toShort(spillee);
-
- /* Now, if this vreg is being read or modified (as opposed to
- written), we have to generate a reload for it. */
- if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
- vassert(vreg_lrs[m].reg_class != HRcINVALID);
- HInstr* reload1 = NULL;
- HInstr* reload2 = NULL;
- (*genReload)( &reload1, &reload2, univ->regs[spillee],
- vreg_lrs[m].spill_offset, mode64 );
- vassert(reload1 || reload2); /* can't both be NULL */
- if (reload1)
- EMIT_INSTR(reload1);
- if (reload2)
- EMIT_INSTR(reload2);
- /* This rreg is read or modified by the instruction.
- If it's merely read we can claim it now equals the
- spill slot, but not so if it is modified. */
- if (reg_usage_arr[ii].vMode[j] == HRmRead) {
- rreg_state[spillee].eq_spill_slot = True;
- } else {
- vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
- rreg_state[spillee].eq_spill_slot = False;
- }
- }
-
- /* So after much twisting and turning, we have vreg mapped to
- rreg_state[spillee].rreg. Note that in the map. */
- addToHRegRemap(&remap, vreg, univ->regs[spillee]);
-
- } /* iterate over virtual registers in this instruction. */
-
- /* We've finished clowning around with registers in this instruction.
- Three results:
- - the running rreg_state[] has been updated
- - a suitable vreg->rreg mapping for this instruction has been
- constructed
- - spill and reload instructions may have been emitted.
-
- The final step is to apply the mapping to the instruction,
- and emit that.
- */
-
- /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
- (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
- EMIT_INSTR( instrs_in->arr[ii] );
-
- if (DEBUG_REGALLOC) {
- vex_printf("After dealing with current insn:\n");
- PRINT_STATE;
- vex_printf("\n");
- }
-
- /* ------ Post-instruction actions for fixed rreg uses ------ */
-
- /* Now we need to check for rregs exiting fixed live ranges
- after this instruction, and if so mark them as free. */
- while (True) {
- vassert(rreg_lrs_db_next >= 0);
- vassert(rreg_lrs_db_next <= rreg_lrs_used);
- if (rreg_lrs_db_next == rreg_lrs_used)
- break; /* no more real reg live ranges to consider */
- if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
- break; /* next live range does not yet start */
- vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
- /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live
- range. Mark it as such in the main rreg_state array. */
- HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
- vassert(!hregIsVirtual(reg));
- Int k = hregIndex(reg);
- vassert(IS_VALID_RREGNO(k));
- vassert(rreg_state[k].disp == Unavail);
- rreg_state[k].disp = Free;
- rreg_state[k].vreg = INVALID_HREG;
- rreg_state[k].eq_spill_slot = False;
-
- /* check for further rregs leaving HLRs at this point */
- rreg_lrs_db_next++;
- }
-
- if (DEBUG_REGALLOC) {
- vex_printf("After post-insn actions for fixed regs:\n");
- PRINT_STATE;
- vex_printf("\n");
- }
-
- } /* iterate over insns */
-
- /* ------ END: Process each insn in turn. ------ */
-
- /* free(rreg_state); */
- /* free(rreg_lrs); */
- /* if (vreg_lrs) free(vreg_lrs); */
-
- /* Paranoia */
- vassert(rreg_lrs_la_next == rreg_lrs_used);
- vassert(rreg_lrs_db_next == rreg_lrs_used);
-
- return instrs_out;
-
-# undef INVALID_INSTRNO
-# undef EMIT_INSTR
-# undef PRINT_STATE
-}
-
-
-
-/*---------------------------------------------------------------*/
-/*--- host_reg_alloc2.c ---*/
-/*---------------------------------------------------------------*/
--- /dev/null
+/*----------------------------------------------------------------------------*/
+/*--- begin host_generic_reg_alloc3.c ---*/
+/*----------------------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation framework.
+
+ Copyright (C) 2017-2017 Ivo Raisr
+ ivosh@ivosh.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+#include "libvex_basictypes.h"
+#include "libvex.h"
+
+#include "main_util.h"
+#include "host_generic_regs.h"
+
+/* Set to 1 for lots of debugging output. */
+#define DEBUG_REGALLOC 0
+
+/* Set to 1 for sanity checking at every instruction.
+ Set to 0 to sanity check only every 17th instruction and the last one. */
+#define SANITY_CHECKS_EVERY_INSTR 0
+
+
+#define INVALID_INSTRNO (-2)
+
+/* Register allocator state is kept in an array of VRegState's.
+ There is an element for every virtual register (vreg).
+ Elements are indexed [0 .. n_vregs-1].
+ Records information about vreg live range and its state. */
+typedef
+ struct {
+ /* Live range, register class and spill offset are computed during the
+ first register allocator pass and remain unchanged after that. */
+
+ /* This vreg becomes live with this instruction (inclusive). Contains
+ either an instruction number or INVALID_INSTRNO. */
+ Short live_after;
+ /* This vreg becomes dead before this instruction (exclusive). Contains
+ either an instruction number or INVALID_INSTRNO. */
+ Short dead_before;
+ /* What kind of register this is. */
+ HRegClass reg_class;
+
+ /* What is its current disposition? */
+ enum { Unallocated, /* Neither spilled nor assigned to a real reg. */
+ Assigned, /* Assigned to a real register, viz rreg. */
+ Spilled /* Spilled to the spill slot. */
+ } disp;
+
+ /* If .disp == Assigned, what rreg is it bound to? */
+ HReg rreg;
+
+ /* The "home" spill slot. The offset is relative to the beginning of
+ the guest state. */
+ UShort spill_offset;
+ }
+ VRegState;
+
+/* Records information on a real-register live range, associated with
+ a particular real register. Computed once; does not change. */
+typedef
+ struct {
+ /* This rreg becomes live with this instruction (inclusive). Contains
+ either an instruction number or INVALID_INSTRNO. */
+ Short live_after;
+ /* This rreg becomes dead before this instruction (exclusive). Contains
+ either an instruction number or INVALID_INSTRNO. */
+ Short dead_before;
+ }
+ RRegLR;
+
+/* The allocator also maintains a redundant mapping (rreg_state) from rreg
+ numbers back to entries in vreg_state. It is redundant because iff
+ rreg_state[r].vreg == v then hregIndex(vreg_state[v].rreg) == r -- that is,
+ the two entries point at each other. The purpose of this is to speed up
+ activities which involve looking for a particular rreg: there is no need to
+ scan the vreg_state looking for it, just index directly into rreg_state.
+ The FAQ "does this rreg already have an associated vreg" is the main
+ beneficiary.
+ The identity of the real register is not recorded here, because the index
+ of this structure in |rreg_state| is the index number of the register, and
+ the register itself can be extracted from the RRegUniverse (univ). */
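+
+/* Illustrative statement of the invariant (it is also checked by the
+ periodic sanity pass in stage 3 below): if rreg_state[r].disp == Bound
+ and hregIndex(rreg_state[r].vreg) == v, then vreg_state[v].disp == Assigned
+ and hregIndex(vreg_state[v].rreg) == r. */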
+typedef
+ struct {
+ /* What is its current disposition? */
+ enum { Free, /* Not bound to any vreg. */
+ Bound, /* Bound to a vreg, viz vreg. */
+ Reserved /* Reserved for an instruction. */
+ } disp;
+
+ /* If .disp == Bound, what vreg is it bound to? */
+ HReg vreg;
+
+ /* Live ranges. Computed during the first register allocator pass and
+ remain unchanged after that. */
+ RRegLR* lrs;
+ UInt lrs_size;
+ UInt lrs_used;
+
+ /* Live range corresponding to the currently processed instruction.
+ Index into |lrs| array. */
+ UInt lr_current;
+ }
+ RRegState;
+
+#define IS_VALID_VREGNO(v) ((v) >= 0 && (v) < n_vregs)
+#define IS_VALID_RREGNO(r) ((r) >= 0 && (r) < n_rregs)
+
+/* Compute the index of the highest and lowest 1 in a ULong, respectively.
+ Results are undefined if the argument is zero. Don't pass it zero :) */
+static inline UInt ULong__maxIndex ( ULong w64 ) {
+ return 63 - __builtin_clzll(w64);
+}
+
+static inline UInt ULong__minIndex ( ULong w64 ) {
+ return __builtin_ctzll(w64);
+}
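+
+/* For example: for w64 == 0x50 (bits 4 and 6 set), ULong__minIndex gives 4
+ and ULong__maxIndex gives 6. Stage 1 below uses these to bound its scan
+ over the rregs mentioned by an instruction. */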
+
+static inline void enlarge_rreg_lrs(RRegState* rreg)
+{
+ vassert(rreg->lrs_used == rreg->lrs_size);
+
+ RRegLR* lr2 = LibVEX_Alloc_inline(2 * rreg->lrs_used * sizeof(RRegLR));
+ for (UInt l = 0; l < rreg->lrs_used; l++) {
+ lr2[l] = rreg->lrs[l];
+ }
+
+ rreg->lrs = lr2;
+ rreg->lrs_size = 2 * rreg->lrs_used;
+}
+
+static inline void print_state(
+ const RegAllocControl* con,
+ VRegState* vreg_state, UInt n_vregs,
+ RRegState* rreg_state, UInt n_rregs,
+ UShort current_ii)
+{
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ const VRegState* vreg = &vreg_state[v_idx];
+
+ if (vreg->live_after == INVALID_INSTRNO) {
+ continue; /* This is a dead vreg; it never becomes live. */
+ }
+ vex_printf("vreg_state[%3u] \t", v_idx);
+
+ UInt written;
+ switch (vreg->disp) {
+ case Unallocated:
+ written = vex_printf("unallocated");
+ break;
+ case Assigned:
+ written = vex_printf("assigned to ");
+ written += con->ppReg(vreg->rreg);
+ break;
+ case Spilled:
+ written = vex_printf("spilled at offset %u", vreg->spill_offset);
+ break;
+ default:
+ vassert(0);
+ }
+
+ for (Int w = 30 - written; w > 0; w--) {
+ vex_printf(" ");
+ }
+
+ if (vreg->live_after > (Short) current_ii) {
+ vex_printf("[not live yet]\n");
+ } else if ((Short) current_ii >= vreg->dead_before) {
+ vex_printf("[now dead]\n");
+ } else {
+ vex_printf("[live]\n");
+ }
+ }
+
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ const RRegState* rreg = &rreg_state[r_idx];
+ vex_printf("rreg_state[%2u] = ", r_idx);
+ UInt written = con->ppReg(con->univ->regs[r_idx]);
+ for (Int w = 10 - written; w > 0; w--) {
+ vex_printf(" ");
+ }
+
+ switch (rreg->disp) {
+ case Free:
+ vex_printf("free\n");
+ break;
+ case Bound:
+ vex_printf("bound for ");
+ con->ppReg(rreg->vreg);
+ vex_printf("\n");
+ break;
+ case Reserved:
+ vex_printf("reserved - live range [%d, %d)\n",
+ rreg->lrs[rreg->lr_current].live_after,
+ rreg->lrs[rreg->lr_current].dead_before);
+ break;
+ }
+ }
+}
+
+static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out,
+ const RegAllocControl* con, const HChar* why)
+{
+ if (DEBUG_REGALLOC) {
+ vex_printf("** ");
+ con->ppInstr(instr, con->mode64);
+ if (why != NULL) {
+ vex_printf(" (%s)", why);
+ }
+ vex_printf("\n\n");
+ }
+
+ addHInstr(instrs_out, instr);
+}
+
+/* Spills a vreg assigned to some rreg.
+ The vreg is spilled and the rreg is freed.
+ Returns rreg's index. */
+static inline UInt spill_vreg(
+ HReg vreg, UInt v_idx, UInt current_ii, VRegState* vreg_state, UInt n_vregs,
+ RRegState* rreg_state, UInt n_rregs, HInstrArray* instrs_out,
+ const RegAllocControl* con)
+{
+ /* Check some invariants first. */
+ vassert(IS_VALID_VREGNO((v_idx)));
+ vassert(vreg_state[v_idx].disp == Assigned);
+ HReg rreg = vreg_state[v_idx].rreg;
+ UInt r_idx = hregIndex(rreg);
+ vassert(IS_VALID_RREGNO(r_idx));
+ vassert(hregClass(con->univ->regs[r_idx]) == hregClass(vreg));
+ vassert(vreg_state[v_idx].dead_before > (Short) current_ii);
+ vassert(vreg_state[v_idx].reg_class != HRcINVALID);
+
+ /* Generate spill. */
+ HInstr* spill1 = NULL;
+ HInstr* spill2 = NULL;
+ con->genSpill(&spill1, &spill2, rreg, vreg_state[v_idx].spill_offset,
+ con->mode64);
+ vassert(spill1 != NULL || spill2 != NULL); /* cannot be both NULL */
+ if (spill1 != NULL) {
+ emit_instr(spill1, instrs_out, con, "spill1");
+ }
+ if (spill2 != NULL) {
+ emit_instr(spill2, instrs_out, con, "spill2");
+ }
+
+ /* Update register allocator state. */
+ vreg_state[v_idx].disp = Spilled;
+ vreg_state[v_idx].rreg = INVALID_HREG;
+ rreg_state[r_idx].disp = Free;
+ rreg_state[r_idx].vreg = INVALID_HREG;
+
+ return r_idx;
+}
+
+/* Chooses a vreg to be spilled based on various criteria.
+ The vreg must not be from the instruction being processed, that is, it must
+ not be listed in instr_regusage->vRegs. */
+static inline HReg find_vreg_to_spill(
+ VRegState* vreg_state, UInt n_vregs,
+ RRegState* rreg_state, UInt n_rregs,
+ const HRegUsage* instr_regusage, HRegClass target_hregclass,
+ const HRegUsage* reg_usage, UInt scan_forward_from, UInt scan_forward_max,
+ const RegAllocControl* con)
+{
+ /* Scan forwards a few instructions to find the most distant mentioned
+ use of a vreg. We can scan in the range of (inclusive):
+ - reg_usage[scan_forward_from]
+ - reg_usage[scan_forward_end], where scan_forward_end
+ = MIN(scan_forward_max, scan_forward_from + FEW_INSTRUCTIONS). */
+# define FEW_INSTRUCTIONS 5
+ UInt scan_forward_end
+ = (scan_forward_max <= scan_forward_from + FEW_INSTRUCTIONS) ?
+ scan_forward_max : scan_forward_from + FEW_INSTRUCTIONS;
+# undef FEW_INSTRUCTIONS
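+ /* For example, with scan_forward_from == 10 and scan_forward_max == 100,
+ the window examined is reg_usage[10 .. 15]; with scan_forward_max == 12
+ it shrinks to reg_usage[10 .. 12]. */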
+
+ HReg vreg_found = INVALID_HREG;
+ UInt distance_so_far = 0;
+
+ for (UInt r_idx = con->univ->allocable_start[target_hregclass];
+ r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) {
+ if (rreg_state[r_idx].disp == Bound) {
+ HReg vreg = rreg_state[r_idx].vreg;
+ if (! HRegUsage__contains(instr_regusage, vreg)) {
+ UInt ii = scan_forward_from;
+ for ( ; ii <= scan_forward_end; ii++) {
+ if (HRegUsage__contains(&reg_usage[ii], vreg)) {
+ break;
+ }
+ }
+
+ if (ii - scan_forward_from > distance_so_far) {
+ distance_so_far = ii - scan_forward_from;
+ vreg_found = vreg;
+ if (scan_forward_from + distance_so_far > scan_forward_end) {
+ break; /* We are at the end. Nothing could be better. */
+ }
+ }
+ }
+ }
+ }
+
+ if (hregIsInvalid(vreg_found)) {
+ vex_printf("doRegisterAllocation_v3: cannot find a register in class: ");
+ ppHRegClass(target_hregclass);
+ vex_printf("\n");
+ vpanic("doRegisterAllocation_v3: cannot find a register.");
+ }
+
+ return vreg_found;
+}
+
+/* Find a free rreg of the correct class.
+ Tries to find an rreg whose live range (if any) is as far ahead in the
+ incoming instruction stream as possible. An ideal rreg candidate is
+ a callee-save register because it won't be used for parameter passing
+ around helper function calls. */
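+/* For example (illustrative): a free rreg with no hard live ranges at all,
+ typically a callee-save register, is taken immediately; otherwise the free
+ rreg whose next hard live range starts furthest beyond current_ii is
+ preferred. */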
+static Bool find_free_rreg(
+ VRegState* vreg_state, UInt n_vregs,
+ RRegState* rreg_state, UInt n_rregs,
+ UInt current_ii, HRegClass target_hregclass,
+ Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found)
+{
+ Bool found = False;
+ UInt distance_so_far = 0; /* running max for |live_after - current_ii| */
+
+ for (UInt r_idx = con->univ->allocable_start[target_hregclass];
+ r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) {
+ const RRegState* rreg = &rreg_state[r_idx];
+ if (rreg->disp == Free) {
+ if (rreg->lrs_used == 0) {
+ found = True;
+ *r_idx_found = r_idx;
+ break; /* There could be nothing better, so break now. */
+ } else {
+ const RRegLR* lr = &rreg->lrs[rreg->lr_current];
+ if (lr->live_after > (Short) current_ii) {
+ /* Not live, yet. */
+ if ((lr->live_after - (Short) current_ii) > distance_so_far) {
+ distance_so_far = lr->live_after - (Short) current_ii;
+ found = True;
+ *r_idx_found = r_idx;
+ }
+ } else if ((Short) current_ii >= lr->dead_before) {
+ /* Now dead. Effectively as if there is no LR now. */
+ found = True;
+ *r_idx_found = r_idx;
+ break; /* There could be nothing better, so break now. */
+ } else {
+ /* Going live for this instruction. This could happen only when
+ rregs are being reserved en mass, for example before
+ a helper call. */
+ vassert(reserve_phase);
+ }
+ }
+ }
+ }
+
+ return found;
+}
+
+/* A target-independent register allocator (v3). Requires various functions
+ which it uses to deal abstractly with instructions and registers, since it
+ cannot have any target-specific knowledge.
+
+ Returns a new list of instructions, which, as a result of the behaviour of
+ mapRegs, will be in-place modifications of the original instructions.
+
+ Requires that the incoming code has been generated using vreg numbers
+ 0, 1 .. n_vregs-1. Appearance of a vreg outside that range is a checked
+ run-time error.
+
+ Takes unallocated instructions and returns allocated instructions.
+*/
+HInstrArray* doRegisterAllocation(
+ /* Incoming virtual-registerised code. */
+ HInstrArray* instrs_in,
+
+ /* Register allocator controls to use. */
+ const RegAllocControl* con
+)
+{
+ vassert((con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN) == 0);
+
+ /* The main register allocator state. */
+ UInt n_vregs = instrs_in->n_vregs;
+ VRegState* vreg_state = NULL;
+ if (n_vregs > 0) {
+ vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(VRegState));
+ }
+
+ /* Redundant rreg -> vreg state. */
+ UInt n_rregs = con->univ->allocable;
+ RRegState* rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
+
+ /* If this is not so, the universe we have is nonsensical. */
+ vassert(n_rregs > 0);
+ STATIC_ASSERT(N_RREGUNIVERSE_REGS == 64);
+
+ /* Info on register usage in the incoming instruction array. Computed once
+ and remains unchanged, more or less; updated sometimes by the
+ direct-reload optimisation. */
+ HRegUsage* reg_usage
+ = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
+
+ /* The live range numbers are signed shorts, and so limiting the
+ number of instructions to 15000 comfortably guards against them
+ overflowing 32k. */
+ vassert(instrs_in->arr_used <= 15000);
+
+ /* The output array of instructions. */
+ HInstrArray* instrs_out = newHInstrArray();
+
+
+# define OFFENDING_VREG(_v_idx, _instr, _mode) \
+ do { \
+ vex_printf("\n\nOffending vreg = %u\n", (_v_idx)); \
+ vex_printf("\nOffending instruction = "); \
+ con->ppInstr((_instr), con->mode64); \
+ vex_printf("\n"); \
+ vpanic("doRegisterAllocation_v3: first event for vreg is #_mode# \
+ (should be Write)"); \
+ } while (0)
+
+# define OFFENDING_RREG(_r_idx, _instr, _mode) \
+ do { \
+ vex_printf("\n\nOffending rreg = "); \
+ con->ppReg(con->univ->regs[(_r_idx)]); \
+ vex_printf("\nOffending instruction = "); \
+ con->ppInstr((_instr), con->mode64); \
+ vex_printf("\n"); \
+ vpanic("doRegisterAllocation_v3: first event for rreg is #_mode# \
+ (should be Write)"); \
+ } while (0)
+
+
+/* Finds an rreg of the correct class.
+ If a free rreg is not found, then spills a vreg not used by the current
+ instruction and frees up the corresponding rreg. */
+# define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase) \
+ ({ \
+ UInt _r_free_idx = -1; \
+ Bool free_rreg_found = find_free_rreg(vreg_state, n_vregs, \
+ rreg_state, n_rregs, (_ii), \
+ (_reg_class), (_reserve_phase), \
+ con, &_r_free_idx); \
+ if (!free_rreg_found) { \
+ HReg vreg_to_spill = find_vreg_to_spill( \
+ vreg_state, n_vregs, rreg_state, n_rregs, \
+ &reg_usage[(_ii)], (_reg_class), \
+ reg_usage, (_ii) + 1, \
+ instrs_in->arr_used - 1, con); \
+ _r_free_idx = spill_vreg(vreg_to_spill, hregIndex(vreg_to_spill), \
+ (_ii), vreg_state, n_vregs, \
+ rreg_state, n_rregs, \
+ instrs_out, con); \
+ } \
+ \
+ vassert(IS_VALID_RREGNO(_r_free_idx)); \
+ \
+ _r_free_idx; \
+ })
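+
+/* Note: the macro body is a GCC/Clang statement expression and evaluates to
+ _r_free_idx. A typical (illustrative) use from the instruction loop below:
+ UInt r_idx = FIND_OR_MAKE_FREE_RREG(ii, v_idx, reg_class, False); */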
+
+
+ /* --- Stage 0. Initialize the state. --- */
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ vreg_state[v_idx].live_after = INVALID_INSTRNO;
+ vreg_state[v_idx].dead_before = INVALID_INSTRNO;
+ vreg_state[v_idx].reg_class = HRcINVALID;
+ vreg_state[v_idx].disp = Unallocated;
+ vreg_state[v_idx].rreg = INVALID_HREG;
+ vreg_state[v_idx].spill_offset = 0;
+ }
+
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ RRegState* rreg = &rreg_state[r_idx];
+ rreg->disp = Free;
+ rreg->vreg = INVALID_HREG;
+ rreg->lrs_size = 4;
+ rreg->lrs = LibVEX_Alloc_inline(rreg->lrs_size * sizeof(RRegLR));
+ rreg->lrs_used = 0;
+ rreg->lr_current = 0;
+ }
+
+
+ /* --- Stage 1. Scan the incoming instructions. --- */
+ for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
+ const HInstr* instr = instrs_in->arr[ii];
+
+ con->getRegUsage(&reg_usage[ii], instr, con->mode64);
+
+ if (0) {
+ vex_printf("\n%u stage 1: ", ii);
+ con->ppInstr(instr, con->mode64);
+ vex_printf("\n");
+ ppHRegUsage(con->univ, &reg_usage[ii]);
+ }
+
+ /* Process virtual registers mentioned in the instruction. */
+ for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
+ HReg vreg = reg_usage[ii].vRegs[j];
+ vassert(hregIsVirtual(vreg));
+
+ UInt v_idx = hregIndex(vreg);
+ if (!IS_VALID_VREGNO(v_idx)) {
+ vex_printf("\n");
+ con->ppInstr(instr, con->mode64);
+ vex_printf("\n");
+ vex_printf("vreg %u (n_vregs %u)\n", v_idx, n_vregs);
+ vpanic("doRegisterAllocation_v3: out-of-range vreg");
+ }
+
+ /* Note the register class. */
+ if (vreg_state[v_idx].reg_class == HRcINVALID) {
+ /* First mention of this vreg. */
+ vreg_state[v_idx].reg_class = hregClass(vreg);
+ } else {
+ /* Seen it before, so check for consistency. */
+ vassert(vreg_state[v_idx].reg_class == hregClass(vreg));
+ }
+
+ /* Consider live ranges. */
+ switch (reg_usage[ii].vMode[j]) {
+ case HRmRead:
+ if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+ OFFENDING_VREG(v_idx, instr, "Read");
+ }
+ vreg_state[v_idx].dead_before = toShort(ii + 1);
+ break;
+ case HRmWrite:
+ if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+ vreg_state[v_idx].live_after = toShort(ii);
+ }
+ vreg_state[v_idx].dead_before = toShort(ii + 1);
+ break;
+ case HRmModify:
+ if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+ OFFENDING_VREG(v_idx, instr, "Modify");
+ }
+ vreg_state[v_idx].dead_before = toShort(ii + 1);
+ break;
+ default:
+ vassert(0);
+ }
+ }
+
+ /* Process real registers mentioned in the instruction. */
+ const ULong rRead = reg_usage[ii].rRead;
+ const ULong rWritten = reg_usage[ii].rWritten;
+ const ULong rMentioned = rRead | rWritten;
+
+ if (rMentioned != 0) {
+ UInt rReg_minIndex = ULong__minIndex(rMentioned);
+ UInt rReg_maxIndex = ULong__maxIndex(rMentioned);
+ /* Don't bother to look at registers which are not available
+ to the allocator, such as the stack or guest state pointers;
+ we never visit them. We asserted above that n_rregs > 0,
+ so (n_rregs - 1) is safe. */
+ if (rReg_maxIndex >= n_rregs) {
+ rReg_maxIndex = n_rregs - 1;
+ }
+
+ for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) {
+ const ULong jMask = 1ULL << r_idx;
+
+ if (LIKELY((rMentioned & jMask) == 0)) {
+ continue;
+ }
+
+ RRegState* rreg = &rreg_state[r_idx];
+ const Bool isR = (rRead & jMask) != 0;
+ const Bool isW = (rWritten & jMask) != 0;
+
+ if (isW && !isR) {
+ if (rreg->lrs_used == rreg->lrs_size) {
+ enlarge_rreg_lrs(rreg);
+ }
+
+ rreg->lrs[rreg->lrs_used].live_after = ii;
+ rreg->lrs[rreg->lrs_used].dead_before = ii + 1;
+ rreg->lrs_used += 1;
+ } else if (!isW && isR) {
+ if ((rreg->lrs_used == 0)
+ || (rreg->lrs[rreg->lrs_used - 1].live_after
+ == INVALID_INSTRNO)) {
+ OFFENDING_RREG(r_idx, instr, "Read");
+ }
+ rreg->lrs[rreg->lrs_used - 1].dead_before = ii + 1;
+ } else {
+ vassert(isR && isW);
+ if ((rreg->lrs_used == 0)
+ || (rreg->lrs[rreg->lrs_used - 1].live_after
+ == INVALID_INSTRNO)) {
+ OFFENDING_RREG(r_idx, instr, "Modify");
+ }
+ rreg->lrs[rreg->lrs_used - 1].dead_before = ii + 1;
+ }
+ }
+ }
+ }
+
+ if (DEBUG_REGALLOC) {
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ vex_printf("vreg %3u: [%3d, %3d)\n",
+ v_idx, vreg_state[v_idx].live_after,
+ vreg_state[v_idx].dead_before);
+ }
+
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ const RRegState* rreg = &rreg_state[r_idx];
+ vex_printf("rreg %2u (", r_idx);
+ UInt written = con->ppReg(con->univ->regs[r_idx]);
+ vex_printf("):");
+ for (Int t = 15 - written; t > 0; t--) {
+ vex_printf(" ");
+ }
+
+ for (UInt l = 0; l < rreg->lrs_used; l++) {
+ vex_printf("[%3d, %3d) ",
+ rreg->lrs[l].live_after, rreg->lrs[l].dead_before);
+ }
+ vex_printf("\n");
+ }
+ }
+
+ /* --- Stage 2. Allocate spill slots. --- */
+
+ /* Each spill slot is 8 bytes long. For vregs which take more than 64 bits
+ to spill (for example classes Flt64 and Vec128), we have to allocate two
+ consecutive spill slots. For 256 bit registers (class Vec256), we have to
+ allocate four consecutive spill slots.
+
+ For Vec128-class on PowerPC, the spill slot's actual address must be
+ 16-byte aligned. Since the spill slot's address is computed as an offset
+ from the guest state pointer, and since the user of the generated code
+ must set that pointer to a 32-byte aligned value, we have the residual
+ obligation here of choosing a 16-byte aligned spill slot offset for
+ Vec128-class values. Since each spill slot is 8 bytes long, that means for
+ Vec128-class values we must allocate a spill slot number which is
+ zero mod 2.
+
+ Similarly, for Vec256 class on amd64, find a spill slot number which is
+ zero mod 4. This guarantees it will be 32-byte aligned, which isn't
+ actually necessary on amd64 (we use movUpd etc to spill), but seems like
+ a good practice.
+
+ Do a rank-based allocation of vregs to spill slot numbers. We put as few
+ values as possible in spill slots, but nevertheless need to have a spill
+ slot available for all vregs, just in case. */
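+
+ /* Worked example (illustrative): with 8-byte slots, an HRcVec128 vreg live
+ over [5, 20) takes the lowest even-numbered slot pair ss, ss+1 for which
+ both ss_busy_until_before entries are <= 5, and then marks both entries
+ busy until 20. An HRcInt64 vreg takes the lowest single free slot. */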
+
+# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
+ STATIC_ASSERT((N_SPILL64S % 2) == 0);
+ STATIC_ASSERT((LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN) == 0);
+
+ Short ss_busy_until_before[N_SPILL64S];
+ vex_bzero(&ss_busy_until_before, sizeof(ss_busy_until_before));
+
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ /* True iff this vreg is unused. In which case we also expect that the
+ reg_class field for it has not been set. */
+ if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
+ vassert(vreg_state[v_idx].reg_class == HRcINVALID);
+ continue;
+ }
+
+ /* The spill slots are 64 bits in size. As per the comment on definition
+ of HRegClass in host_generic_regs.h, that means, to spill a vreg of
+ class Flt64 or Vec128, we'll need to find two adjacent spill slots to
+ use. For Vec256, we'll need to find four adjacent slots to use. Note,
+ this logic needs to be kept in sync with the size info on the
+ definition of HRegClass. */
+ UInt ss_no;
+ switch (vreg_state[v_idx].reg_class) {
+ case HRcFlt64:
+ case HRcVec128:
+ /* Find two adjacent free slots which provide up to 128 bits to
+ spill the vreg. Since we are trying to find an even:odd pair,
+ move along in steps of 2 (slots). */
+ for (ss_no = 0; ss_no < N_SPILL64S - 1; ss_no += 2)
+ if (ss_busy_until_before[ss_no + 0] <= vreg_state[v_idx].live_after
+ && ss_busy_until_before[ss_no + 1] <= vreg_state[v_idx].live_after)
+ break;
+ if (ss_no >= N_SPILL64S - 1) {
+ vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
+ }
+ ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before;
+ ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before;
+ break;
+ default:
+ /* The ordinary case -- just find a single lowest-numbered spill
+ slot which is available at the start point of this interval,
+ and assign the interval to it. */
+ for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) {
+ if (ss_busy_until_before[ss_no] <= vreg_state[v_idx].live_after)
+ break;
+ }
+ if (ss_no == N_SPILL64S) {
+ vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
+ }
+ ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before;
+ break;
+ }
+
+ /* This reflects VEX's hard-wired knowledge of the guest state layout:
+ the guest state itself, then two equal sized areas following it for two
+ sets of shadow state, and then the spill area. */
+ vreg_state[v_idx].spill_offset
+ = toShort(con->guest_sizeB * 3 + ss_no * 8);
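+ /* For example (hypothetical sizes): if guest_sizeB == 1024 and ss_no == 2,
+ the offset is 3 * 1024 + 2 * 8 == 3088, which is 16-aligned as required
+ for a Vec128-class vreg. */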
+
+ /* Independent check that we've made a sane choice of the slot. */
+ switch (vreg_state[v_idx].reg_class) {
+ case HRcVec128: case HRcFlt64:
+ vassert((vreg_state[v_idx].spill_offset % 16) == 0);
+ break;
+ default:
+ vassert((vreg_state[v_idx].spill_offset % 8) == 0);
+ break;
+ }
+ }
+
+ if (0) {
+ vex_printf("\n\n");
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++)
+ vex_printf("vreg %3u --> spill offset %u\n",
+ v_idx, vreg_state[v_idx].spill_offset);
+ }
+
+
+ /* --- Stage 3. Process instructions. --- */
+ for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
+ HInstr* instr = instrs_in->arr[ii];
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("\n====----====---- Instr %d ----====----====\n", ii);
+ vex_printf("---- ");
+ con->ppInstr(instrs_in->arr[ii], con->mode64);
+ vex_printf("\n\nInitial state:\n");
+ print_state(con, vreg_state, n_vregs, rreg_state, n_rregs, ii);
+ vex_printf("\n");
+ }
+
+ /* ------------ Sanity checks ------------ */
+
+ /* Sanity checks are relatively expensive. So they are done only once
+ every 17 instructions, and just before the last instruction. */
+ Bool do_sanity_check
+ = toBool(
+ SANITY_CHECKS_EVERY_INSTR
+ || ii == instrs_in->arr_used - 1
+ || (ii > 0 && (ii % 17) == 0)
+ );
+
+ if (do_sanity_check) {
+ /* Sanity check: the vreg_state and rreg_state mutually-redundant
+ mappings are consistent. If vreg_state[v].rreg points at some
+ rreg_state entry then that rreg_state entry should point back at
+ vreg_state[v]. */
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ if (vreg_state[v_idx].disp == Assigned) {
+ vassert(!hregIsVirtual(vreg_state[v_idx].rreg));
+
+ UInt r_idx = hregIndex(vreg_state[v_idx].rreg);
+ vassert(IS_VALID_RREGNO(r_idx));
+ vassert(rreg_state[r_idx].disp == Bound);
+ vassert(hregIndex(rreg_state[r_idx].vreg) == v_idx);
+
+ vassert(hregClass(vreg_state[v_idx].rreg)
+ == hregClass(con->univ->regs[r_idx]));
+ }
+ }
+
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ if (rreg_state[r_idx].disp == Bound) {
+ vassert(hregIsVirtual(rreg_state[r_idx].vreg));
+
+ UInt v_idx = hregIndex(rreg_state[r_idx].vreg);
+ vassert(IS_VALID_VREGNO(v_idx));
+ vassert(vreg_state[v_idx].disp == Assigned);
+ vassert(hregIndex(vreg_state[v_idx].rreg) == r_idx);
+ }
+ }
+
+ /* Sanity check: if rreg has been marked as Reserved, there must be
+ a corresponding hard live range for it. */
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ const RRegState* rreg = &rreg_state[r_idx];
+ if (rreg->disp == Reserved) {
+ vassert(rreg->lrs_used > 0);
+ vassert(rreg->lr_current < rreg->lrs_used);
+ vassert(rreg->lrs[rreg->lr_current].live_after <= (Short) ii);
+ vassert((Short) ii < rreg->lrs[rreg->lr_current].dead_before);
+ }
+ }
+ }
+
+
+ /* --- MOV coalescing --- */
+ /* Optimise register coalescing:
+ MOV v <-> v coalescing (done here).
+ MOV v <-> r coalescing (TODO: not yet). */
+ /* If doing a reg-reg move between two vregs, and the src's live
+ range ends here and the dst's live range starts here, bind the dst
+ to the src's rreg, and that's all. */
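+ /* For example (illustrative): given "mov v5, v7" at instruction ii, where
+ v5's live range ends at ii+1 and v7's starts at ii, v7 simply inherits
+ the rreg currently holding v5 and no machine-level move is emitted. */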
+ HReg vregS = INVALID_HREG;
+ HReg vregD = INVALID_HREG;
+ if (con->isMove(instr, &vregS, &vregD)) {
+ if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) {
+ /* Check that |isMove| is not telling us a bunch of lies ... */
+ vassert(hregClass(vregS) == hregClass(vregD));
+ UInt vs_idx = hregIndex(vregS);
+ UInt vd_idx = hregIndex(vregD);
+ vassert(IS_VALID_VREGNO(vs_idx));
+ vassert(IS_VALID_VREGNO(vd_idx));
+
+ if ((vreg_state[vs_idx].dead_before == ii + 1)
+ && (vreg_state[vd_idx].live_after == ii)
+ && (vreg_state[vs_idx].disp == Assigned)) {
+
+ /* Live ranges are adjacent and source vreg is bound.
+ Finally we can do the coalescing. */
+ HReg rreg = vreg_state[vs_idx].rreg;
+ vreg_state[vd_idx].disp = Assigned;
+ vreg_state[vd_idx].rreg = rreg;
+ vreg_state[vs_idx].disp = Unallocated;
+ vreg_state[vs_idx].rreg = INVALID_HREG;
+
+ UInt r_idx = hregIndex(rreg);
+ vassert(rreg_state[r_idx].disp == Bound);
+ rreg_state[r_idx].vreg = vregD;
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("coalesced: ");
+ con->ppReg(vregS);
+ vex_printf(" -> ");
+ con->ppReg(vregD);
+ vex_printf("\n\n");
+ }
+
+ /* In rare cases it can happen that vregD's live range ends
+ here. Check for this and, if so, free the vreg and rreg.
+ This effectively means that either the translated program
+ contained dead code (but VEX iropt passes are pretty good
+ at eliminating it) or the VEX backend generated dead code. */
+ if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
+ vreg_state[vd_idx].disp = Unallocated;
+ vreg_state[vd_idx].rreg = INVALID_HREG;
+ rreg_state[r_idx].disp = Free;
+ rreg_state[r_idx].vreg = INVALID_HREG;
+ }
+
+ /* Move on to the next instruction. We skip the post-instruction
+ stuff because all required house-keeping was done here. */
+ continue;
+ }
+ }
+ }
+
+
+ /* --- Reserve and free rregs if needed. --- */
+ /* If the rreg enters its hard live range and is not free:
+ 1. If the corresponding vreg is not used by the instruction, spill it.
+ 2. If the corresponding vreg is used by the instruction, then:
+ 2a. If there are no free rregs, spill a vreg not used by this
+ instruction.
+ 2b. Move the corresponding vreg to a free rreg. This is better than
+ spilling it and immediately reloading it.
+ */
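+ /* Illustrative example (amd64-flavoured): suppose a helper call clobbers
+ %rdi, so %rdi enters a hard live range here. If some vreg currently
+ lives in %rdi and the instruction does not mention that vreg, it is
+ spilled (case 1); if the instruction does mention it, it is moved into
+ some free rreg instead (case 2b). */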
+ const ULong rRead = reg_usage[ii].rRead;
+ const ULong rWritten = reg_usage[ii].rWritten;
+ const ULong rMentioned = rRead | rWritten;
+
+ if (rMentioned != 0) {
+ UInt rReg_minIndex = ULong__minIndex(rMentioned);
+ UInt rReg_maxIndex = ULong__maxIndex(rMentioned);
+ if (rReg_maxIndex >= n_rregs) {
+ rReg_maxIndex = n_rregs - 1;
+ }
+
+ for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) {
+ const ULong jMask = 1ULL << r_idx;
+
+ if (LIKELY((rMentioned & jMask) == 0)) {
+ continue;
+ }
+
+ RRegState* rreg = &rreg_state[r_idx];
+ if (LIKELY(rreg->lrs_used == 0)) {
+ continue;
+ }
+ if (rreg->disp == Reserved) {
+ continue;
+ }
+
+ if ((rreg->lrs[rreg->lr_current].live_after <= (Short) ii)
+ && ((Short) ii < rreg->lrs[rreg->lr_current].dead_before)) {
+
+ if (rreg->disp == Bound) {
+ /* Yes, there is an associated vreg. We need to deal with
+ it now somehow. */
+ HReg vreg = rreg->vreg;
+ UInt v_idx = hregIndex(vreg);
+
+ if (! HRegUsage__contains(&reg_usage[ii], vreg)) {
+ /* Spill the vreg. It is not used by this instruction. */
+ spill_vreg(vreg, v_idx, ii, vreg_state, n_vregs,
+ rreg_state, n_rregs, instrs_out, con);
+ } else {
+ /* Find or make a free rreg where to move this vreg to. */
+ UInt r_free_idx = FIND_OR_MAKE_FREE_RREG(
+ ii, v_idx, vreg_state[v_idx].reg_class, True);
+
+ /* Generate "move" between real registers. */
+ HInstr* move = con->genMove(con->univ->regs[r_idx],
+ con->univ->regs[r_free_idx], con->mode64);
+ vassert(move != NULL);
+ emit_instr(move, instrs_out, con, "move");
+
+ /* Update the register allocator state. */
+ vassert(vreg_state[v_idx].disp == Assigned);
+ vreg_state[v_idx].rreg = con->univ->regs[r_free_idx];
+ rreg_state[r_free_idx].disp = Bound;
+ rreg_state[r_free_idx].vreg = vreg;
+ rreg->disp = Free;
+ rreg->vreg = INVALID_HREG;
+ }
+ }
+
+ /* Finally claim the rreg as reserved. */
+ rreg->disp = Reserved;
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("rreg has been reserved: ");
+ con->ppReg(con->univ->regs[r_idx]);
+ vex_printf("\n\n");
+ }
+ }
+ }
+ }
+
+
+ /* --- Direct reload optimisation. --- */
+ /* If the instruction reads exactly one vreg which is currently spilled,
+ and this is the last use of that vreg, see if we can convert
+ the instruction into one that reads directly from the spill slot.
+ This is clearly only possible for x86 and amd64 targets, since ppc and
+ arm are load-store architectures. If successful, replace
+ instrs_in->arr[ii] with this new instruction, and recompute
+ its reg_usage, so that the change is invisible to the standard-case
+ handling that follows. */
+ if ((con->directReload != NULL) && (reg_usage[ii].n_vRegs <= 2)) {
+ Bool debug_direct_reload = False;
+ UInt nreads = 0;
+ HReg vreg_found = INVALID_HREG;
+ Short spill_offset = 0;
+
+ for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
+ HReg vreg = reg_usage[ii].vRegs[j];
+ vassert(hregIsVirtual(vreg));
+
+ if (reg_usage[ii].vMode[j] == HRmRead) {
+ nreads++;
+ UInt v_idx = hregIndex(vreg);
+ vassert(IS_VALID_VREGNO(v_idx));
+ if (vreg_state[v_idx].disp == Spilled) {
+ /* Is this its last use? */
+ vassert(vreg_state[v_idx].dead_before >= (Short) (ii + 1));
+ if ((vreg_state[v_idx].dead_before == (Short) (ii + 1))
+ && hregIsInvalid(vreg_found)) {
+ vreg_found = vreg;
+ spill_offset = vreg_state[v_idx].spill_offset;
+ }
+ }
+ }
+ }
+
+ if (!hregIsInvalid(vreg_found) && (nreads == 1)) {
+ if (reg_usage[ii].n_vRegs == 2) {
+ vassert(! sameHReg(reg_usage[ii].vRegs[0],
+ reg_usage[ii].vRegs[1]));
+ }
+
+ HInstr* reloaded = con->directReload(instrs_in->arr[ii],
+ vreg_found, spill_offset);
+ if (debug_direct_reload && (reloaded != NULL)) {
+ vex_printf("[%3d] ", spill_offset);
+ ppHReg(vreg_found);
+ vex_printf(": ");
+ con->ppInstr(instr, con->mode64);
+ }
+ if (reloaded != NULL) {
+ /* Update info about the instruction, so it looks as if it had
+ been in this form all along. */
+ instr = reloaded;
+ instrs_in->arr[ii] = reloaded;
+ con->getRegUsage(&reg_usage[ii], instr, con->mode64);
+ if (debug_direct_reload) {
+ vex_printf(" --> ");
+ con->ppInstr(reloaded, con->mode64);
+ }
+ }
+
+ if (debug_direct_reload && (reloaded != NULL)) {
+ vex_printf("\n");
+ }
+ }
+ }
+
+
+ /* The vreg -> rreg map constructed and then applied to each
+ instruction. */
+ HRegRemap remap;
+ initHRegRemap(&remap);
+
+ /* --- Allocate vregs used by the instruction. --- */
+ /* Vregs used by the instruction can be in the following states:
+ - Unallocated: vreg is entering its live range. Find a free rreg.
+ - Assigned: we do nothing; rreg has been allocated previously.
+ - Spilled: Find a free rreg and reload vreg into it.
+ Naturally, finding a free rreg may involve spilling a vreg not used by
+ the instruction. */
+ for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
+ HReg vreg = reg_usage[ii].vRegs[j];
+ vassert(hregIsVirtual(vreg));
+
+ if (0) {
+ vex_printf("considering "); con->ppReg(vreg); vex_printf("\n");
+ }
+
+ UInt v_idx = hregIndex(vreg);
+ vassert(IS_VALID_VREGNO(v_idx));
+ HReg rreg = vreg_state[v_idx].rreg;
+ if (vreg_state[v_idx].disp == Assigned) {
+ UInt r_idx = hregIndex(rreg);
+ vassert(rreg_state[r_idx].disp == Bound);
+ addToHRegRemap(&remap, vreg, rreg);
+ } else {
+ vassert(hregIsInvalid(rreg));
+
+ /* Find or make a free rreg of the correct class. */
+ UInt r_idx = FIND_OR_MAKE_FREE_RREG(
+ ii, v_idx, vreg_state[v_idx].reg_class, False);
+ rreg = con->univ->regs[r_idx];
+
+ /* Generate a reload only if the vreg is spilled and is about to be
+ read or modified. If it is merely written, then reloading it first
+ would be pointless. */
+ if ((vreg_state[v_idx].disp == Spilled)
+ && (reg_usage[ii].vMode[j] != HRmWrite)) {
+
+ HInstr* reload1 = NULL;
+ HInstr* reload2 = NULL;
+ con->genReload(&reload1, &reload2, rreg,
+ vreg_state[v_idx].spill_offset, con->mode64);
+ vassert(reload1 != NULL || reload2 != NULL);
+ if (reload1 != NULL) {
+ emit_instr(reload1, instrs_out, con, "reload1");
+ }
+ if (reload2 != NULL) {
+ emit_instr(reload2, instrs_out, con, "reload2");
+ }
+ }
+
+ rreg_state[r_idx].disp = Bound;
+ rreg_state[r_idx].vreg = vreg;
+ vreg_state[v_idx].disp = Assigned;
+ vreg_state[v_idx].rreg = rreg;
+ addToHRegRemap(&remap, vreg, rreg);
+ }
+ }
+
+ con->mapRegs(&remap, instr, con->mode64);
+ emit_instr(instr, instrs_out, con, NULL);
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("After dealing with current instruction:\n");
+ print_state(con, vreg_state, n_vregs, rreg_state, n_rregs, ii);
+ vex_printf("\n");
+ }
+
+ /* ------ Post-instruction actions. ------ */
+ /* Free rregs which:
+ - Have been reserved and whose hard live range ended.
+ - Have been bound to vregs whose live range ended. */
+ for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
+ RRegState* rreg = &rreg_state[r_idx];
+ switch (rreg->disp) {
+ case Free:
+ break;
+ case Reserved:
+ if (rreg->lrs_used > 0) {
+ /* Consider "dead before" the next instruction. */
+ if (rreg->lrs[rreg->lr_current].dead_before <= (Short) ii + 1) {
+ rreg_state[r_idx].disp = Free;
+ rreg_state[r_idx].vreg = INVALID_HREG;
+ if (rreg->lr_current < rreg->lrs_used - 1) {
+ rreg->lr_current += 1;
+ }
+ }
+ }
+ break;
+ case Bound: {
+ UInt v_idx = hregIndex(rreg->vreg);
+ /* Consider "dead before" the next instruction. */
+ if (vreg_state[v_idx].dead_before <= (Short) ii + 1) {
+ vreg_state[v_idx].disp = Unallocated;
+ vreg_state[v_idx].rreg = INVALID_HREG;
+ rreg_state[r_idx].disp = Free;
+ rreg_state[r_idx].vreg = INVALID_HREG;
+ }
+ break;
+ }
+ default:
+ vassert(0);
+ }
+ }
+ }
+
+ return instrs_out;
+}
+
+/*----------------------------------------------------------------------------*/
+/*--- host_generic_reg_alloc3.c ---*/
+/*----------------------------------------------------------------------------*/
}
/* Generic printing for registers. */
-void ppHReg ( HReg r )
+UInt ppHReg ( HReg r )
{
if (hregIsInvalid(r)) {
- vex_printf("HReg_INVALID");
- return;
+ return vex_printf("HReg_INVALID");
}
const Bool isV = hregIsVirtual(r);
const HChar* maybe_v = isV ? "v" : "";
always zero for virtual registers, so that's pointless -- hence
show the index number instead. */
switch (hregClass(r)) {
- case HRcInt32: vex_printf("%%%sr%u", maybe_v, regNN); return;
- case HRcInt64: vex_printf("%%%sR%u", maybe_v, regNN); return;
- case HRcFlt32: vex_printf("%%%sF%u", maybe_v, regNN); return;
- case HRcFlt64: vex_printf("%%%sD%u", maybe_v, regNN); return;
- case HRcVec64: vex_printf("%%%sv%u", maybe_v, regNN); return;
- case HRcVec128: vex_printf("%%%sV%u", maybe_v, regNN); return;
+ case HRcInt32: return vex_printf("%%%sr%u", maybe_v, regNN);
+ case HRcInt64: return vex_printf("%%%sR%u", maybe_v, regNN);
+ case HRcFlt32: return vex_printf("%%%sF%u", maybe_v, regNN);
+ case HRcFlt64: return vex_printf("%%%sD%u", maybe_v, regNN);
+ case HRcVec64: return vex_printf("%%%sv%u", maybe_v, regNN);
+ case HRcVec128: return vex_printf("%%%sV%u", maybe_v, regNN);
default: vpanic("ppHReg");
}
}
for (UInt i = 0; i < N_RREGUNIVERSE_REGS; i++) {
univ->regs[i] = INVALID_HREG;
}
+
+ for (UInt i = 0; i <= HrcLAST; i++) {
+ univ->allocable_start[i] = N_RREGUNIVERSE_REGS;
+ univ->allocable_end[i] = N_RREGUNIVERSE_REGS;
+ }
}
void RRegUniverse__check_is_sane ( const RRegUniverse* univ )
HReg reg = univ->regs[i];
vassert(hregIsInvalid(reg));
}
+
+ /* Determine which register classes are used and check that they form
+ contiguous ranges. */
+ Bool regclass_used[HrcLAST + 1];
+ for (UInt i = 0; i <= HrcLAST; i++) {
+ regclass_used[i] = False;
+ }
+
+ for (UInt i = 0; i < univ->allocable; i++) {
+ HReg reg = univ->regs[i];
+ HRegClass regclass = hregClass(reg);
+ if (!regclass_used[regclass]) {
+ regclass_used[regclass] = True;
+ }
+ }
+
+ UInt regs_visited = 0;
+ for (UInt i = 0; i <= HrcLAST; i++) {
+ if (regclass_used[i]) {
+ for (UInt j = univ->allocable_start[i];
+ j <= univ->allocable_end[i]; j++) {
+ vassert(hregClass(univ->regs[j]) == i);
+ regs_visited += 1;
+ }
+ }
+ }
+
+ vassert(regs_visited == univ->allocable);
}
available on any specific host. For example on x86, the available
classes are: Int32, Flt64, Vec128 only.
- IMPORTANT NOTE: host_generic_reg_alloc2.c needs how much space is
+ IMPORTANT NOTE: host_generic_reg_alloc*.c needs to know how much space is
needed to spill each class of register. It allocates the following
amount of space:
HRcVec128 128 bits
If you add another regclass, you must remember to update
- host_generic_reg_alloc2.c accordingly.
+ host_generic_reg_alloc*.c and RRegUniverse accordingly.
When adding entries to enum HRegClass, do not use any value > 14 or < 1.
*/
HRcFlt32=5, /* 32-bit float */
HRcFlt64=6, /* 64-bit float */
HRcVec64=7, /* 64-bit SIMD */
- HRcVec128=8 /* 128-bit SIMD */
+ HRcVec128=8, /* 128-bit SIMD */
+ HrcLAST=HRcVec128
}
HRegClass;
extern void ppHRegClass ( HRegClass );
-/* Print an HReg in a generic (non-target-specific) way. */
-extern void ppHReg ( HReg );
+/* Print an HReg in a generic (non-target-specific) way.
+ Returns number of HChar's written. */
+extern UInt ppHReg ( HReg );
/* Construct. The goal here is that compiler can fold this down to a
constant in the case where the four arguments are constants, which
static inline HRegClass hregClass ( HReg r )
{
HRegClass rc = (HRegClass)((r.u32 >> 27) & 0xF);
- vassert(rc >= HRcInt32 && rc <= HRcVec128);
+ vassert(rc >= HRcInt32 && rc <= HrcLAST);
return rc;
}
index here, since this is the only place where we map index
numbers to actual registers. */
HReg regs[N_RREGUNIVERSE_REGS];
+
+ /* Ranges for groups of allocable registers. Used to quickly address only
+ a group of allocable registers belonging to the same register class.
+ Indexes into |allocable_{start,end}| are HRegClass entries, such as
+ HRcInt64. Values in |allocable_{start,end}| give a valid range into
+ |regs| where registers corresponding to the given register class are
+ found.
+
+ For example, let's say allocable_start[HRcInt64] == 10 and
+ allocable_end[HRcInt64] == 14. Then regs[10], regs[11], regs[12],
+ regs[13], and regs[14] give all registers of register class HRcInt64.
+
+ If a register class is not present, then values of the corresponding
+ |allocable_{start,end}| elements are equal to N_RREGUNIVERSE_REGS.
+
+ Naturally registers in |regs| must form contiguous groups. This is
+ checked by RRegUniverse__check_is_sane(). */
+ UInt allocable_start[HrcLAST + 1];
+ UInt allocable_end[HrcLAST + 1];
}
RRegUniverse;
/*---------------------------------------------------------*/
/* Note that such maps can only map virtual regs to real regs.
- addToHRegRenap will barf if given a pair not of that form. As a
+ addToHRegRemap will barf if given a pair not of that form. As a
result, no valid HRegRemap will bind a real reg to anything, and so
if lookupHRegMap is given a real reg, it returns it unchanged.
This is precisely the behaviour that the register allocator needs
/*--- Reg alloc: TODO: move somewhere else ---*/
/*---------------------------------------------------------*/
-extern
-HInstrSB* doRegisterAllocation (
-
- /* Incoming virtual-registerised code. */
- HInstrSB* sb_in,
-
- /* The real-register universe to use. This contains facts about
- real registers, one of which is the set of registers available
- for allocation. */
- const RRegUniverse* univ,
-
- /* Return True iff the given insn is a reg-reg move, in which
- case also return the src and dst regs. */
- Bool (*isMove) (const HInstr*, HReg*, HReg*),
-
- /* Get info about register usage in this insn. */
- void (*getRegUsage) (HRegUsage*, const HInstr*, Bool),
-
- /* Apply a reg-reg mapping to an insn. */
- void (*mapRegs) (HRegRemap*, HInstr*, Bool),
-
- /* Is this instruction actually HInstrIfThenElse? Returns pointer to
- HInstrIfThenElse if yes, NULL otherwise. */
- HInstrIfThenElse* (*isIfThenElse) (const HInstr*),
-
- /* Return insn(s) to spill/restore a real reg to a spill slot
- offset. And optionally a function to do direct reloads. */
- void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
- void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
- HInstr* (*directReload) ( HInstr*, HReg, Short ),
- Int guest_sizeB,
-
- /* For debug printing only. */
- void (*ppInstr) ( const HInstr*, Bool ),
- void (*ppCondCode)(HCondCode),
- void (*ppReg) ( HReg ),
+/* Control of the VEX register allocator. */
+typedef
+ struct {
+ /* The real-register universe to use. This contains facts about real
+ registers, one of which is the set of registers available for
+ allocation. */
+ const RRegUniverse* univ;
+
+ /* Return True iff the given insn is a reg-reg move, in which case also
+ return the src and dst regs. */
+ Bool (*isMove)(const HInstr*, HReg*, HReg*);
+
+ /* Get info about register usage in this insn. */
+ void (*getRegUsage)(HRegUsage*, const HInstr*, Bool);
+
+ /* Apply a reg-reg mapping to an insn. */
+ void (*mapRegs)(HRegRemap*, HInstr*, Bool);
+
+ /* Is this instruction actually HInstrIfThenElse? Returns pointer to
+ HInstrIfThenElse if yes, NULL otherwise. */
+ HInstrIfThenElse* (*isIfThenElse) (const HInstr*);
+
+ /* Return insn(s) to spill/restore a real register to a spill slot offset.
+ Also a function to move between registers.
+ And optionally a function to do direct reloads. */
+ void (*genSpill)(HInstr**, HInstr**, HReg, Int, Bool);
+ void (*genReload)(HInstr**, HInstr**, HReg, Int, Bool);
+ HInstr* (*genMove)(HReg from, HReg to, Bool);
+ HInstr* (*directReload)(HInstr*, HReg, Short);
+ UInt guest_sizeB;
+
+ /* For debug printing only. */
+ void (*ppInstr)(const HInstr*, Bool);
+ UInt (*ppReg)(HReg);
+
+ /* 32/64bit mode */
+ Bool mode64;
+ }
+ RegAllocControl;
- /* 32/64bit mode */
- Bool mode64
+extern HInstrSB* doRegisterAllocation(
+ HInstrSB* instrs_in,
+ const RegAllocControl* con
);
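+
+/* A backend driver typically fills in one RegAllocControl and hands it to
+ doRegisterAllocation. Hypothetical sketch, using amd64-flavoured names
+ (function-pointer casts and the isIfThenElse hook are elided):
+
+ RegAllocControl con;
+ con.univ = getRRegUniverse_AMD64();
+ con.isMove = isMove_AMD64Instr;
+ con.getRegUsage = getRegUsage_AMD64Instr;
+ con.mapRegs = mapRegs_AMD64Instr;
+ con.genSpill = genSpill_AMD64;
+ con.genReload = genReload_AMD64;
+ con.genMove = genMove_AMD64;
+ con.directReload = directReload_AMD64;
+ con.guest_sizeB = sizeof(VexGuestAMD64State);
+ con.ppInstr = ppAMD64Instr;
+ con.ppReg = ppHRegAMD64;
+ con.mode64 = True;
+ sb_out = doRegisterAllocation(sb_in, &con);
+*/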
-
#endif /* ndef __VEX_HOST_GENERIC_REGS_H */
/*---------------------------------------------------------------*/
/* Add the registers. The initial segment of this array must be
those available for allocation by reg-alloc, and those that
follow are not available for allocation. */
+ ru->allocable_start[(mode64) ? HRcInt64 : HRcInt32] = ru->size;
ru->regs[ru->size++] = hregMIPS_GPR16(mode64);
ru->regs[ru->size++] = hregMIPS_GPR17(mode64);
ru->regs[ru->size++] = hregMIPS_GPR18(mode64);
ru->regs[ru->size++] = hregMIPS_GPR14(mode64);
ru->regs[ru->size++] = hregMIPS_GPR15(mode64);
ru->regs[ru->size++] = hregMIPS_GPR24(mode64);
+ ru->allocable_end[(mode64) ? HRcInt64 : HRcInt32] = ru->size - 1;
+
/* s7 (=guest_state) */
+ ru->allocable_start[(mode64) ? HRcFlt64 : HRcFlt32] = ru->size;
ru->regs[ru->size++] = hregMIPS_F16(mode64);
ru->regs[ru->size++] = hregMIPS_F18(mode64);
ru->regs[ru->size++] = hregMIPS_F20(mode64);
ru->regs[ru->size++] = hregMIPS_F26(mode64);
ru->regs[ru->size++] = hregMIPS_F28(mode64);
ru->regs[ru->size++] = hregMIPS_F30(mode64);
+ ru->allocable_end[(mode64) ? HRcFlt64 : HRcFlt32] = ru->size - 1;
+
if (!mode64) {
/* Fake double floating point */
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregMIPS_D0(mode64);
ru->regs[ru->size++] = hregMIPS_D1(mode64);
ru->regs[ru->size++] = hregMIPS_D2(mode64);
ru->regs[ru->size++] = hregMIPS_D5(mode64);
ru->regs[ru->size++] = hregMIPS_D6(mode64);
ru->regs[ru->size++] = hregMIPS_D7(mode64);
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
}
ru->allocable = ru->size;
}
-void ppHRegMIPS(HReg reg, Bool mode64)
+UInt ppHRegMIPS(HReg reg, Bool mode64)
{
Int r;
static const HChar *ireg32_names[35]
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
case HRcInt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
case HRcInt64:
r = hregEncoding (reg);
vassert (r >= 0 && r < 32);
- vex_printf ("%s", ireg32_names[r]);
- return;
+ return vex_printf ("%s", ireg32_names[r]);
case HRcFlt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", freg32_names[r]);
- return;
+ return vex_printf("%s", freg32_names[r]);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", freg64_names[r]);
- return;
+ return vex_printf("%s", freg64_names[r]);
default:
vpanic("ppHRegMIPS");
break;
}
-
- return;
}
}
}
+MIPSInstr* genMove_MIPS(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt32:
+ case HRcInt64:
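+ /* MIPS has no dedicated reg-reg move instruction; copying a register
+ via "or to, from, from" is the standard idiom. */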
+ return MIPSInstr_Alu(Malu_OR, to, from, MIPSRH_Reg(from));
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_MIPS: unimplemented regclass");
+ }
+}
+
/* --------- The mips assembler --------- */
inline static UInt iregNo(HReg r, Bool mode64)
# define MIPS_N_REGPARMS 8
#endif
-extern void ppHRegMIPS ( HReg, Bool );
+extern UInt ppHRegMIPS ( HReg, Bool );
/* --------- Condition codes, Intel encoding. --------- */
HReg rreg, Int offset, Bool);
extern void genReload_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2,
HReg rreg, Int offset, Bool);
+extern MIPSInstr* genMove_MIPS(HReg from, HReg to, Bool mode64);
extern const RRegUniverse* getRRegUniverse_MIPS ( Bool mode64 );
// GPR0 = scratch reg where poss. - some ops interpret as value zero
// GPR1 = stack pointer
// GPR2 = TOC pointer
+ ru->allocable_start[(mode64) ? HRcInt64 : HRcInt32] = ru->size;
ru->regs[ru->size++] = hregPPC_GPR3(mode64);
ru->regs[ru->size++] = hregPPC_GPR4(mode64);
ru->regs[ru->size++] = hregPPC_GPR5(mode64);
ru->regs[ru->size++] = hregPPC_GPR26(mode64);
ru->regs[ru->size++] = hregPPC_GPR27(mode64);
ru->regs[ru->size++] = hregPPC_GPR28(mode64);
+ ru->allocable_end[(mode64) ? HRcInt64 : HRcInt32] = ru->size - 1;
// GPR29 is reserved for the dispatcher
// GPR30 is reserved as AltiVec spill reg temporary
// GPR31 is reserved for the GuestStatePtr
the occasional extra spill instead. */
/* For both ppc32-linux and ppc64-linux, f14-f31 are callee save.
So use them. */
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregPPC_FPR14(mode64);
ru->regs[ru->size++] = hregPPC_FPR15(mode64);
ru->regs[ru->size++] = hregPPC_FPR16(mode64);
ru->regs[ru->size++] = hregPPC_FPR19(mode64);
ru->regs[ru->size++] = hregPPC_FPR20(mode64);
ru->regs[ru->size++] = hregPPC_FPR21(mode64);
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
/* Same deal re Altivec */
/* For both ppc32-linux and ppc64-linux, v20-v31 are callee save.
So use them. */
/* NB, vr29 is used as a scratch temporary -- do not allocate */
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregPPC_VR20(mode64);
ru->regs[ru->size++] = hregPPC_VR21(mode64);
ru->regs[ru->size++] = hregPPC_VR22(mode64);
ru->regs[ru->size++] = hregPPC_VR25(mode64);
ru->regs[ru->size++] = hregPPC_VR26(mode64);
ru->regs[ru->size++] = hregPPC_VR27(mode64);
+ ru->allocable_end[HRcVec128] = ru->size - 1;
ru->allocable = ru->size;
/* And other regs, not available to the allocator. */
}
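Note the pattern in both the FPR and Altivec ranges above: only callee-save registers are handed to the allocator. A value parked in, say, f14 survives calls to helper functions untouched, so the allocator never has to spill it around a call; the price is the one-off save/restore in the generated prologue and epilogue that the "occasional extra spill" comment above alludes to.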
-void ppHRegPPC ( HReg reg )
+UInt ppHRegPPC ( HReg reg )
{
Int r;
static const HChar* ireg32_names[32]
"%r28", "%r29", "%r30", "%r31" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
case HRcInt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%%fr%d", r);
- return;
+ return vex_printf("%%fr%d", r);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 32);
- vex_printf("%%v%d", r);
- return;
+ return vex_printf("%%v%d", r);
default:
vpanic("ppHRegPPC");
}
}
}
+PPCInstr* genMove_PPC(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt32:
+ case HRcInt64:
+ return PPCInstr_Alu(Palu_OR, to, from, PPCRH_Reg(from));
+ case HRcFlt64:
+ return PPCInstr_FpUnary(Pfp_MOV, to, from);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_PPC: unimplemented regclass");
+ }
+}
+
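For PPC the integer move is likewise expressed as OR-with-self, which is exactly what the mr extended mnemonic denotes. A quick illustrative use, with GPR numbers chosen arbitrarily and ppc64 mode assumed:

   PPCInstr* mv = genMove_PPC(hregPPC_GPR3(True), hregPPC_GPR4(True),
                              True/*mode64*/);
   ppPPCInstr(mv, True);   /* prints the or/mr form of the r3 -> r4 copy */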
/* --------- The ppc assembler (bleh.) --------- */
/* Num registers used for function calls */
#define PPC_N_REGPARMS 8
-extern void ppHRegPPC ( HReg );
+extern UInt ppHRegPPC ( HReg );
/* --------- Condition codes --------- */
HReg rreg, Int offsetB, Bool mode64 );
extern void genReload_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offsetB, Bool mode64 );
+extern PPCInstr* genMove_PPC(HReg from, HReg to, Bool mode64);
extern const RRegUniverse* getRRegUniverse_PPC ( Bool mode64 );
vex_printf("%s", s390_insn_as_string(insn));
}
-void
+UInt
ppHRegS390(HReg reg)
{
- vex_printf("%s", s390_hreg_as_string(reg));
+ return vex_printf("%s", s390_hreg_as_string(reg));
}
/*------------------------------------------------------------*/
FPR12 - FPR15 are also used as register pairs for 128-bit
floating point operations
*/
- UInt regno;
- for (regno = 1; regno <= 11; ++regno) {
+ ru->allocable_start[HRcInt64] = ru->size;
+ for (UInt regno = 1; regno <= 11; ++regno) {
gpr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_gpr(regno);
}
- for (regno = 0; regno <= 15; ++regno) {
+ ru->allocable_end[HRcInt64] = ru->size - 1;
+
+ ru->allocable_start[HRcFlt64] = ru->size;
+ for (UInt regno = 0; regno <= 15; ++regno) {
fpr_index[regno] = ru->size;
ru->regs[ru->size++] = s390_hreg_fpr(regno);
}
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
ru->allocable = ru->size;
/* Add the registers that are not available for allocation.
}
}
+s390_insn* genMove_S390(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt64:
+ return s390_insn_move(sizeofIRType(Ity_I64), to, from);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_S390: unimplemented regclass");
+ }
+}
+
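sizeofIRType(Ity_I64) is simply 8, so the HRcInt64 case asks the s390 backend for a full 64-bit GPR copy; spelled out, the call is equivalent to:

   s390_insn* mv = s390_insn_move(8/*bytes*/, to, from);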
/* Helper function for s390_insn_get_reg_usage */
static void
s390_opnd_RMI_get_reg_usage(HRegUsage *u, s390_opnd_RMI op)
void ppS390AMode(const s390_amode *);
void ppS390Instr(const s390_insn *, Bool mode64);
-void ppHRegS390(HReg);
+UInt ppHRegS390(HReg);
/* Some functions that insulate the register allocator from details
of the underlying instruction set. */
const RRegUniverse *getRRegUniverse_S390( void );
void genSpill_S390 ( HInstr **, HInstr **, HReg , Int , Bool );
void genReload_S390 ( HInstr **, HInstr **, HReg , Int , Bool );
+extern s390_insn* genMove_S390(HReg from, HReg to, Bool mode64);
HInstrSB *iselSB_S390 ( const IRSB *, VexArch, const VexArchInfo *,
const VexAbiInfo *, Int, Int, Bool, Bool, Addr);
/* Add the registers. The initial segment of this array must be
those available for allocation by reg-alloc, and those that
follow are not available for allocation. */
+ ru->allocable_start[HRcInt32] = ru->size;
ru->regs[ru->size++] = hregX86_EAX();
ru->regs[ru->size++] = hregX86_EBX();
ru->regs[ru->size++] = hregX86_ECX();
ru->regs[ru->size++] = hregX86_EDX();
ru->regs[ru->size++] = hregX86_ESI();
ru->regs[ru->size++] = hregX86_EDI();
+ ru->allocable_end[HRcInt32] = ru->size - 1;
+
+ ru->allocable_start[HRcFlt64] = ru->size;
ru->regs[ru->size++] = hregX86_FAKE0();
ru->regs[ru->size++] = hregX86_FAKE1();
ru->regs[ru->size++] = hregX86_FAKE2();
ru->regs[ru->size++] = hregX86_FAKE3();
ru->regs[ru->size++] = hregX86_FAKE4();
ru->regs[ru->size++] = hregX86_FAKE5();
+ ru->allocable_end[HRcFlt64] = ru->size - 1;
+
+ ru->allocable_start[HRcVec128] = ru->size;
ru->regs[ru->size++] = hregX86_XMM0();
ru->regs[ru->size++] = hregX86_XMM1();
ru->regs[ru->size++] = hregX86_XMM2();
ru->regs[ru->size++] = hregX86_XMM5();
ru->regs[ru->size++] = hregX86_XMM6();
ru->regs[ru->size++] = hregX86_XMM7();
+ ru->allocable_end[HRcVec128] = ru->size - 1;
ru->allocable = ru->size;
+
/* And other regs, not available to the allocator. */
ru->regs[ru->size++] = hregX86_ESP();
ru->regs[ru->size++] = hregX86_EBP();
}
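Taken together with the comment at the top of this hunk, the layout obeys two invariants: allocable registers form the initial segment of regs[], and each populated class occupies one contiguous sub-range of that segment. A hedged restatement as checks, not code from the patch, and only meaningful for classes the target populates:

   static void checkClassRange ( const RRegUniverse* ru, HRegClass hrc )
   {
      vassert(ru->allocable <= ru->size);
      vassert(ru->allocable_start[hrc] <= ru->allocable_end[hrc]);
      vassert(ru->allocable_end[hrc] < ru->allocable);
   }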
-void ppHRegX86 ( HReg reg )
+UInt ppHRegX86 ( HReg reg )
{
Int r;
static const HChar* ireg32_names[8]
= { "%eax", "%ecx", "%edx", "%ebx", "%esp", "%ebp", "%esi", "%edi" };
/* Be generic for all virtual regs. */
if (hregIsVirtual(reg)) {
- ppHReg(reg);
- return;
+ return ppHReg(reg);
}
/* But specific for real regs. */
switch (hregClass(reg)) {
case HRcInt32:
r = hregEncoding(reg);
vassert(r >= 0 && r < 8);
- vex_printf("%s", ireg32_names[r]);
- return;
+ return vex_printf("%s", ireg32_names[r]);
case HRcFlt64:
r = hregEncoding(reg);
vassert(r >= 0 && r < 6);
- vex_printf("%%fake%d", r);
- return;
+ return vex_printf("%%fake%d", r);
case HRcVec128:
r = hregEncoding(reg);
vassert(r >= 0 && r < 8);
- vex_printf("%%xmm%d", r);
- return;
+ return vex_printf("%%xmm%d", r);
default:
vpanic("ppHRegX86");
}
}
}
+X86Instr* genMove_X86(HReg from, HReg to, Bool mode64)
+{
+ switch (hregClass(from)) {
+ case HRcInt32:
+ return X86Instr_Alu32R(Xalu_MOV, X86RMI_Reg(from), to);
+ case HRcVec128:
+ return X86Instr_SseReRg(Xsse_MOV, from, to);
+ default:
+ ppHRegClass(hregClass(from));
+ vpanic("genMove_X86: unimplemented regclass");
+ }
+}
+
/* The given instruction reads the specified vreg exactly once, and
that vreg is currently located at the given spill offset. If
possible, return a variant of the instruction which instead
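A hedged illustration of what directReload buys on x86: if %v5 is spilled at offset 100 and an instruction reads it exactly once, say addl %v5,%eax, the instruction can be rewritten to read the spill slot directly, e.g. addl 100(<spill-area base>),%eax, saving both the scratch register and the separate reload instruction. The exact addressing base for the spill area is the backend's concern and is not shown in this excerpt.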
ST_IN HReg hregX86_EBP ( void ) { return mkHReg(False, HRcInt32, 5, 21); }
#undef ST_IN
-extern void ppHRegX86 ( HReg );
+extern UInt ppHRegX86 ( HReg );
/* --------- Condition codes, Intel encoding. --------- */
HReg rreg, Int offset, Bool );
extern void genReload_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
-
+extern X86Instr* genMove_X86(HReg from, HReg to, Bool mode64);
extern X86Instr* directReload_X86 ( X86Instr* i, HReg vreg, Short spill_off );
extern const RRegUniverse* getRRegUniverse_X86 ( void );
HInstrIfThenElse* (*isIfThenElse)(const HInstr*);
void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool );
void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool );
+ HInstr* (*genMove) ( HReg, HReg, Bool );
HInstr* (*directReload) ( HInstr*, HReg, Short );
void (*ppInstr) ( const HInstr*, Bool );
void (*ppCondCode) ( HCondCode );
- void (*ppReg) ( HReg );
+ UInt (*ppReg) ( HReg );
HInstrSB* (*iselSB) ( const IRSB*, VexArch, const VexArchInfo*,
const VexAbiInfo*, Int, Int, Bool, Bool,
Addr );
mapRegs = NULL;
genSpill = NULL;
genReload = NULL;
+ genMove = NULL;
directReload = NULL;
ppInstr = NULL;
ppCondCode = NULL;
isIfThenElse = CAST_TO_TYPEOF(isIfThenElse) X86FN(isIfThenElse_X86Instr);
genSpill = CAST_TO_TYPEOF(genSpill) X86FN(genSpill_X86);
genReload = CAST_TO_TYPEOF(genReload) X86FN(genReload_X86);
+ genMove = CAST_TO_TYPEOF(genMove) X86FN(genMove_X86);
directReload = CAST_TO_TYPEOF(directReload) X86FN(directReload_X86);
ppInstr = CAST_TO_TYPEOF(ppInstr) X86FN(ppX86Instr);
ppCondCode = CAST_TO_TYPEOF(ppCondCode) X86FN(ppX86CondCode);
mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr);
genSpill = CAST_TO_TYPEOF(genSpill) AMD64FN(genSpill_AMD64);
genReload = CAST_TO_TYPEOF(genReload) AMD64FN(genReload_AMD64);
+ genMove = CAST_TO_TYPEOF(genMove) AMD64FN(genMove_AMD64);
directReload = CAST_TO_TYPEOF(directReload) AMD64FN(directReload_AMD64);
ppInstr = CAST_TO_TYPEOF(ppInstr) AMD64FN(ppAMD64Instr);
ppReg = CAST_TO_TYPEOF(ppReg) AMD64FN(ppHRegAMD64);
mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr);
genSpill = CAST_TO_TYPEOF(genSpill) PPC32FN(genSpill_PPC);
genReload = CAST_TO_TYPEOF(genReload) PPC32FN(genReload_PPC);
+ genMove = CAST_TO_TYPEOF(genMove) PPC32FN(genMove_PPC);
ppInstr = CAST_TO_TYPEOF(ppInstr) PPC32FN(ppPPCInstr);
ppReg = CAST_TO_TYPEOF(ppReg) PPC32FN(ppHRegPPC);
iselSB = PPC32FN(iselSB_PPC);
mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr);
genSpill = CAST_TO_TYPEOF(genSpill) PPC64FN(genSpill_PPC);
genReload = CAST_TO_TYPEOF(genReload) PPC64FN(genReload_PPC);
+ genMove = CAST_TO_TYPEOF(genMove) PPC64FN(genMove_PPC);
ppInstr = CAST_TO_TYPEOF(ppInstr) PPC64FN(ppPPCInstr);
ppReg = CAST_TO_TYPEOF(ppReg) PPC64FN(ppHRegPPC);
iselSB = PPC64FN(iselSB_PPC);
mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr);
genSpill = CAST_TO_TYPEOF(genSpill) S390FN(genSpill_S390);
genReload = CAST_TO_TYPEOF(genReload) S390FN(genReload_S390);
+ genMove = CAST_TO_TYPEOF(genMove) S390FN(genMove_S390);
// fixs390: consider implementing directReload_S390
ppInstr = CAST_TO_TYPEOF(ppInstr) S390FN(ppS390Instr);
ppReg = CAST_TO_TYPEOF(ppReg) S390FN(ppHRegS390);
mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr);
genSpill = CAST_TO_TYPEOF(genSpill) ARMFN(genSpill_ARM);
genReload = CAST_TO_TYPEOF(genReload) ARMFN(genReload_ARM);
+ genMove = CAST_TO_TYPEOF(genMove) ARMFN(genMove_ARM);
ppInstr = CAST_TO_TYPEOF(ppInstr) ARMFN(ppARMInstr);
ppReg = CAST_TO_TYPEOF(ppReg) ARMFN(ppHRegARM);
iselSB = ARMFN(iselSB_ARM);
mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr);
genSpill = CAST_TO_TYPEOF(genSpill) ARM64FN(genSpill_ARM64);
genReload = CAST_TO_TYPEOF(genReload) ARM64FN(genReload_ARM64);
+ genMove = CAST_TO_TYPEOF(genMove) ARM64FN(genMove_ARM64);
ppInstr = CAST_TO_TYPEOF(ppInstr) ARM64FN(ppARM64Instr);
ppReg = CAST_TO_TYPEOF(ppReg) ARM64FN(ppHRegARM64);
iselSB = ARM64FN(iselSB_ARM64);
mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr);
genSpill = CAST_TO_TYPEOF(genSpill) MIPS32FN(genSpill_MIPS);
genReload = CAST_TO_TYPEOF(genReload) MIPS32FN(genReload_MIPS);
+ genMove = CAST_TO_TYPEOF(genMove) MIPS32FN(genMove_MIPS);
ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS32FN(ppMIPSInstr);
ppReg = CAST_TO_TYPEOF(ppReg) MIPS32FN(ppHRegMIPS);
iselSB = MIPS32FN(iselSB_MIPS);
mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr);
genSpill = CAST_TO_TYPEOF(genSpill) MIPS64FN(genSpill_MIPS);
genReload = CAST_TO_TYPEOF(genReload) MIPS64FN(genReload_MIPS);
+ genMove = CAST_TO_TYPEOF(genMove) MIPS64FN(genMove_MIPS);
ppInstr = CAST_TO_TYPEOF(ppInstr) MIPS64FN(ppMIPSInstr);
ppReg = CAST_TO_TYPEOF(ppReg) MIPS64FN(ppHRegMIPS);
iselSB = MIPS64FN(iselSB_MIPS);
}
/* Register allocate. */
- rcode = doRegisterAllocation ( vcode, rRegUniv,
- isMove, getRegUsage, mapRegs, isIfThenElse,
- genSpill, genReload, directReload,
- guest_sizeB,
- ppInstr, ppCondCode, ppReg, mode64 );
+ RegAllocControl con = {
+ .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage,
+ .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill,
+ .genReload = genReload, .genMove = genMove, .directReload = directReload,
+ .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppReg = ppReg,
+ .mode64 = mode64};
+
+ rcode = doRegisterAllocation(vcode, &con);
vexAllocSanityCheck();
}
}
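Collapsing the dozen positional arguments into RegAllocControl keeps doRegisterAllocation's signature stable as v3 grows new hooks; note that ppCondCode is no longer passed at all. Because con uses designated initializers, any unset hook reads as NULL and can be feature-tested, which matters in practice: s390 really does leave directReload unset, per the fixs390 note above. Illustrative guard, with names standing in for the allocator's internals:

   HInstr* folded = con->directReload != NULL
                       ? con->directReload(instr, vreg, spill_offset)
                       : NULL;
   /* 'folded' may still be NULL if the instruction can't absorb the
      spill slot; fall back to an ordinary genReload in that case. */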
+/* Vectorised memset, copied from Valgrind's m_libcbase.c. */
void vex_bzero ( void* sV, SizeT n )
{
- SizeT i;
- UChar* s = (UChar*)sV;
- /* No laughing, please. Just don't call this too often. Thank you
- for your attention. */
- for (i = 0; i < n; i++) s[i] = 0;
+# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))
+
+ UChar* d = sV;
+
+ while ((!IS_4_ALIGNED(d)) && n >= 1) {
+ d[0] = 0;
+ d++;
+ n--;
+ }
+ if (n == 0)
+ return;
+ while (n >= 16) {
+ ((UInt*)d)[0] = 0;
+ ((UInt*)d)[1] = 0;
+ ((UInt*)d)[2] = 0;
+ ((UInt*)d)[3] = 0;
+ d += 16;
+ n -= 16;
+ }
+ while (n >= 4) {
+ ((UInt*)d)[0] = 0;
+ d += 4;
+ n -= 4;
+ }
+ while (n >= 1) {
+ d[0] = 0;
+ d++;
+ n--;
+ }
+ return;
+# undef IS_4_ALIGNED
}
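The rewritten vex_bzero first stores single bytes until d is 4-aligned, then clears 16 bytes per iteration via four word stores, then mops up with the word and byte loops. For example, a 23-byte buffer starting one byte past a 4-byte boundary gets 3 byte stores, one 16-byte unrolled block, and one final word store: 3 + 16 + 4 = 23. The unrolled stores lean on UInt being exactly 4 bytes, which VEX assumes throughout; a local restatement of that assumption:

   vassert(sizeof(UInt) == 4);   /* required by the word-store loops */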