Reg<->Reg MOV coalescing status is now part of HRegUsage.
This allows the register allocator to query it more than once without
incurring a performance penalty. This in turn makes it possible to keep
better track of vreg<->vreg MOV coalescing, so that all vregs in a
coalescing chain get the effective |dead_before| of the last vreg in the
chain. A small performance improvement has been observed because this
also allows coalescing of spilled vregs (previously only assigned ones
could be coalesced).
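
For a quick overview of the new contract, here is a minimal, self-contained
sketch (simplified stand-in types and hypothetical helper names; only the
field names isRegRegMove, regMoveSrc, regMoveDst and isVregVregMove are taken
from this patch). Backends fill in the hint from their getRegUsage hook; the
allocator derives the vreg<->vreg flag from it once per instruction.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for HReg/HRegUsage, reduced to the fields used here. */
typedef struct { unsigned index; bool is_virtual; } Reg;

typedef struct {
   bool isRegRegMove;   /* set by the backend: plain reg-to-reg copy */
   Reg  regMoveSrc;
   Reg  regMoveDst;
   bool isVregVregMove; /* derived by the allocator, never by backends */
} UsageHint;

/* Backend side: what a getRegUsage_*Instr() hook does for "MOV dst, src". */
static void report_move(UsageHint* u, Reg src, Reg dst)
{
   u->isRegRegMove = true;
   u->regMoveSrc   = src;
   u->regMoveDst   = dst;
}

/* Allocator side: computed once per instruction during the scan stage. */
static void derive_vreg_vreg_flag(UsageHint* u)
{
   u->isVregVregMove = u->isRegRegMove
                       && u->regMoveSrc.is_virtual
                       && u->regMoveDst.is_virtual;
}

int main(void)
{
   UsageHint u = { .isRegRegMove = false };
   Reg v3 = { 3, true };   /* two virtual registers */
   Reg v7 = { 7, true };
   report_move(&u, v3, v7);
   derive_vreg_vreg_flag(&u);
   printf("vreg<->vreg move: %s\n", u.isVregVregMove ? "yes" : "no");
   return 0;
}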
addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
if (i->Ain.Alu64R.op == Aalu_MOV) {
addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
+
+ if (i->Ain.Alu64R.src->tag == Armi_Reg) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->Ain.Alu64R.src->Armi.Reg.reg;
+ u->regMoveDst = i->Ain.Alu64R.dst;
+ }
return;
}
if (i->Ain.Alu64R.op == Aalu_CMP) {
addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV
? HRmWrite : HRmModify,
i->Ain.SseReRg.dst);
+
+ if (i->Ain.SseReRg.op == Asse_MOV) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->Ain.SseReRg.src;
+ u->regMoveDst = i->Ain.SseReRg.dst;
+ }
}
return;
case Ain_SseCMov:
//uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV
//uu ? HRmWrite : HRmModify,
//uu i->Ain.AvxReRg.dst);
+ //uu
+ //uu if (i->Ain.AvxReRg.op == Asse_MOV) {
+ //uu u->isRegRegMove = True;
+ //uu u->regMoveSrc = i->Ain.AvxReRg.src;
+ //uu u->regMoveDst = i->Ain.AvxReRg.dst;
+ //uu }
//uu }
//uu return;
case Ain_EvCheck:
}
}
-/* Figure out if i represents a reg-reg move, and if so assign the
- source and destination to *src and *dst. If in doubt say No. Used
- by the register allocator to do move coalescing.
-*/
-Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst )
-{
- switch (i->tag) {
- case Ain_Alu64R:
- /* Moves between integer regs */
- if (i->Ain.Alu64R.op != Aalu_MOV)
- return False;
- if (i->Ain.Alu64R.src->tag != Armi_Reg)
- return False;
- *src = i->Ain.Alu64R.src->Armi.Reg.reg;
- *dst = i->Ain.Alu64R.dst;
- return True;
- case Ain_SseReRg:
- /* Moves between SSE regs */
- if (i->Ain.SseReRg.op != Asse_MOV)
- return False;
- *src = i->Ain.SseReRg.src;
- *dst = i->Ain.SseReRg.dst;
- return True;
- //uu case Ain_AvxReRg:
- //uu /* Moves between AVX regs */
- //uu if (i->Ain.AvxReRg.op != Asse_MOV)
- //uu return False;
- //uu *src = i->Ain.AvxReRg.src;
- //uu *dst = i->Ain.AvxReRg.dst;
- //uu return True;
- default:
- return False;
- }
- /*NOTREACHED*/
-}
-
-
/* Generate amd64 spill/reload instructions under the direction of the
register allocator. Note it's critical these don't write the
condition codes. */
of the underlying instruction set. */
extern void getRegUsage_AMD64Instr ( HRegUsage*, const AMD64Instr*, Bool );
extern void mapRegs_AMD64Instr ( HRegRemap*, AMD64Instr*, Bool );
-extern Bool isMove_AMD64Instr ( const AMD64Instr*, HReg*, HReg* );
extern Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
UChar* buf, Int nbuf,
const AMD64Instr* i,
case ARM64in_MovI:
addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->ARM64in.MovI.src;
+ u->regMoveDst = i->ARM64in.MovI.dst;
return;
case ARM64in_Imm64:
addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
case ARM64in_VMov:
addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->ARM64in.VMov.src;
+ u->regMoveDst = i->ARM64in.VMov.dst;
return;
case ARM64in_EvCheck:
/* We expect both amodes only to mention x21, so this is in
}
}
-/* Figure out if i represents a reg-reg move, and if so assign the
- source and destination to *src and *dst. If in doubt say No. Used
- by the register allocator to do move coalescing.
-*/
-Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
-{
- switch (i->tag) {
- case ARM64in_MovI:
- *src = i->ARM64in.MovI.src;
- *dst = i->ARM64in.MovI.dst;
- return True;
- case ARM64in_VMov:
- *src = i->ARM64in.VMov.src;
- *dst = i->ARM64in.VMov.dst;
- return True;
- default:
- break;
- }
-
- return False;
-}
-
-
/* Generate arm spill/reload instructions under the direction of the
register allocator. Note it's critical these don't write the
condition codes. */
of the underlying instruction set. */
extern void getRegUsage_ARM64Instr ( HRegUsage*, const ARM64Instr*, Bool );
extern void mapRegs_ARM64Instr ( HRegRemap*, ARM64Instr*, Bool );
-extern Bool isMove_ARM64Instr ( const ARM64Instr*, HReg*, HReg* );
extern Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
UChar* buf, Int nbuf, const ARM64Instr* i,
Bool mode64,
case ARMin_Mov:
addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
+
+ if (i->ARMin.Mov.src->tag == ARMri84_R) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->ARMin.Mov.src->ARMri84.R.reg;
+ u->regMoveDst = i->ARMin.Mov.dst;
+ }
return;
case ARMin_Imm32:
addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
case ARMin_VUnaryD:
addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
+
+ if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->ARMin.VUnaryD.src;
+ u->regMoveDst = i->ARMin.VUnaryD.dst;
+ }
return;
case ARMin_VUnaryS:
addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
+
+ if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->ARMin.VUnaryS.src;
+ u->regMoveDst = i->ARMin.VUnaryS.dst;
+ }
return;
case ARMin_VCmpD:
addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
case ARMin_NUnary:
addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
+
+ if (i->ARMin.NUnary.op == ARMneon_COPY) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->ARMin.NUnary.src;
+ u->regMoveDst = i->ARMin.NUnary.dst;
+ }
return;
case ARMin_NUnaryS:
addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
}
}
-/* Figure out if i represents a reg-reg move, and if so assign the
- source and destination to *src and *dst. If in doubt say No. Used
- by the register allocator to do move coalescing.
-*/
-Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
-{
- /* Moves between integer regs */
- switch (i->tag) {
- case ARMin_Mov:
- if (i->ARMin.Mov.src->tag == ARMri84_R) {
- *src = i->ARMin.Mov.src->ARMri84.R.reg;
- *dst = i->ARMin.Mov.dst;
- return True;
- }
- break;
- case ARMin_VUnaryD:
- if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
- *src = i->ARMin.VUnaryD.src;
- *dst = i->ARMin.VUnaryD.dst;
- return True;
- }
- break;
- case ARMin_VUnaryS:
- if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
- *src = i->ARMin.VUnaryS.src;
- *dst = i->ARMin.VUnaryS.dst;
- return True;
- }
- break;
- case ARMin_NUnary:
- if (i->ARMin.NUnary.op == ARMneon_COPY) {
- *src = i->ARMin.NUnary.src;
- *dst = i->ARMin.NUnary.dst;
- return True;
- }
- break;
- default:
- break;
- }
-
- return False;
-}
-
-
/* Generate arm spill/reload instructions under the direction of the
register allocator. Note it's critical these don't write the
condition codes. */
of the underlying instruction set. */
extern void getRegUsage_ARMInstr ( HRegUsage*, const ARMInstr*, Bool );
extern void mapRegs_ARMInstr ( HRegRemap*, ARMInstr*, Bool );
-extern Bool isMove_ARMInstr ( const ARMInstr*, HReg*, HReg* );
extern Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
UChar* buf, Int nbuf, const ARMInstr* i,
Bool mode64,
/* TODO 27 Oct 04:
- Better consistency checking from what isMove tells us.
-
We can possibly do V-V coalescing even when the src is spilled,
providing we can arrange for the dst to have the same spill slot.
for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
con->getRegUsage(®_usage_arr[ii], instrs_in->arr[ii], con->mode64);
+ reg_usage_arr[ii].isVregVregMove
+ = reg_usage_arr[ii].isRegRegMove
+ && hregIsVirtual(reg_usage_arr[ii].regMoveSrc)
+ && hregIsVirtual(reg_usage_arr[ii].regMoveDst);
if (0) {
vex_printf("\n%d stage1: ", ii);
/* If doing a reg-reg move between two vregs, and the src's live
range ends here and the dst's live range starts here, bind
the dst to the src's rreg, and that's all. */
- HReg vregS = INVALID_HREG;
- HReg vregD = INVALID_HREG;
- if ( con->isMove(instrs_in->arr[ii], &vregS, &vregD) ) {
- if (!hregIsVirtual(vregS)) goto cannot_coalesce;
- if (!hregIsVirtual(vregD)) goto cannot_coalesce;
- /* Check that *isMove is not telling us a bunch of lies ... */
+ if (reg_usage_arr[ii].isVregVregMove) {
+ HReg vregS = reg_usage_arr[ii].regMoveSrc;
+ HReg vregD = reg_usage_arr[ii].regMoveDst;
+ /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
vassert(hregClass(vregS) == hregClass(vregD));
Int k = hregIndex(vregS);
Int m = hregIndex(vregD);
/* The "home" spill slot. The offset is relative to the beginning of
the guest state. */
UShort spill_offset;
+
+ /* This vreg is coalesced to another vreg when |coalescedTo| != INVALID_HREG.
+ Coalescing means that a MOV instruction occurs in the instruction stream
+ exactly at this vreg's dead_before and at the destination vreg's
+ live_after. */
+ HReg coalescedTo; /* Which vreg it is coalesced to. */
+ HReg coalescedFirst; /* First vreg in the coalescing chain. */
+
+ /* If this vreg is coalesced to another vreg, the combined dead_before of
+ the whole coalescing chain, that is, the dead_before of the last vreg in
+ the chain. Used to allocate registers and spill slots more effectively. */
+ Short effective_dead_before;
}
VRegState;
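
As a worked example (hypothetical vreg numbers and instruction indexes,
assuming adjacent live ranges in both cases):

   ii == 12:  MOV v5 -> v9          ii == 20:  MOV v9 -> v12

   v5.coalescedTo    == v9          v9.coalescedTo     == v12
   v9.coalescedFirst == v5          v12.coalescedFirst == v5
   v5.effective_dead_before == v12.dead_before

The spill-slot pass later propagates the chain head's effective_dead_before
and spill_offset down the chain to v9 and v12.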
const RRegLRState* rreg_lr_state,
UShort current_ii)
{
+# define RIGHT_JUSTIFY(_total, _written) \
+ do { \
+ for (Int w = (_total) - (_written); w > 0; w--) { \
+ vex_printf(" "); \
+ } \
+ } while (0)
+
for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
const VRegState* vreg = &vreg_state[v_idx];
if (vreg->live_after == INVALID_INSTRNO) {
continue; /* This is a dead vreg. It never becomes live. */
}
- vex_printf("vreg_state[%3u] \t", v_idx);
+ vex_printf("vreg_state[%3u] ", v_idx);
UInt written;
switch (vreg->disp) {
default:
vassert(0);
}
+ RIGHT_JUSTIFY(25, written);
- for (Int w = 30 - written; w > 0; w--) {
- vex_printf(" ");
- }
+ written = vex_printf("lr: [%d, %d) ",
+ vreg->live_after, vreg->dead_before);
+ RIGHT_JUSTIFY(15, written);
+
+ written = vex_printf("effective lr: [%d, %d)",
+ vreg->live_after, vreg->effective_dead_before);
+ RIGHT_JUSTIFY(25, written);
if (vreg->live_after > (Short) current_ii) {
vex_printf("[not live yet]\n");
} else if ((Short) current_ii >= vreg->dead_before) {
- vex_printf("[now dead]\n");
+ if (hregIsInvalid(vreg->coalescedTo)) {
+ vex_printf("[now dead]\n");
+ } else {
+ vex_printf("[now dead, coalesced to ");
+ con->ppReg(vreg->coalescedTo);
+ vex_printf("]\n");
+ }
} else {
vex_printf("[live]\n");
}
const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
vex_printf("rreg_state[%2u] = ", r_idx);
UInt written = con->ppReg(con->univ->regs[r_idx]);
- for (Int w = 10 - written; w > 0; w--) {
- vex_printf(" ");
- }
+ RIGHT_JUSTIFY(10, written);
switch (rreg->disp) {
case Free:
break;
}
}
+
+# undef RIGHT_JUSTIFY
}
static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out,
a callee-save register because it won't be used for parameter passing
around helper function calls. */
static Bool find_free_rreg(
- VRegState* vreg_state, UInt n_vregs,
- RRegState* rreg_state, UInt n_rregs,
+ const VRegState* vreg_state, UInt n_vregs,
+ const RRegState* rreg_state, UInt n_rregs,
const RRegLRState* rreg_lr_state,
UInt current_ii, HRegClass target_hregclass,
Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found)
HRegUsage* reg_usage
= LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
+ /* Mark vreg indexes where coalesce chains start at. */
+ UInt* coalesce_heads = LibVEX_Alloc_inline(n_vregs * sizeof(UInt));
+ UInt nr_coalesce_heads = 0;
+
/* The live range numbers are signed shorts, and so limiting the
number of instructions to 15000 comfortably guards against them
overflowing 32k. */
instruction and makes free the corresponding rreg. */
# define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase) \
({ \
- UInt _r_free_idx = -1; \
+ UInt _r_free_idx; \
Bool free_rreg_found = find_free_rreg( \
- vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \
+ vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state, \
(_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx); \
if (!free_rreg_found) { \
HReg vreg_to_spill = find_vreg_to_spill( \
/* --- Stage 0. Initialize the state. --- */
for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
- vreg_state[v_idx].live_after = INVALID_INSTRNO;
- vreg_state[v_idx].dead_before = INVALID_INSTRNO;
- vreg_state[v_idx].reg_class = HRcINVALID;
- vreg_state[v_idx].disp = Unallocated;
- vreg_state[v_idx].rreg = INVALID_HREG;
- vreg_state[v_idx].spill_offset = 0;
+ vreg_state[v_idx].live_after = INVALID_INSTRNO;
+ vreg_state[v_idx].dead_before = INVALID_INSTRNO;
+ vreg_state[v_idx].reg_class = HRcINVALID;
+ vreg_state[v_idx].disp = Unallocated;
+ vreg_state[v_idx].rreg = INVALID_HREG;
+ vreg_state[v_idx].spill_offset = 0;
+ vreg_state[v_idx].coalescedTo = INVALID_HREG;
+ vreg_state[v_idx].coalescedFirst = INVALID_HREG;
+ vreg_state[v_idx].effective_dead_before = INVALID_INSTRNO;
}
for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
const HInstr* instr = instrs_in->arr[ii];
con->getRegUsage(®_usage[ii], instr, con->mode64);
+ reg_usage[ii].isVregVregMove
+ = reg_usage[ii].isRegRegMove
+ && hregIsVirtual(reg_usage[ii].regMoveSrc)
+ && hregIsVirtual(reg_usage[ii].regMoveDst);
if (0) {
vex_printf("\n%u stage 1: ", ii);
if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
OFFENDING_VREG(v_idx, instr, "Read");
}
- vreg_state[v_idx].dead_before = toShort(ii + 1);
break;
case HRmWrite:
if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
vreg_state[v_idx].live_after = toShort(ii);
}
- vreg_state[v_idx].dead_before = toShort(ii + 1);
break;
case HRmModify:
if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
OFFENDING_VREG(v_idx, instr, "Modify");
}
- vreg_state[v_idx].dead_before = toShort(ii + 1);
break;
default:
vassert(0);
}
+
+ vreg_state[v_idx].dead_before = toShort(ii + 1);
+ vreg_state[v_idx].effective_dead_before
+ = vreg_state[v_idx].dead_before;
}
/* Process real registers mentioned in the instruction. */
}
}
- /* --- Stage 2. Allocate spill slots. --- */
+
+ /* --- Stage 2. MOV coalescing (preparation). --- */
+ /* Optimise register coalescing:
+ MOV v <-> v coalescing (done here).
+ MOV v <-> r coalescing (TODO: not yet, not here). */
+ /* If doing a reg-reg move between two vregs, and the src's live range ends
+ here and the dst's live range starts here, coalesce the src vreg
+ to the dst vreg. */
+ Bool coalesce_happened = False;
+ for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
+ if (reg_usage[ii].isVregVregMove) {
+ HReg vregS = reg_usage[ii].regMoveSrc;
+ HReg vregD = reg_usage[ii].regMoveDst;
+
+ /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
+ vassert(hregClass(vregS) == hregClass(vregD));
+ UInt vs_idx = hregIndex(vregS);
+ UInt vd_idx = hregIndex(vregD);
+ vassert(IS_VALID_VREGNO(vs_idx));
+ vassert(IS_VALID_VREGNO(vd_idx));
+ vassert(! sameHReg(vregS, vregD));
+ VRegState* vs_st = &vreg_state[vs_idx];
+ VRegState* vd_st = &vreg_state[vd_idx];
+
+ if ((vs_st->dead_before == ii + 1) && (vd_st->live_after == ii)) {
+ /* Live ranges are adjacent. */
+
+ vs_st->coalescedTo = vregD;
+ if (hregIsInvalid(vs_st->coalescedFirst)) {
+ vd_st->coalescedFirst = vregS;
+ coalesce_heads[nr_coalesce_heads] = vs_idx;
+ nr_coalesce_heads += 1;
+ } else {
+ vd_st->coalescedFirst = vs_st->coalescedFirst;
+ }
+
+ vreg_state[hregIndex(vd_st->coalescedFirst)].effective_dead_before
+ = vd_st->dead_before;
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("vreg coalescing: ");
+ con->ppReg(vregS);
+ vex_printf(" -> ");
+ con->ppReg(vregD);
+ vex_printf("\n");
+ }
+
+ coalesce_happened = True;
+ }
+ }
+ }
+
+ /* --- Stage 3. Allocate spill slots. --- */
/* Each spill slot is 8 bytes long. For vregs which take more than 64 bits
to spill (for example classes Flt64 and Vec128), we have to allocate two
vassert(vreg_state[v_idx].reg_class == HRcINVALID);
continue;
}
+ if (! hregIsInvalid(vreg_state[v_idx].coalescedFirst)) {
+ /* Coalesced vregs should share the same spill slot as the first vreg
+ in the coalescing chain. But we don't have that information yet. */
+ continue;
+ }
/* The spill slots are 64 bits in size. As per the comment on definition
of HRegClass in host_generic_regs.h, that means, to spill a vreg of
if (ss_no >= N_SPILL64S - 1) {
vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
}
- ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before;
- ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before;
+ ss_busy_until_before[ss_no + 0]
+ = vreg_state[v_idx].effective_dead_before;
+ ss_busy_until_before[ss_no + 1]
+ = vreg_state[v_idx].effective_dead_before;
break;
default:
/* The ordinary case -- just find a single lowest-numbered spill
if (ss_no == N_SPILL64S) {
vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
}
- ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before;
+ ss_busy_until_before[ss_no]
+ = vreg_state[v_idx].effective_dead_before;
break;
}
}
}
+ /* Fill in the spill offsets and effective_dead_before for coalesced vregs.*/
+ for (UInt i = 0; i < nr_coalesce_heads; i++) {
+ UInt vs_idx = coalesce_heads[i];
+ Short effective_dead_before = vreg_state[vs_idx].effective_dead_before;
+ UShort spill_offset = vreg_state[vs_idx].spill_offset;
+ HReg vregD = vreg_state[vs_idx].coalescedTo;
+ while (! hregIsInvalid(vregD)) {
+ UInt vd_idx = hregIndex(vregD);
+ vreg_state[vd_idx].effective_dead_before = effective_dead_before;
+ vreg_state[vd_idx].spill_offset = spill_offset;
+ vregD = vreg_state[vd_idx].coalescedTo;
+ }
+ }
+
+ if (DEBUG_REGALLOC && coalesce_happened) {
+ UInt ii = 0;
+ vex_printf("After vreg<->vreg MOV coalescing:\n");
+ PRINT_STATE;
+ }
+
if (0) {
vex_printf("\n\n");
- for (UInt v_idx = 0; v_idx < n_vregs; v_idx++)
- vex_printf("vreg %3u --> spill offset %u\n",
- v_idx, vreg_state[v_idx].spill_offset);
+ for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+ if (vreg_state[v_idx].live_after != INVALID_INSTRNO) {
+ vex_printf("vreg %3u --> spill offset %u\n",
+ v_idx, vreg_state[v_idx].spill_offset);
+ }
+ }
}
- /* --- State 3. Process instructions. --- */
+ /* --- Stage 4. Process instructions. --- */
for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
HInstr* instr = instrs_in->arr[ii];
vassert((Short) ii < rreg_lrs->lr_current->dead_before);
}
}
+
+ /* Sanity check: if vregS has been marked as coalesced to vregD,
+ then the effective live range of vregS must also cover live range
+ of vregD. */
+ /* The following sanity check is quite expensive. Some basic blocks
+ contain very lengthy coalescing chains... */
+ if (SANITY_CHECKS_EVERY_INSTR) {
+ for (UInt vs_idx = 0; vs_idx < n_vregs; vs_idx++) {
+ const VRegState* vS_st = &vreg_state[vs_idx];
+ HReg vregD = vS_st->coalescedTo;
+ while (! hregIsInvalid(vregD)) {
+ const VRegState* vD_st = &vreg_state[hregIndex(vregD)];
+ vassert(vS_st->live_after <= vD_st->live_after);
+ vassert(vS_st->effective_dead_before >= vD_st->dead_before);
+ vregD = vD_st->coalescedTo;
+ }
+ }
+ }
}
- /* --- MOV coalescing --- */
+ /* --- MOV coalescing (finishing) --- */
/* Optimise register coalescing:
- MOV v <-> v coalescing (done here).
+ MOV v <-> v coalescing (finished here).
MOV v <-> r coalescing (TODO: not yet). */
- /* If doing a reg-reg move between two vregs, and the src's live
- range ends here and the dst's live range starts here, bind the dst
- to the src's rreg, and that's all. */
- HReg vregS = INVALID_HREG;
- HReg vregD = INVALID_HREG;
- if (con->isMove(instr, &vregS, &vregD)) {
- if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) {
- /* Check that |isMove| is not telling us a bunch of lies ... */
- vassert(hregClass(vregS) == hregClass(vregD));
- UInt vs_idx = hregIndex(vregS);
- UInt vd_idx = hregIndex(vregD);
- vassert(IS_VALID_VREGNO(vs_idx));
- vassert(IS_VALID_VREGNO(vd_idx));
-
- if ((vreg_state[vs_idx].dead_before == ii + 1)
- && (vreg_state[vd_idx].live_after == ii)
- && (vreg_state[vs_idx].disp == Assigned)) {
-
- /* Live ranges are adjacent and source vreg is bound.
- Finally we can do the coalescing. */
- HReg rreg = vreg_state[vs_idx].rreg;
- vreg_state[vd_idx].disp = Assigned;
+ if (reg_usage[ii].isVregVregMove) {
+ HReg vregS = reg_usage[ii].regMoveSrc;
+ HReg vregD = reg_usage[ii].regMoveDst;
+ UInt vs_idx = hregIndex(vregS);
+ UInt vd_idx = hregIndex(vregD);
+
+ if (sameHReg(vreg_state[vs_idx].coalescedTo, vregD)) {
+ /* Finally do the coalescing. */
+
+ HReg rreg = vreg_state[vs_idx].rreg;
+ switch (vreg_state[vs_idx].disp) {
+ case Assigned:
vreg_state[vd_idx].rreg = rreg;
- FREE_VREG(&vreg_state[vs_idx]);
-
UInt r_idx = hregIndex(rreg);
vassert(rreg_state[r_idx].disp == Bound);
- rreg_state[r_idx].vreg = vregD;
- rreg_state[r_idx].eq_spill_slot = False;
+ rreg_state[r_idx].vreg = vregD;
+ break;
+ case Spilled:
+ vassert(hregIsInvalid(vreg_state[vs_idx].rreg));
+ break;
+ default:
+ vassert(0);
+ }
- if (DEBUG_REGALLOC) {
- vex_printf("coalesced: ");
- con->ppReg(vregS);
- vex_printf(" -> ");
- con->ppReg(vregD);
- vex_printf("\n\n");
- }
+ vreg_state[vd_idx].disp = vreg_state[vs_idx].disp;
+ FREE_VREG(&vreg_state[vs_idx]);
+
+ if (DEBUG_REGALLOC) {
+ vex_printf("coalesced: ");
+ con->ppReg(vregS);
+ vex_printf(" -> ");
+ con->ppReg(vregD);
+ vex_printf("\n\n");
+ }
- /* In rare cases it can happen that vregD's live range ends
- here. Check and eventually free the vreg and rreg.
- This effectively means that either the translated program
- contained dead code (but VEX iropt passes are pretty good
- at eliminating it) or the VEX backend generated dead code. */
- if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
- FREE_VREG(&vreg_state[vd_idx]);
+ /* In rare cases it can happen that vregD's live range ends here.
+ Check and eventually free the vreg and rreg.
+ This effectively means that either the translated program
+ contained dead code (but VEX iropt passes are pretty good
+ at eliminating it) or the VEX backend generated dead code. */
+ if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
+ if (vreg_state[vd_idx].disp == Assigned) {
+ UInt r_idx = hregIndex(rreg);
FREE_RREG(&rreg_state[r_idx]);
}
-
- /* Move on to the next instruction. We skip the post-instruction
- stuff because all required house-keeping was done here. */
- continue;
+ FREE_VREG(&vreg_state[vd_idx]);
}
+
+ /* Move on to the next instruction. We skip the post-instruction
+ stuff because all required house-keeping was done here. */
+ continue;
}
}
ppHReg(tab->vRegs[i]);
vex_printf("\n");
}
+ if (tab->isRegRegMove) {
+ vex_printf(" (is a reg-reg move)\n");
+ }
vex_printf("}\n");
}
HReg vRegs[N_HREGUSAGE_VREGS];
HRegMode vMode[N_HREGUSAGE_VREGS];
UInt n_vRegs;
+
+ /* Hint to the register allocator: this instruction is actually a move
+ between two registers: regMoveSrc -> regMoveDst. */
+ Bool isRegRegMove;
+ HReg regMoveSrc;
+ HReg regMoveDst;
+
+ /* Used internally by the register allocator. The reg-reg move is
+ actually a vreg-vreg move. */
+ Bool isVregVregMove;
}
HRegUsage;
static inline void initHRegUsage ( HRegUsage* tab )
{
- tab->rRead = 0;
- tab->rWritten = 0;
- tab->n_vRegs = 0;
+ tab->rRead = 0;
+ tab->rWritten = 0;
+ tab->n_vRegs = 0;
+ tab->isRegRegMove = False;
}
/* Add a register to a usage table. Combine incoming read uses with
allocation. */
const RRegUniverse* univ;
- /* Return True iff the given insn is a reg-reg move, in which case also
- return the src and dst regs. */
- Bool (*isMove)(const HInstr*, HReg*, HReg*);
-
/* Get info about register usage in this insn. */
void (*getRegUsage)(HRegUsage*, const HInstr*, Bool);
addHRegUse(u, HRmRead, i->Min.Alu.srcL);
addRegUsage_MIPSRH(u, i->Min.Alu.srcR);
addHRegUse(u, HRmWrite, i->Min.Alu.dst);
+
+ /* or Rd,Rs,Rs == mr Rd,Rs */
+ if ((i->Min.Alu.op == Malu_OR)
+ && (i->Min.Alu.srcR->tag == Mrh_Reg)
+ && sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL)) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->Min.Alu.srcL;
+ u->regMoveDst = i->Min.Alu.dst;
+ }
return;
case Min_Shft:
addHRegUse(u, HRmRead, i->Min.Shft.srcL);
}
-/* Figure out if i represents a reg-reg move, and if so assign the
- source and destination to *src and *dst. If in doubt say No. Used
- by the register allocator to do move coalescing.
-*/
-Bool isMove_MIPSInstr(const MIPSInstr * i, HReg * src, HReg * dst)
-{
- /* Moves between integer regs */
- if (i->tag == Min_Alu) {
- /* or Rd,Rs,Rs == mr Rd,Rs */
- if (i->Min.Alu.op != Malu_OR)
- return False;
- if (i->Min.Alu.srcR->tag != Mrh_Reg)
- return False;
- if (!sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL))
- return False;
- *src = i->Min.Alu.srcL;
- *dst = i->Min.Alu.dst;
- return True;
- }
- return False;
-}
-
/* Generate mips spill/reload instructions under the direction of the
register allocator. */
void genSpill_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg,
of the underlying instruction set. */
extern void getRegUsage_MIPSInstr (HRegUsage *, const MIPSInstr *, Bool);
extern void mapRegs_MIPSInstr (HRegRemap *, MIPSInstr *, Bool mode64);
-extern Bool isMove_MIPSInstr (const MIPSInstr *, HReg *, HReg *);
extern Int emit_MIPSInstr (/*MB_MOD*/Bool* is_profInc,
UChar* buf, Int nbuf, const MIPSInstr* i,
Bool mode64,
addHRegUse(u, HRmRead, i->Pin.Alu.srcL);
addRegUsage_PPCRH(u, i->Pin.Alu.srcR);
addHRegUse(u, HRmWrite, i->Pin.Alu.dst);
+
+ // or Rd,Rs,Rs == mr Rd,Rs
+ if ((i->Pin.Alu.op == Palu_OR)
+ && (i->Pin.Alu.srcR->tag == Prh_Reg)
+ && sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL)) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->Pin.Alu.srcL;
+ u->regMoveDst = i->Pin.Alu.dst;
+ }
return;
case Pin_Shft:
addHRegUse(u, HRmRead, i->Pin.Shft.srcL);
case Pin_FpUnary:
addHRegUse(u, HRmWrite, i->Pin.FpUnary.dst);
addHRegUse(u, HRmRead, i->Pin.FpUnary.src);
+
+ if (i->Pin.FpUnary.op == Pfp_MOV) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->Pin.FpUnary.src;
+ u->regMoveDst = i->Pin.FpUnary.dst;
+ }
return;
case Pin_FpBinary:
addHRegUse(u, HRmWrite, i->Pin.FpBinary.dst);
}
}
-/* Figure out if i represents a reg-reg move, and if so assign the
- source and destination to *src and *dst. If in doubt say No. Used
- by the register allocator to do move coalescing.
-*/
-Bool isMove_PPCInstr ( const PPCInstr* i, HReg* src, HReg* dst )
-{
- /* Moves between integer regs */
- if (i->tag == Pin_Alu) {
- // or Rd,Rs,Rs == mr Rd,Rs
- if (i->Pin.Alu.op != Palu_OR)
- return False;
- if (i->Pin.Alu.srcR->tag != Prh_Reg)
- return False;
- if (! sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL))
- return False;
- *src = i->Pin.Alu.srcL;
- *dst = i->Pin.Alu.dst;
- return True;
- }
- /* Moves between FP regs */
- if (i->tag == Pin_FpUnary) {
- if (i->Pin.FpUnary.op != Pfp_MOV)
- return False;
- *src = i->Pin.FpUnary.src;
- *dst = i->Pin.FpUnary.dst;
- return True;
- }
- return False;
-}
-
-
/* Generate ppc spill/reload instructions under the direction of the
register allocator. Note it's critical these don't write the
condition codes. */
of the underlying instruction set. */
extern void getRegUsage_PPCInstr ( HRegUsage*, const PPCInstr*, Bool mode64 );
extern void mapRegs_PPCInstr ( HRegRemap*, PPCInstr* , Bool mode64);
-extern Bool isMove_PPCInstr ( const PPCInstr*, HReg*, HReg* );
extern Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
UChar* buf, Int nbuf, const PPCInstr* i,
Bool mode64,
/*--- Forward declarations ---*/
/*------------------------------------------------------------*/
-static Bool s390_insn_is_reg_reg_move(const s390_insn *, HReg *src, HReg *dst);
static void s390_insn_map_regs(HRegRemap *, s390_insn *);
static void s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *);
static UInt s390_tchain_load64_len(void);
}
-/* Figure out if the given insn represents a reg-reg move, and if so
- assign the source and destination to *src and *dst. If in doubt say No.
- Used by the register allocator to do move coalescing. */
-Bool
-isMove_S390Instr(const s390_insn *insn, HReg *src, HReg *dst)
-{
- return s390_insn_is_reg_reg_move(insn, src, dst);
-}
-
-
/* Generate s390 spill/reload instructions under the direction of the
register allocator. Note it's critical these don't write the
condition codes. This is like an Ist_Put */
case S390_INSN_MOVE:
addHRegUse(u, HRmRead, insn->variant.move.src);
addHRegUse(u, HRmWrite, insn->variant.move.dst);
+
+ if (hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = insn->variant.move.src;
+ u->regMoveDst = insn->variant.move.dst;
+ }
break;
case S390_INSN_MEMCPY:
}
-/* Return True, if INSN is a move between two registers of the same class.
- In that case assign the source and destination registers to SRC and DST,
- respectively. */
-static Bool
-s390_insn_is_reg_reg_move(const s390_insn *insn, HReg *src, HReg *dst)
-{
- if (insn->tag == S390_INSN_MOVE &&
- hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) {
- *src = insn->variant.move.src;
- *dst = insn->variant.move.dst;
- return True;
- }
-
- return False;
-}
-
-
/*------------------------------------------------------------*/
/*--- Functions to emit a sequence of bytes ---*/
/*------------------------------------------------------------*/
of the underlying instruction set. */
void getRegUsage_S390Instr( HRegUsage *, const s390_insn *, Bool );
void mapRegs_S390Instr ( HRegRemap *, s390_insn *, Bool );
-Bool isMove_S390Instr ( const s390_insn *, HReg *, HReg * );
Int emit_S390Instr ( Bool *, UChar *, Int, const s390_insn *, Bool,
VexEndness, const void *, const void *,
const void *, const void *);
addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
if (i->Xin.Alu32R.op == Xalu_MOV) {
addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
+
+ if (i->Xin.Alu32R.src->tag == Xrmi_Reg) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->Xin.Alu32R.src->Xrmi.Reg.reg;
+ u->regMoveDst = i->Xin.Alu32R.dst;
+ }
return;
}
if (i->Xin.Alu32R.op == Xalu_CMP) {
case Xin_FpUnary:
addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
+
+ if (i->Xin.FpUnary.op == Xfp_MOV) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->Xin.FpUnary.src;
+ u->regMoveDst = i->Xin.FpUnary.dst;
+ }
return;
case Xin_FpBinary:
addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV
? HRmWrite : HRmModify,
i->Xin.SseReRg.dst);
+
+ if (i->Xin.SseReRg.op == Xsse_MOV) {
+ u->isRegRegMove = True;
+ u->regMoveSrc = i->Xin.SseReRg.src;
+ u->regMoveDst = i->Xin.SseReRg.dst;
+ }
}
return;
case Xin_SseCMov:
}
}
-/* Figure out if i represents a reg-reg move, and if so assign the
- source and destination to *src and *dst. If in doubt say No. Used
- by the register allocator to do move coalescing.
-*/
-Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst )
-{
- /* Moves between integer regs */
- if (i->tag == Xin_Alu32R) {
- if (i->Xin.Alu32R.op != Xalu_MOV)
- return False;
- if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
- return False;
- *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
- *dst = i->Xin.Alu32R.dst;
- return True;
- }
- /* Moves between FP regs */
- if (i->tag == Xin_FpUnary) {
- if (i->Xin.FpUnary.op != Xfp_MOV)
- return False;
- *src = i->Xin.FpUnary.src;
- *dst = i->Xin.FpUnary.dst;
- return True;
- }
- if (i->tag == Xin_SseReRg) {
- if (i->Xin.SseReRg.op != Xsse_MOV)
- return False;
- *src = i->Xin.SseReRg.src;
- *dst = i->Xin.SseReRg.dst;
- return True;
- }
- return False;
-}
-
-
/* Generate x86 spill/reload instructions under the direction of the
register allocator. Note it's critical these don't write the
condition codes. */
of the underlying instruction set. */
extern void getRegUsage_X86Instr ( HRegUsage*, const X86Instr*, Bool );
extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool );
-extern Bool isMove_X86Instr ( const X86Instr*, HReg*, HReg* );
extern Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
UChar* buf, Int nbuf, const X86Instr* i,
Bool mode64,
/* This is the bundle of functions we need to do the back-end stuff
(insn selection, reg-alloc, assembly) whilst being insulated
from the target instruction set. */
- Bool (*isMove) ( const HInstr*, HReg*, HReg* );
void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool );
void (*mapRegs) ( HRegRemap*, HInstr*, Bool );
void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool );
HInstrArray* vcode;
HInstrArray* rcode;
- isMove = NULL;
getRegUsage = NULL;
mapRegs = NULL;
genSpill = NULL;
case VexArchX86:
mode64 = False;
rRegUniv = X86FN(getRRegUniverse_X86());
- isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr);
getRegUsage
= CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr);
mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr);
case VexArchAMD64:
mode64 = True;
rRegUniv = AMD64FN(getRRegUniverse_AMD64());
- isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr);
getRegUsage
= CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr);
mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr);
case VexArchPPC32:
mode64 = False;
rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64));
- isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr);
getRegUsage
= CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr);
mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr);
case VexArchPPC64:
mode64 = True;
rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64));
- isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr);
getRegUsage
= CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr);
mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr);
case VexArchS390X:
mode64 = True;
rRegUniv = S390FN(getRRegUniverse_S390());
- isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr);
getRegUsage
= CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr);
mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr);
case VexArchARM:
mode64 = False;
rRegUniv = ARMFN(getRRegUniverse_ARM());
- isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr);
getRegUsage
= CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr);
mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr);
case VexArchARM64:
mode64 = True;
rRegUniv = ARM64FN(getRRegUniverse_ARM64());
- isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr);
getRegUsage
= CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr);
mapRegs = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr);
case VexArchMIPS32:
mode64 = False;
rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64));
- isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr);
getRegUsage
= CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr);
mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr);
case VexArchMIPS64:
mode64 = True;
rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64));
- isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr);
getRegUsage
= CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr);
mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr);
/* Register allocate. */
RegAllocControl con = {
- .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage,
- .mapRegs = mapRegs, .genSpill = genSpill, .genReload = genReload,
- .genMove = genMove, .directReload = directReload,
- .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppReg = ppReg,
- .mode64 = mode64};
+ .univ = rRegUniv, .getRegUsage = getRegUsage, .mapRegs = mapRegs,
+ .genSpill = genSpill, .genReload = genReload, .genMove = genMove,
+ .directReload = directReload, .guest_sizeB = guest_sizeB,
+ .ppInstr = ppInstr, .ppReg = ppReg, .mode64 = mode64};
switch (vex_control.regalloc_version) {
case 2:
rcode = doRegisterAllocation_v2(vcode, &con);