From: Ivo Raisr Date: Fri, 22 Sep 2017 20:50:11 +0000 (+0200) Subject: Cherry pick 83cabd32492e6d19d483a63522e4e874fa64b617 from master. X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=90e405b81b8f49121240db713620a173b988abfb;p=thirdparty%2Fvalgrind.git Cherry pick 83cabd32492e6d19d483a63522e4e874fa64b617 from master. Refactor tracking of MOV coalescing. Reg<->Reg MOV coalescing status is now part of HRegUsage. This allows the register allocator to query it twice without incurring a performance penalty. This in turn makes it possible to better track vreg<->vreg MOV coalescing, so that all vregs in a coalesce chain get the effective |dead_before| of the last vreg. A small performance improvement has been observed, because even spilled vregs can now be coalesced (previously only assigned ones could). --- diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c index d9949d4fd7..a554e28ed9 100644 --- a/VEX/priv/host_amd64_defs.c +++ b/VEX/priv/host_amd64_defs.c @@ -1406,6 +1406,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 ) addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src); if (i->Ain.Alu64R.op == Aalu_MOV) { addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst); + + if (i->Ain.Alu64R.src->tag == Armi_Reg) { + u->isRegRegMove = True; + u->regMoveSrc = i->Ain.Alu64R.src->Armi.Reg.reg; + u->regMoveDst = i->Ain.Alu64R.dst; + } return; } if (i->Ain.Alu64R.op == Aalu_CMP) { @@ -1668,6 +1674,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 ) addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV ? HRmWrite : HRmModify, i->Ain.SseReRg.dst); + + if (i->Ain.SseReRg.op == Asse_MOV) { + u->isRegRegMove = True; + u->regMoveSrc = i->Ain.SseReRg.src; + u->regMoveDst = i->Ain.SseReRg.dst; + } } return; case Ain_SseCMov: @@ -1694,6 +1706,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 ) //uu addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV //uu ? HRmWrite : HRmModify, //uu i->Ain.AvxReRg.dst); + //uu + //uu if (i->Ain.AvxReRg.op == Asse_MOV) { + //uu u->isRegRegMove = True; + //uu u->regMoveSrc = i->Ain.AvxReRg.src; + //uu u->regMoveDst = i->Ain.AvxReRg.dst; + //uu } //uu } //uu return; case Ain_EvCheck: @@ -1910,43 +1928,6 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst ) -{ - switch (i->tag) { - case Ain_Alu64R: - /* Moves between integer regs */ - if (i->Ain.Alu64R.op != Aalu_MOV) - return False; - if (i->Ain.Alu64R.src->tag != Armi_Reg) - return False; - *src = i->Ain.Alu64R.src->Armi.Reg.reg; - *dst = i->Ain.Alu64R.dst; - return True; - case Ain_SseReRg: - /* Moves between SSE regs */ - if (i->Ain.SseReRg.op != Asse_MOV) - return False; - *src = i->Ain.SseReRg.src; - *dst = i->Ain.SseReRg.dst; - return True; - //uu case Ain_AvxReRg: - //uu /* Moves between AVX regs */ - //uu if (i->Ain.AvxReRg.op != Asse_MOV) - //uu return False; - //uu *src = i->Ain.AvxReRg.src; - //uu *dst = i->Ain.AvxReRg.dst; - //uu return True; - default: - return False; - } - /*NOTREACHED*/ -} - - /* Generate amd64 spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes.
*/ diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h index 349e43c74c..299d002feb 100644 --- a/VEX/priv/host_amd64_defs.h +++ b/VEX/priv/host_amd64_defs.h @@ -785,7 +785,6 @@ extern void ppAMD64Instr ( const AMD64Instr*, Bool ); of the underlying instruction set. */ extern void getRegUsage_AMD64Instr ( HRegUsage*, const AMD64Instr*, Bool ); extern void mapRegs_AMD64Instr ( HRegRemap*, AMD64Instr*, Bool ); -extern Bool isMove_AMD64Instr ( const AMD64Instr*, HReg*, HReg* ); extern Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const AMD64Instr* i, diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c index 2506512adb..4d088c77b4 100644 --- a/VEX/priv/host_arm64_defs.c +++ b/VEX/priv/host_arm64_defs.c @@ -1958,6 +1958,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) case ARM64in_MovI: addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst); addHRegUse(u, HRmRead, i->ARM64in.MovI.src); + u->isRegRegMove = True; + u->regMoveSrc = i->ARM64in.MovI.src; + u->regMoveDst = i->ARM64in.MovI.dst; return; case ARM64in_Imm64: addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst); @@ -2238,6 +2241,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) case ARM64in_VMov: addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst); addHRegUse(u, HRmRead, i->ARM64in.VMov.src); + u->isRegRegMove = True; + u->regMoveSrc = i->ARM64in.VMov.src; + u->regMoveDst = i->ARM64in.VMov.dst; return; case ARM64in_EvCheck: /* We expect both amodes only to mention x21, so this is in @@ -2510,29 +2516,6 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst ) -{ - switch (i->tag) { - case ARM64in_MovI: - *src = i->ARM64in.MovI.src; - *dst = i->ARM64in.MovI.dst; - return True; - case ARM64in_VMov: - *src = i->ARM64in.VMov.src; - *dst = i->ARM64in.VMov.dst; - return True; - default: - break; - } - - return False; -} - - /* Generate arm spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. */ diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h index 840e0aabc9..9d8cfb0cff 100644 --- a/VEX/priv/host_arm64_defs.h +++ b/VEX/priv/host_arm64_defs.h @@ -993,7 +993,6 @@ extern void ppARM64Instr ( const ARM64Instr* ); of the underlying instruction set. 
*/ extern void getRegUsage_ARM64Instr ( HRegUsage*, const ARM64Instr*, Bool ); extern void mapRegs_ARM64Instr ( HRegRemap*, ARM64Instr*, Bool ); -extern Bool isMove_ARM64Instr ( const ARM64Instr*, HReg*, HReg* ); extern Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const ARM64Instr* i, Bool mode64, diff --git a/VEX/priv/host_arm_defs.c b/VEX/priv/host_arm_defs.c index 9bf87cd5c3..3de6d50116 100644 --- a/VEX/priv/host_arm_defs.c +++ b/VEX/priv/host_arm_defs.c @@ -2108,6 +2108,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 ) case ARMin_Mov: addHRegUse(u, HRmWrite, i->ARMin.Mov.dst); addRegUsage_ARMRI84(u, i->ARMin.Mov.src); + + if (i->ARMin.Mov.src->tag == ARMri84_R) { + u->isRegRegMove = True; + u->regMoveSrc = i->ARMin.Mov.src->ARMri84.R.reg; + u->regMoveDst = i->ARMin.Mov.dst; + } return; case ARMin_Imm32: addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst); @@ -2256,10 +2262,22 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 ) case ARMin_VUnaryD: addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst); addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src); + + if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) { + u->isRegRegMove = True; + u->regMoveSrc = i->ARMin.VUnaryD.src; + u->regMoveDst = i->ARMin.VUnaryD.dst; + } return; case ARMin_VUnaryS: addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst); addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src); + + if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) { + u->isRegRegMove = True; + u->regMoveSrc = i->ARMin.VUnaryS.src; + u->regMoveDst = i->ARMin.VUnaryS.dst; + } return; case ARMin_VCmpD: addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL); @@ -2350,6 +2368,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 ) case ARMin_NUnary: addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst); addHRegUse(u, HRmRead, i->ARMin.NUnary.src); + + if (i->ARMin.NUnary.op == ARMneon_COPY) { + u->isRegRegMove = True; + u->regMoveSrc = i->ARMin.NUnary.src; + u->regMoveDst = i->ARMin.NUnary.dst; + } return; case ARMin_NUnaryS: addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg); @@ -2620,50 +2644,6 @@ void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst ) -{ - /* Moves between integer regs */ - switch (i->tag) { - case ARMin_Mov: - if (i->ARMin.Mov.src->tag == ARMri84_R) { - *src = i->ARMin.Mov.src->ARMri84.R.reg; - *dst = i->ARMin.Mov.dst; - return True; - } - break; - case ARMin_VUnaryD: - if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) { - *src = i->ARMin.VUnaryD.src; - *dst = i->ARMin.VUnaryD.dst; - return True; - } - break; - case ARMin_VUnaryS: - if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) { - *src = i->ARMin.VUnaryS.src; - *dst = i->ARMin.VUnaryS.dst; - return True; - } - break; - case ARMin_NUnary: - if (i->ARMin.NUnary.op == ARMneon_COPY) { - *src = i->ARMin.NUnary.src; - *dst = i->ARMin.NUnary.dst; - return True; - } - break; - default: - break; - } - - return False; -} - - /* Generate arm spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. 
*/ diff --git a/VEX/priv/host_arm_defs.h b/VEX/priv/host_arm_defs.h index ec6358ee15..d92ad876be 100644 --- a/VEX/priv/host_arm_defs.h +++ b/VEX/priv/host_arm_defs.h @@ -1056,7 +1056,6 @@ extern void ppARMInstr ( const ARMInstr* ); of the underlying instruction set. */ extern void getRegUsage_ARMInstr ( HRegUsage*, const ARMInstr*, Bool ); extern void mapRegs_ARMInstr ( HRegRemap*, ARMInstr*, Bool ); -extern Bool isMove_ARMInstr ( const ARMInstr*, HReg*, HReg* ); extern Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const ARMInstr* i, Bool mode64, diff --git a/VEX/priv/host_generic_reg_alloc3.c b/VEX/priv/host_generic_reg_alloc3.c index 18eb91741c..5a604a63e5 100644 --- a/VEX/priv/host_generic_reg_alloc3.c +++ b/VEX/priv/host_generic_reg_alloc3.c @@ -42,6 +42,10 @@ Avoids the situation when registers are allocated somehow in the fall-through leg and need to be spilled just a few instructions after the merge (because of a helper call, for example). + + TODO-JIT: Investigate extending MOV coalesce chains across If-Then-Else + legs. Perhaps phi node merging could also be considered as a sort of MOV + coalescing? */ /* Set to 1 for lots of debugging output. */ @@ -115,6 +119,18 @@ typedef /* The "home" spill slot. The offset is relative to the beginning of the guest state. */ UShort spill_offset; + + /* This vreg (vregS) is coalesced to another vreg + if |coalescedTo| != INVALID_HREG. + Coalescing means that there is a MOV instruction which occurs in the + instruction stream right at vregS' dead_before + and vregD's live_after. */ + HReg coalescedTo; /* Which vreg it is coalesced to. */ + HReg coalescedFirst; /* First vreg in the coalescing chain. */ + + /* If this vregS is coalesced to another vregD, what is the combined + dead_before for vregS+vregD. Used to effectively allocate registers. */ + Short effective_dead_before; } VRegState; @@ -247,6 +263,12 @@ typedef next chunk: not NULL | x | - NULL | x | x */ + + /* Marks vreg indexes where coalesce chains start. Used internally by the MOV coalescing algorithm to convey information across different stages.
*/ + UInt* coalesce_heads; + UInt nr_coalesce_heads; } RegAllocChunk; @@ -260,7 +282,8 @@ static void init_rreg_lr_state(RRegLRState* rreg_lrs) rreg_lrs->lr_current_idx = 0; } -static RegAllocChunk* new_chunk(HInstrVec* instrs_in, UInt n_rregs) +static RegAllocChunk* new_chunk(HInstrVec* instrs_in, UInt n_vregs, + UInt n_rregs) { RegAllocChunk* chunk = LibVEX_Alloc_inline(sizeof(RegAllocChunk)); chunk->n_rregs = n_rregs; @@ -269,14 +292,16 @@ static RegAllocChunk* new_chunk(HInstrVec* instrs_in, UInt n_rregs) for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) { init_rreg_lr_state(&chunk->rreg_lr_state[r_idx]); } - chunk->instrs_in = instrs_in; - chunk->ii_vec_start = INVALID_INSTRNO; - chunk->ii_vec_len = 0; - chunk->ii_total_start = INVALID_INSTRNO; - chunk->reg_usage = NULL; - chunk->instrs_out = NULL; - chunk->isIfThenElse = False; - chunk->next = NULL; + chunk->instrs_in = instrs_in; + chunk->ii_vec_start = INVALID_INSTRNO; + chunk->ii_vec_len = 0; + chunk->ii_total_start = INVALID_INSTRNO; + chunk->reg_usage = NULL; + chunk->instrs_out = NULL; + chunk->isIfThenElse = False; + chunk->next = NULL; + chunk->coalesce_heads = LibVEX_Alloc_inline(n_vregs * sizeof(UInt)); + chunk->nr_coalesce_heads = 0; return chunk; } @@ -347,15 +372,16 @@ static inline void enlarge_rreg_lrs(RRegLRState* rreg_lrs) print_state(chunk, state, INSTRNO_TOTAL, depth, con, what); \ } while (0) -static inline void print_state( - const RegAllocChunk* chunk, const RegAllocState* state, - Short ii_total_current, UInt depth, const RegAllocControl* con, - const HChar* comment) -{ - print_depth(depth); - vex_printf("%s (current instruction total #%d):\n", - comment, ii_total_current); +#define RIGHT_JUSTIFY(_total, _written) \ + do { \ + for (Int w = (_total) - (_written); w > 0; w--) { \ + vex_printf(" "); \ + } \ + } while (0) +static inline void print_vregs(const RegAllocState* state, + Short ii_total_current, UInt depth, const RegAllocControl* con) +{ for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) { const VRegState* vreg = &state->vregs[v_idx]; @@ -380,20 +406,43 @@ static inline void print_state( default: vassert(0); } + RIGHT_JUSTIFY(25, written); - for (Int w = 30 - written; w > 0; w--) { - vex_printf(" "); - } + written = vex_printf("lr: [%d, %d) ", + vreg->live_after, vreg->dead_before); + RIGHT_JUSTIFY(15, written); + + written = vex_printf("effective lr: [%d, %d)", + vreg->live_after, vreg->effective_dead_before); + RIGHT_JUSTIFY(25, written); if (vreg->live_after > ii_total_current) { vex_printf("[not live yet]"); } else if (ii_total_current >= vreg->dead_before) { - vex_printf("[now dead]"); + if (hregIsInvalid(vreg->coalescedTo)) { + vex_printf("[now dead]\n"); + } else { + vex_printf("[now dead, coalesced to "); + con->ppReg(vreg->coalescedTo); + vex_printf("]\n"); + } } else { vex_printf("[live]"); } vex_printf(" [%d - %d)\n", vreg->live_after, vreg->dead_before); } +} +static inline void print_state( + const RegAllocChunk* chunk, const RegAllocState* state, + Short ii_total_current, UInt depth, const RegAllocControl* con, + const HChar* comment) +{ + + print_depth(depth); + vex_printf("%s (current instruction total #%d):\n", + comment, ii_total_current); + + print_vregs(state, ii_total_current, depth, con); for (UInt r_idx = 0; r_idx < chunk->n_rregs; r_idx++) { const RRegState* rreg = &state->rregs[r_idx]; print_depth(depth); vex_printf("rreg_state[%2u] = ", r_idx); UInt written = con->ppReg(con->univ->regs[r_idx]); - for (Int w = 10 - written; w > 0; w--) { -
vex_printf(" "); - } + RIGHT_JUSTIFY(10, written); switch (rreg->disp) { case Free: @@ -423,6 +470,8 @@ static inline void print_state( break; } } + +# undef RIGHT_JUSTIFY } static RegAllocState* clone_state(const RegAllocState* orig) @@ -582,7 +631,7 @@ static inline HReg find_vreg_to_spill( a callee-save register because it won't be used for parameter passing around helper function calls. */ static inline Bool find_free_rreg( - const RegAllocChunk* chunk, RegAllocState* state, + const RegAllocChunk* chunk, const RegAllocState* state, Short ii_chunk_current, HRegClass target_hregclass, Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found) { @@ -682,12 +731,12 @@ static inline void assign_vreg(RegAllocChunk* chunk, RegAllocState* state, /* --- Stage 1. --- Determine total ordering of instructions and structure of HInstrIfThenElse. Build a similar structure of RegAllocChunks. */ -static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_rregs, - RegAllocChunk** first_chunk, const RegAllocControl* con) +static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_vregs, + UInt n_rregs, RegAllocChunk** first_chunk, const RegAllocControl* con) { Short ii_vec_start = 0; - RegAllocChunk* chunk = new_chunk(instrs_in, n_rregs); + RegAllocChunk* chunk = new_chunk(instrs_in, n_vregs, n_rregs); chunk->ii_vec_start = ii_vec_start; chunk->ii_total_start = ii_total_start; chunk->instrs_out = newHInstrVec(); @@ -716,10 +765,12 @@ static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_rregs, if (hite != NULL) { RegAllocChunk* chunk_fallThrough; UInt ii_total_fallThrough = stage1(hite->fallThrough, ii_total_start, - n_rregs, &chunk_fallThrough, con); + n_vregs, n_rregs, + &chunk_fallThrough, con); RegAllocChunk* chunk_outOfLine; UInt ii_total_outOfLine = stage1(hite->outOfLine, ii_total_start, - n_rregs, &chunk_outOfLine, con); + n_vregs, n_rregs, + &chunk_outOfLine, con); chunk->isIfThenElse = True; chunk->IfThenElse.ccOOL = hite->ccOOL; @@ -733,7 +784,7 @@ static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_rregs, if (ii_vec < instrs_in->insns_used - 1) { RegAllocChunk* previous = chunk; - chunk = new_chunk(instrs_in, n_rregs); + chunk = new_chunk(instrs_in, n_vregs, n_rregs); chunk->ii_vec_start = ii_vec_start; chunk->ii_total_start = toShort(ii_total_start); chunk->instrs_out = (*first_chunk)->instrs_out; @@ -744,7 +795,6 @@ static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_rregs, return ii_total_start; } - /* --- Stage 2. --- Scan the incoming instructions. Note: state->vregs is initially global (shared across all chunks). @@ -784,7 +834,12 @@ static void stage2_chunk(RegAllocChunk* chunk, RegAllocState* state, ii_vec++, ii_chunk++) { const HInstr* instr = chunk->instrs_in->insns[ii_vec]; + con->getRegUsage(&chunk->reg_usage[ii_chunk], instr, con->mode64); + chunk->reg_usage[ii_chunk].isVregVregMove + = chunk->reg_usage[ii_chunk].isRegRegMove + && hregIsVirtual(chunk->reg_usage[ii_chunk].regMoveSrc) + && hregIsVirtual(chunk->reg_usage[ii_chunk].regMoveDst); if (0) { vex_printf("\n"); @@ -847,6 +902,9 @@ static void stage2_chunk(RegAllocChunk* chunk, RegAllocState* state, if (state->vregs[v_idx].dead_before < INSTRNO_TOTAL + 1) { state->vregs[v_idx].dead_before = INSTRNO_TOTAL + 1; } + if (state->vregs[v_idx].effective_dead_before < INSTRNO_TOTAL + 1) { + state->vregs[v_idx].effective_dead_before = INSTRNO_TOTAL + 1; + } } /* Process real registers mentioned in the instruction.
*/ @@ -1002,14 +1060,87 @@ static void stage2_debug_rregs(RegAllocChunk* chunk, UInt depth, ;); } + +/* Preparation for MOV coalescing. Establish MOV coalescing chains. */ +static void stage3_chunk(RegAllocChunk* chunk, RegAllocState* state, + Bool* coalesce_happened, UInt depth, const RegAllocControl* con) +{ + /* Optimise register coalescing: + MOV v <-> v coalescing (done here). + MOV v <-> r coalescing (TODO: not yet, not here). */ + /* If doing a reg-reg move between two vregs, and the src's live range ends + here and the dst's live range starts here, coalesce the src vreg + to the dst vreg. */ + Short ii_chunk = 0; + for (Short ii_vec = chunk->ii_vec_start; + ii_vec < chunk->ii_vec_start + chunk->ii_vec_len; + ii_vec++, ii_chunk++) { + if (chunk->reg_usage[ii_chunk].isVregVregMove) { + HReg vregS = chunk->reg_usage[ii_chunk].regMoveSrc; + HReg vregD = chunk->reg_usage[ii_chunk].regMoveDst; + + /* Check that |isVregVregMove| is not telling us a bunch of lies ... */ + vassert(hregClass(vregS) == hregClass(vregD)); + UInt vs_idx = hregIndex(vregS); + UInt vd_idx = hregIndex(vregD); + vassert(IS_VALID_VREGNO(vs_idx)); + vassert(IS_VALID_VREGNO(vd_idx)); + vassert(! sameHReg(vregS, vregD)); + VRegState* vs_st = &state->vregs[vs_idx]; + VRegState* vd_st = &state->vregs[vd_idx]; + + if ((vs_st->dead_before == INSTRNO_TOTAL + 1) + && (vd_st->live_after == INSTRNO_TOTAL)) { + /* Live ranges are adjacent. */ + + vs_st->coalescedTo = vregD; + if (hregIsInvalid(vs_st->coalescedFirst)) { + vd_st->coalescedFirst = vregS; + chunk->coalesce_heads[chunk->nr_coalesce_heads] = vs_idx; + chunk->nr_coalesce_heads += 1; + } else { + vd_st->coalescedFirst = vs_st->coalescedFirst; + } + + state->vregs[hregIndex(vd_st->coalescedFirst)].effective_dead_before + = vd_st->dead_before; + + if (DEBUG_REGALLOC) { + print_depth(depth); + vex_printf("vreg coalescing: "); + con->ppReg(vregS); + vex_printf(" -> "); + con->ppReg(vregD); + vex_printf("\n"); + } + + *coalesce_happened = True; + } + } + } +} + +static void stage3(RegAllocChunk* chunk, RegAllocState* state, + Bool* coalesce_happened, UInt depth, const RegAllocControl* con) +{ + WALK_CHUNKS(stage3_chunk(chunk, state, coalesce_happened, depth, con), + ;, + stage3(chunk->IfThenElse.fallThrough, state, coalesce_happened, + depth + 1, con), + stage3(chunk->IfThenElse.outOfLine, state, coalesce_happened, + depth + 1, con), + ;); +} + + /* Allocates spill slots. Because VRegState is initially global, spill slots are also initially global. This might have the adverse effect that spill slots eventually run out if there are too many nested If-Then-Else legs. In that case, VRegState must not be initially global but rather local to every leg; and vregs will eventually need their live ranges extended after the legs merge. */ -static void stage3(VRegState* vreg_state, UInt n_vregs, - const RegAllocControl* con) +static void stage4_main(VRegState* vreg_state, UInt n_vregs, + const RegAllocControl* con) { # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8) STATIC_ASSERT((N_SPILL64S % 2) == 0); @@ -1048,6 +1179,11 @@ static void stage3(VRegState* vreg_state, UInt n_vregs, vassert(vreg_state[v_idx].reg_class == HRcINVALID); continue; } + if (! hregIsInvalid(vreg_state[v_idx].coalescedFirst)) { + /* Coalesced vregs should share the same spill slot as the first vreg + in the coalescing chain. But we don't have that information yet. */ + continue; + } /* The spill slots are 64 bits in size.
As per the comment on definition of HRegClass in host_generic_regs.h, that means, to spill a vreg of @@ -1070,8 +1206,10 @@ static void stage3(VRegState* vreg_state, UInt n_vregs, if (ss_no >= N_SPILL64S - 1) { vpanic("N_SPILL64S is too low in VEX. Increase and recompile."); } - ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before; - ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before; + ss_busy_until_before[ss_no + 0] + = vreg_state[v_idx].effective_dead_before; + ss_busy_until_before[ss_no + 1] + = vreg_state[v_idx].effective_dead_before; break; default: /* The ordinary case -- just find a single lowest-numbered spill @@ -1084,7 +1222,8 @@ static void stage3(VRegState* vreg_state, UInt n_vregs, if (ss_no == N_SPILL64S) { vpanic("N_SPILL64S is too low in VEX. Increase and recompile."); } - ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before; + ss_busy_until_before[ss_no] + = vreg_state[v_idx].effective_dead_before; break; } @@ -1105,18 +1244,42 @@ static void stage3(VRegState* vreg_state, UInt n_vregs, } } - if (0) { - vex_printf("\n\n"); - for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) - vex_printf("vreg %3u --> spill offset %u\n", - v_idx, vreg_state[v_idx].spill_offset); +# undef N_SPILL64S +} + +static void stage4_coalesced_chunk(RegAllocChunk* chunk, RegAllocState* state, + UInt depth, const RegAllocControl* con) +{ + /* Fill in the spill offsets and effective_dead_before for coalesced vregs.*/ + for (UInt i = 0; i < chunk->nr_coalesce_heads; i++) { + UInt vs_idx = chunk->coalesce_heads[i]; + Short effective_dead_before = state->vregs[vs_idx].effective_dead_before; + UShort spill_offset = state->vregs[vs_idx].spill_offset; + + HReg vregD = state->vregs[vs_idx].coalescedTo; + while (! hregIsInvalid(vregD)) { + UInt vd_idx = hregIndex(vregD); + state->vregs[vd_idx].effective_dead_before = effective_dead_before; + state->vregs[vd_idx].spill_offset = spill_offset; + vregD = state->vregs[vd_idx].coalescedTo; + } } +} -# undef N_SPILL64S +static void stage4_coalesced(RegAllocChunk* chunk, RegAllocState* state, + UInt depth, const RegAllocControl* con) +{ + WALK_CHUNKS(stage4_coalesced_chunk(chunk, state, depth, con), + ;, + stage4_coalesced(chunk->IfThenElse.fallThrough, state, + depth + 1, con), + stage4_coalesced(chunk->IfThenElse.outOfLine, state, + depth + 1, con), + ;); } -static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state, +static void stage5_chunk(RegAllocChunk* chunk, RegAllocState* state, UInt depth, const RegAllocControl* con) { /* Finds an rreg of the correct class. @@ -1124,7 +1287,7 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state, instruction and makes free the corresponding rreg. */ # define FIND_OR_MAKE_FREE_RREG(_v_idx, _reg_class, _reserve_phase) \ ({ \ - UInt _r_free_idx = -1; \ + UInt _r_free_idx; \ Bool free_rreg_found = find_free_rreg(chunk, state, \ ii_chunk, (_reg_class), (_reserve_phase), \ con, &_r_free_idx); \ @@ -1218,66 +1381,83 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state, vassert(ii_chunk < rreg_lrs->lr_current->dead_before); } } + + /* Sanity check: if vregS has been marked as coalesced to vregD, + then the effective live range of vregS must also cover live range + of vregD. */ + /* The following sanity check is quite expensive. Some basic blocks + contain very lengthy coalescing chains... 
*/ + if (SANITY_CHECKS_EVERY_INSTR) { + for (UInt vs_idx = 0; vs_idx < state->n_vregs; vs_idx++) { + const VRegState* vS_st = &state->vregs[vs_idx]; + HReg vregD = vS_st->coalescedTo; + while (! hregIsInvalid(vregD)) { + const VRegState* vD_st = &state->vregs[hregIndex(vregD)]; + vassert(vS_st->live_after <= vD_st->live_after); + vassert(vS_st->effective_dead_before >= vD_st->dead_before); + vregD = vD_st->coalescedTo; + } + } + } } - /* --- MOV coalescing --- */ + /* --- MOV coalescing (finishing) --- */ /* Optimise register coalescing: - MOV v <-> v coalescing (done here). + MOV v <-> v coalescing (finished here). MOV v <-> r coalescing (TODO: not yet). */ - /* If doing a reg-reg move between two vregs, and the src's live - range ends here and the dst's live range starts here, bind the dst - to the src's rreg, and that's all. */ - HReg vregS = INVALID_HREG; - HReg vregD = INVALID_HREG; - if (con->isMove(instr, &vregS, &vregD)) { - if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) { - /* Check that |isMove| is not telling us a bunch of lies ... */ - vassert(hregClass(vregS) == hregClass(vregD)); - UInt vs_idx = hregIndex(vregS); - UInt vd_idx = hregIndex(vregD); - vassert(IS_VALID_VREGNO(vs_idx)); - vassert(IS_VALID_VREGNO(vd_idx)); - - if ((state->vregs[vs_idx].dead_before == INSTRNO_TOTAL + 1) - && (state->vregs[vd_idx].live_after == INSTRNO_TOTAL) - && (state->vregs[vs_idx].disp == Assigned)) { - - /* Live ranges are adjacent and source vreg is bound. - Finally we can do the coalescing. */ - HReg rreg = state->vregs[vs_idx].rreg; - state->vregs[vd_idx].disp = Assigned; + if (chunk->reg_usage[ii_chunk].isVregVregMove) { + HReg vregS = chunk->reg_usage[ii_chunk].regMoveSrc; + HReg vregD = chunk->reg_usage[ii_chunk].regMoveDst; + UInt vs_idx = hregIndex(vregS); + UInt vd_idx = hregIndex(vregD); + + if (sameHReg(state->vregs[vs_idx].coalescedTo, vregD)) { + /* Finally do the coalescing. */ + + HReg rreg = state->vregs[vs_idx].rreg; + switch (state->vregs[vs_idx].disp) { + case Assigned: state->vregs[vd_idx].rreg = rreg; - FREE_VREG(&state->vregs[vs_idx]); - UInt r_idx = hregIndex(rreg); vassert(state->rregs[r_idx].disp == Bound); - state->rregs[r_idx].vreg = vregD; - state->rregs[r_idx].eq_spill_slot = False; + state->rregs[r_idx].vreg = vregD; + break; + case Spilled: + vassert(hregIsInvalid(state->vregs[vs_idx].rreg)); + break; + default: + vassert(0); + } - if (DEBUG_REGALLOC) { - print_depth(depth); - vex_printf("coalesced: "); - con->ppReg(vregS); - vex_printf(" -> "); - con->ppReg(vregD); - vex_printf("\n\n"); - } + state->vregs[vd_idx].disp = state->vregs[vs_idx].disp; + FREE_VREG(&state->vregs[vs_idx]); + + if (DEBUG_REGALLOC) { + print_depth(depth); + vex_printf("coalesced: "); + con->ppReg(vregS); + vex_printf(" -> "); + con->ppReg(vregD); + vex_printf("\n\n"); + } - /* In rare cases it can happen that vregD's live range ends - here. Check and eventually free the vreg and rreg. - This effectively means that either the translated program - contained dead code (although VEX iropt passes are pretty good - at eliminating it) or the VEX backend generated dead code. */ - if (state->vregs[vd_idx].dead_before <= INSTRNO_TOTAL + 1) { - FREE_VREG(&state->vregs[vd_idx]); + /* In rare cases it can happen that vregD's live range ends here. + Check and eventually free the vreg and rreg. + This effectively means that either the translated program + contained dead code (but VEX iropt passes are pretty good + at eliminating it) or the VEX backend generated dead code. 
*/ + if (state->vregs[vd_idx].dead_before <= INSTRNO_TOTAL + 1) { + if (state->vregs[vd_idx].disp == Assigned) { + UInt r_idx = hregIndex(rreg); FREE_RREG(&state->rregs[r_idx]); } - - /* Move on to the next instruction. We skip the post-instruction - stuff because all required house-keeping was done here. */ - continue; + FREE_VREG(&state->vregs[vd_idx]); } + + /* Move on to the next instruction. We skip the post-instruction + stuff because all required house-keeping was done here. */ + continue; } } @@ -1547,7 +1727,7 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state, # undef FIND_OR_MAKE_FREE_RREG } -static void stage4_emit_HInstrIfThenElse(RegAllocChunk* chunk, UInt depth, +static void stage5_emit_HInstrIfThenElse(RegAllocChunk* chunk, UInt depth, const RegAllocControl* con) { vassert(chunk->isIfThenElse); @@ -1759,7 +1939,7 @@ static void merge_vreg_states(RegAllocChunk* chunk, /* Merges |cloned| state from out-of-line leg back into the main |state|, modified by fall-through leg since the legs fork. */ -static void stage4_merge_states(RegAllocChunk* chunk, +static void stage5_merge_states(RegAllocChunk* chunk, RegAllocState* state, RegAllocState* cloned, UInt depth, const RegAllocControl* con) { @@ -1849,15 +2029,15 @@ static void stage4_merge_states(RegAllocChunk* chunk, } } -static void stage4(RegAllocChunk* chunk, RegAllocState* state, +static void stage5(RegAllocChunk* chunk, RegAllocState* state, UInt depth, const RegAllocControl* con) { - WALK_CHUNKS(stage4_chunk(chunk, state, depth, con), - stage4_emit_HInstrIfThenElse(chunk, depth, con); + WALK_CHUNKS(stage5_chunk(chunk, state, depth, con), + stage5_emit_HInstrIfThenElse(chunk, depth, con); RegAllocState* cloned_state = clone_state(state), - stage4(chunk->IfThenElse.fallThrough, state, depth + 1, con), - stage4(chunk->IfThenElse.outOfLine, cloned_state, depth + 1, con), - stage4_merge_states(chunk, state, cloned_state, depth, con)); + stage5(chunk->IfThenElse.fallThrough, state, depth + 1, con), + stage5(chunk->IfThenElse.outOfLine, cloned_state, depth + 1, con), + stage5_merge_states(chunk, state, cloned_state, depth, con)); } @@ -1900,12 +2080,15 @@ HInstrSB* doRegisterAllocation( /* --- Stage 0. --- */ /* Initialize the vreg state. It is initially global. --- */ for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) { - state->vregs[v_idx].live_after = INVALID_INSTRNO; - state->vregs[v_idx].dead_before = INVALID_INSTRNO; - state->vregs[v_idx].reg_class = HRcINVALID; - state->vregs[v_idx].disp = Unallocated; - state->vregs[v_idx].rreg = INVALID_HREG; - state->vregs[v_idx].spill_offset = 0; + state->vregs[v_idx].live_after = INVALID_INSTRNO; + state->vregs[v_idx].dead_before = INVALID_INSTRNO; + state->vregs[v_idx].reg_class = HRcINVALID; + state->vregs[v_idx].disp = Unallocated; + state->vregs[v_idx].rreg = INVALID_HREG; + state->vregs[v_idx].spill_offset = 0; + state->vregs[v_idx].coalescedTo = INVALID_HREG; + state->vregs[v_idx].coalescedFirst = INVALID_HREG; + state->vregs[v_idx].effective_dead_before = INVALID_INSTRNO; } /* Initialize redundant rreg -> vreg state. A snapshot is taken for @@ -1920,7 +2103,7 @@ HInstrSB* doRegisterAllocation( /* --- Stage 1. Determine total ordering of instructions and structure of HInstrIfThenElse.
--- */ RegAllocChunk* first_chunk; - UInt ii_total_last = stage1(sb_in->insns, 0, state->n_rregs, + UInt ii_total_last = stage1(sb_in->insns, 0, state->n_vregs, state->n_rregs, &first_chunk, con); /* The live range numbers are signed shorts, and so limiting the number @@ -1936,11 +2119,30 @@ HInstrSB* doRegisterAllocation( stage2_debug_rregs(first_chunk, 0, con); } - /* --- Stage 3. Allocate spill slots. --- */ - stage3(state->vregs, state->n_vregs, con); + /* --- Stage 3. MOV coalescing (preparation). --- */ + Bool coalesce_happened = False; + stage3(first_chunk, state, &coalesce_happened, 0, con); + + /* --- Stage 4. Allocate spill slots. --- */ + stage4_main(state->vregs, state->n_vregs, con); + stage4_coalesced(first_chunk, state, 0, con); + if (DEBUG_REGALLOC && coalesce_happened) { + vex_printf("\nAfter vreg<->vreg MOV coalescing:\n"); + print_vregs(state, 0, 0, con); + } + + if (0) { + vex_printf("\n\n"); + for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) { + if (state->vregs[v_idx].live_after != INVALID_INSTRNO) { + vex_printf("vreg %3u --> spill offset %u\n", + v_idx, state->vregs[v_idx].spill_offset); + } + } + } - /* --- Stage 4. Process the instructions and allocate registers. --- */ - stage4(first_chunk, state, 0, con); + /* --- Stage 5. Process the instructions and allocate registers. --- */ + stage5(first_chunk, state, 0, con); /* The output SB of instructions. */ HInstrSB* sb_out = LibVEX_Alloc_inline(sizeof(HInstrSB)); diff --git a/VEX/priv/host_generic_regs.c b/VEX/priv/host_generic_regs.c index f21255b0f1..fc92f54447 100644 --- a/VEX/priv/host_generic_regs.c +++ b/VEX/priv/host_generic_regs.c @@ -184,6 +184,9 @@ void ppHRegUsage ( const RRegUniverse* univ, HRegUsage* tab ) ppHReg(tab->vRegs[i]); vex_printf("\n"); } + if (tab->isRegRegMove) { + vex_printf(" (is a reg-reg move)\n"); + } vex_printf("}\n"); } diff --git a/VEX/priv/host_generic_regs.h b/VEX/priv/host_generic_regs.h index add30e2e9a..b729c77088 100644 --- a/VEX/priv/host_generic_regs.h +++ b/VEX/priv/host_generic_regs.h @@ -300,6 +300,16 @@ typedef HReg vRegs[N_HREGUSAGE_VREGS]; HRegMode vMode[N_HREGUSAGE_VREGS]; UInt n_vRegs; + + /* Hint to the register allocator: this instruction is actually a move + between two registers: regMoveSrc -> regMoveDst. */ + Bool isRegRegMove; + HReg regMoveSrc; + HReg regMoveDst; + + /* Used internally by the register allocator. The reg-reg move is + actually a vreg-vreg move. */ + Bool isVregVregMove; } HRegUsage; @@ -307,9 +317,10 @@ extern void ppHRegUsage ( const RRegUniverse*, HRegUsage* ); static inline void initHRegUsage ( HRegUsage* tab ) { - tab->rRead = 0; - tab->rWritten = 0; - tab->n_vRegs = 0; + tab->rRead = 0; + tab->rWritten = 0; + tab->n_vRegs = 0; + tab->isRegRegMove = False; } /* Add a register to a usage table. Combine incoming read uses with @@ -515,10 +526,6 @@ typedef allocation. */ const RRegUniverse* univ; - /* Return True iff the given insn is a reg-reg move, in which case also - return the src and dst regs. */ - Bool (*isMove)(const HInstr*, HReg*, HReg*); - /* Get info about register usage in this insn. 
*/ void (*getRegUsage)(HRegUsage*, const HInstr*, Bool); diff --git a/VEX/priv/host_mips_defs.c b/VEX/priv/host_mips_defs.c index 9a6993eda0..88906b85f6 100644 --- a/VEX/priv/host_mips_defs.c +++ b/VEX/priv/host_mips_defs.c @@ -1578,6 +1578,15 @@ void getRegUsage_MIPSInstr(HRegUsage * u, const MIPSInstr * i, Bool mode64) addHRegUse(u, HRmRead, i->Min.Alu.srcL); addRegUsage_MIPSRH(u, i->Min.Alu.srcR); addHRegUse(u, HRmWrite, i->Min.Alu.dst); + + /* or Rd,Rs,Rs == mr Rd,Rs */ + if ((i->Min.Alu.op == Malu_OR) + && (i->Min.Alu.srcR->tag == Mrh_Reg) + && sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL)) { + u->isRegRegMove = True; + u->regMoveSrc = i->Min.Alu.srcL; + u->regMoveDst = i->Min.Alu.dst; + } return; case Min_Shft: addHRegUse(u, HRmRead, i->Min.Shft.srcL); @@ -1942,28 +1951,6 @@ void mapRegs_MIPSInstr(HRegRemap * m, MIPSInstr * i, Bool mode64) } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_MIPSInstr(const MIPSInstr * i, HReg * src, HReg * dst) -{ - /* Moves between integer regs */ - if (i->tag == Min_Alu) { - /* or Rd,Rs,Rs == mr Rd,Rs */ - if (i->Min.Alu.op != Malu_OR) - return False; - if (i->Min.Alu.srcR->tag != Mrh_Reg) - return False; - if (!sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL)) - return False; - *src = i->Min.Alu.srcL; - *dst = i->Min.Alu.dst; - return True; - } - return False; -} - /* Generate mips spill/reload instructions under the direction of the register allocator. */ void genSpill_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg, diff --git a/VEX/priv/host_mips_defs.h b/VEX/priv/host_mips_defs.h index 45fff16fd0..5bb5c1388f 100644 --- a/VEX/priv/host_mips_defs.h +++ b/VEX/priv/host_mips_defs.h @@ -686,7 +686,6 @@ extern void ppMIPSInstr(const MIPSInstr *, Bool mode64); of the underlying instruction set. 
*/ extern void getRegUsage_MIPSInstr (HRegUsage *, const MIPSInstr *, Bool); extern void mapRegs_MIPSInstr (HRegRemap *, MIPSInstr *, Bool mode64); -extern Bool isMove_MIPSInstr (const MIPSInstr *, HReg *, HReg *); extern Int emit_MIPSInstr (/*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const MIPSInstr* i, Bool mode64, diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c index 1ef9c5c341..b073c1d79f 100644 --- a/VEX/priv/host_ppc_defs.c +++ b/VEX/priv/host_ppc_defs.c @@ -2362,6 +2362,15 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 ) addHRegUse(u, HRmRead, i->Pin.Alu.srcL); addRegUsage_PPCRH(u, i->Pin.Alu.srcR); addHRegUse(u, HRmWrite, i->Pin.Alu.dst); + + // or Rd,Rs,Rs == mr Rd,Rs + if ((i->Pin.Alu.op == Palu_OR) + && (i->Pin.Alu.srcR->tag == Prh_Reg) + && sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL)) { + u->isRegRegMove = True; + u->regMoveSrc = i->Pin.Alu.srcL; + u->regMoveDst = i->Pin.Alu.dst; + } return; case Pin_Shft: addHRegUse(u, HRmRead, i->Pin.Shft.srcL); @@ -2489,6 +2498,12 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 ) case Pin_FpUnary: addHRegUse(u, HRmWrite, i->Pin.FpUnary.dst); addHRegUse(u, HRmRead, i->Pin.FpUnary.src); + + if (i->Pin.FpUnary.op == Pfp_MOV) { + u->isRegRegMove = True; + u->regMoveSrc = i->Pin.FpUnary.src; + u->regMoveDst = i->Pin.FpUnary.dst; + } return; case Pin_FpBinary: addHRegUse(u, HRmWrite, i->Pin.FpBinary.dst); @@ -3119,37 +3134,6 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_PPCInstr ( const PPCInstr* i, HReg* src, HReg* dst ) -{ - /* Moves between integer regs */ - if (i->tag == Pin_Alu) { - // or Rd,Rs,Rs == mr Rd,Rs - if (i->Pin.Alu.op != Palu_OR) - return False; - if (i->Pin.Alu.srcR->tag != Prh_Reg) - return False; - if (! sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL)) - return False; - *src = i->Pin.Alu.srcL; - *dst = i->Pin.Alu.dst; - return True; - } - /* Moves between FP regs */ - if (i->tag == Pin_FpUnary) { - if (i->Pin.FpUnary.op != Pfp_MOV) - return False; - *src = i->Pin.FpUnary.src; - *dst = i->Pin.FpUnary.dst; - return True; - } - return False; -} - - /* Generate ppc spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. */ diff --git a/VEX/priv/host_ppc_defs.h b/VEX/priv/host_ppc_defs.h index f8fcbf99be..7932cdf6c1 100644 --- a/VEX/priv/host_ppc_defs.h +++ b/VEX/priv/host_ppc_defs.h @@ -1201,7 +1201,6 @@ extern void ppPPCInstr(const PPCInstr*, Bool mode64); of the underlying instruction set. 
*/ extern void getRegUsage_PPCInstr ( HRegUsage*, const PPCInstr*, Bool mode64 ); extern void mapRegs_PPCInstr ( HRegRemap*, PPCInstr* , Bool mode64); -extern Bool isMove_PPCInstr ( const PPCInstr*, HReg*, HReg* ); extern Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, Int nbuf, const PPCInstr* i, Bool mode64, diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c index 327674acab..f9a9557617 100644 --- a/VEX/priv/host_s390_defs.c +++ b/VEX/priv/host_s390_defs.c @@ -48,7 +48,6 @@ /*--- Forward declarations ---*/ /*------------------------------------------------------------*/ -static Bool s390_insn_is_reg_reg_move(const s390_insn *, HReg *src, HReg *dst); static void s390_insn_map_regs(HRegRemap *, s390_insn *); static void s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *); static UInt s390_tchain_load64_len(void); @@ -467,16 +466,6 @@ mapRegs_S390Instr(HRegRemap *m, s390_insn *insn, Bool mode64) } -/* Figure out if the given insn represents a reg-reg move, and if so - assign the source and destination to *src and *dst. If in doubt say No. - Used by the register allocator to do move coalescing. */ -Bool -isMove_S390Instr(const s390_insn *insn, HReg *src, HReg *dst) -{ - return s390_insn_is_reg_reg_move(insn, src, dst); -} - - /* Generate s390 spill/reload instructions under the direction of the register allocator. Note it's critical these don't write the condition codes. This is like an Ist_Put */ @@ -587,6 +576,12 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn) case S390_INSN_MOVE: addHRegUse(u, HRmRead, insn->variant.move.src); addHRegUse(u, HRmWrite, insn->variant.move.dst); + + if (hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) { + u->isRegRegMove = True; + u->regMoveSrc = insn->variant.move.src; + u->regMoveDst = insn->variant.move.dst; + } break; case S390_INSN_MEMCPY: @@ -1218,23 +1213,6 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn) } -/* Return True, if INSN is a move between two registers of the same class. - In that case assign the source and destination registers to SRC and DST, - respectively. */ -static Bool -s390_insn_is_reg_reg_move(const s390_insn *insn, HReg *src, HReg *dst) -{ - if (insn->tag == S390_INSN_MOVE && - hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) { - *src = insn->variant.move.src; - *dst = insn->variant.move.dst; - return True; - } - - return False; -} - - /*------------------------------------------------------------*/ /*--- Functions to emit a sequence of bytes ---*/ /*------------------------------------------------------------*/ diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h index 41b6ecd672..d9df83c22b 100644 --- a/VEX/priv/host_s390_defs.h +++ b/VEX/priv/host_s390_defs.h @@ -742,7 +742,6 @@ UInt ppHRegS390(HReg); of the underlying instruction set. 
*/ void getRegUsage_S390Instr( HRegUsage *, const s390_insn *, Bool ); void mapRegs_S390Instr ( HRegRemap *, s390_insn *, Bool ); -Bool isMove_S390Instr ( const s390_insn *, HReg *, HReg * ); Int emit_S390Instr ( Bool *, UChar *, Int, const s390_insn *, Bool, VexEndness, const void *, const void *, const void *, const void *); diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index 5f47bdb813..56f9d1d31b 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -1275,6 +1275,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64) addRegUsage_X86RMI(u, i->Xin.Alu32R.src); if (i->Xin.Alu32R.op == Xalu_MOV) { addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst); + + if (i->Xin.Alu32R.src->tag == Xrmi_Reg) { + u->isRegRegMove = True; + u->regMoveSrc = i->Xin.Alu32R.src->Xrmi.Reg.reg; + u->regMoveDst = i->Xin.Alu32R.dst; + } return; } if (i->Xin.Alu32R.op == Xalu_CMP) { @@ -1415,6 +1421,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64) case Xin_FpUnary: addHRegUse(u, HRmRead, i->Xin.FpUnary.src); addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst); + + if (i->Xin.FpUnary.op == Xfp_MOV) { + u->isRegRegMove = True; + u->regMoveSrc = i->Xin.FpUnary.src; + u->regMoveDst = i->Xin.FpUnary.dst; + } return; case Xin_FpBinary: addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL); @@ -1510,6 +1522,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64) addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV ? HRmWrite : HRmModify, i->Xin.SseReRg.dst); + + if (i->Xin.SseReRg.op == Xsse_MOV) { + u->isRegRegMove = True; + u->regMoveSrc = i->Xin.SseReRg.src; + u->regMoveDst = i->Xin.SseReRg.dst; + } } return; case Xin_SseCMov: @@ -1709,40 +1727,6 @@ void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 ) } } -/* Figure out if i represents a reg-reg move, and if so assign the - source and destination to *src and *dst. If in doubt say No. Used - by the register allocator to do move coalescing. -*/ -Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst ) -{ - /* Moves between integer regs */ - if (i->tag == Xin_Alu32R) { - if (i->Xin.Alu32R.op != Xalu_MOV) - return False; - if (i->Xin.Alu32R.src->tag != Xrmi_Reg) - return False; - *src = i->Xin.Alu32R.src->Xrmi.Reg.reg; - *dst = i->Xin.Alu32R.dst; - return True; - } - /* Moves between FP regs */ - if (i->tag == Xin_FpUnary) { - if (i->Xin.FpUnary.op != Xfp_MOV) - return False; - *src = i->Xin.FpUnary.src; - *dst = i->Xin.FpUnary.dst; - return True; - } - if (i->tag == Xin_SseReRg) { - if (i->Xin.SseReRg.op != Xsse_MOV) - return False; - *src = i->Xin.SseReRg.src; - *dst = i->Xin.SseReRg.dst; - return True; - } - return False; -} - extern HInstrIfThenElse* isIfThenElse_X86Instr(X86Instr* i) { if (UNLIKELY(i->tag == Xin_IfThenElse)) { diff --git a/VEX/priv/host_x86_defs.h b/VEX/priv/host_x86_defs.h index 1f18550886..f9b52ff949 100644 --- a/VEX/priv/host_x86_defs.h +++ b/VEX/priv/host_x86_defs.h @@ -742,7 +742,6 @@ extern void ppX86CondCode(X86CondCode); of the underlying instruction set. 
*/ extern void getRegUsage_X86Instr ( HRegUsage*, const X86Instr*, Bool ); extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool ); -extern Bool isMove_X86Instr ( const X86Instr*, HReg*, HReg* ); extern HInstrIfThenElse* isIfThenElse_X86Instr(X86Instr*); extern UInt emit_X86Instr ( /*MB_MOD*/Bool* is_profInc, UChar* buf, UInt nbuf, diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c index df568507ed..7d57443fd9 100644 --- a/VEX/priv/main_main.c +++ b/VEX/priv/main_main.c @@ -1264,7 +1264,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, /* This is the bundle of functions we need to do the back-end stuff (insn selection, reg-alloc, assembly) whilst being insulated from the target instruction set. */ - Bool (*isMove) ( const HInstr*, HReg*, HReg* ); void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ); void (*mapRegs) ( HRegRemap*, HInstr*, Bool ); HInstrIfThenElse* (*isIfThenElse)( const HInstr* ); @@ -1298,7 +1297,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, HInstrSB* vcode; HInstrSB* rcode; - isMove = NULL; getRegUsage = NULL; mapRegs = NULL; isIfThenElse = NULL; @@ -1422,7 +1420,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, case VexArchX86: mode64 = False; rRegUniv = X86FN(getRRegUniverse_X86()); - isMove = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr); mapRegs = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr); @@ -1449,7 +1446,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, case VexArchAMD64: mode64 = True; rRegUniv = AMD64FN(getRRegUniverse_AMD64()); - isMove = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr); mapRegs = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr); @@ -1467,7 +1463,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, case VexArchPPC32: mode64 = False; rRegUniv = PPC32FN(getRRegUniverse_PPC(mode64)); - isMove = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr); @@ -1484,7 +1479,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, case VexArchPPC64: mode64 = True; rRegUniv = PPC64FN(getRRegUniverse_PPC(mode64)); - isMove = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr); @@ -1502,7 +1496,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, case VexArchS390X: mode64 = True; rRegUniv = S390FN(getRRegUniverse_S390()); - isMove = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr); mapRegs = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr); @@ -1520,7 +1513,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, case VexArchARM: mode64 = False; rRegUniv = ARMFN(getRRegUniverse_ARM()); - isMove = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr); @@ -1537,7 +1529,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, case VexArchARM64: mode64 = True; rRegUniv = ARM64FN(getRRegUniverse_ARM64()); - isMove = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr); mapRegs =
CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr); @@ -1554,7 +1545,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, case VexArchMIPS32: mode64 = False; rRegUniv = MIPS32FN(getRRegUniverse_MIPS(mode64)); - isMove = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr); @@ -1572,7 +1562,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, case VexArchMIPS64: mode64 = True; rRegUniv = MIPS64FN(getRRegUniverse_MIPS(mode64)); - isMove = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr); getRegUsage = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr); mapRegs = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr); @@ -1651,8 +1640,8 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta, /* Register allocate. */ RegAllocControl con = { - .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage, - .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill, + .univ = rRegUniv, .getRegUsage = getRegUsage, .mapRegs = mapRegs, + .isIfThenElse = isIfThenElse, .genSpill = genSpill, .genReload = genReload, .genMove = genMove, .genHInstrITE = genHInstrITE, .directReload = directReload, .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppCondCode = ppCondCode, .ppReg = ppReg,
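
For readers following along: the essence of the change is that a backend's getRegUsage_*Instr function now records move information in HRegUsage instead of answering a separate isMove query. Below is a minimal, self-contained model of that convention; HRegM, HRegUsageM, report_move and derive_vreg_vreg_move are hypothetical cut-down stand-ins for illustration only, not the real VEX definitions.

   #include <stdbool.h>
   #include <stdio.h>

   typedef struct { unsigned index; bool isVirtual; } HRegM;  /* cut-down HReg */

   typedef struct {
      /* Fields added to HRegUsage by this commit: */
      bool  isRegRegMove;    /* set by the backend's getRegUsage_*Instr */
      HRegM regMoveSrc;
      HRegM regMoveDst;
      bool  isVregVregMove;  /* derived later by the register allocator */
   } HRegUsageM;             /* cut-down HRegUsage */

   /* What a backend does when it sees a plain reg-reg MOV
      (cf. the Aalu_MOV case in getRegUsage_AMD64Instr): */
   static void report_move(HRegUsageM* u, HRegM src, HRegM dst)
   {
      u->isRegRegMove = true;
      u->regMoveSrc   = src;
      u->regMoveDst   = dst;
   }

   /* What stage 2 of the allocator derives from that hint: */
   static void derive_vreg_vreg_move(HRegUsageM* u)
   {
      u->isVregVregMove = u->isRegRegMove
                          && u->regMoveSrc.isVirtual
                          && u->regMoveDst.isVirtual;
   }

   int main(void)
   {
      HRegUsageM u = { false, {0, false}, {0, false}, false };
      HRegM v5 = { 5, true };
      HRegM v9 = { 9, true };
      report_move(&u, v5, v9);     /* backend: "this insn is mov v5 -> v9" */
      derive_vreg_vreg_move(&u);   /* allocator: both ends are virtual */
      printf("vreg-vreg move: %s\n", u.isVregVregMove ? "yes" : "no");
      return 0;
   }

Because the hint is stored in the per-instruction reg_usage array, the allocator can consult it once while establishing coalescing chains (stage 3) and again while finishing the coalescing in the main pass (stage 5); this is the "query it twice" the commit message refers to.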
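The chain bookkeeping itself can be modelled the same way. This sketch mirrors only the propagation logic of stage3_chunk and stage4_coalesced_chunk; the adjacency test on live ranges, the chunk walking and the spill-slot sharing are omitted, and VRegM, coalesce and propagate are hypothetical simplified stand-ins.

   #include <stdio.h>

   #define INVALID (-1)

   /* Hypothetical cut-down VRegState: the live range plus the three
      fields this commit adds. */
   typedef struct {
      short live_after;
      short dead_before;
      int   coalescedTo;       /* next vreg in the chain, or INVALID */
      int   coalescedFirst;    /* head of the chain, or INVALID */
      short effective_dead_before;
   } VRegM;

   /* stage3_chunk, reduced: a MOV vs -> vd with adjacent live ranges
      extends the chain, and the head inherits the tail's dead_before. */
   static void coalesce(VRegM* v, int vs, int vd)
   {
      v[vs].coalescedTo = vd;
      if (v[vs].coalescedFirst == INVALID)
         v[vd].coalescedFirst = vs;                    /* vs starts a chain */
      else
         v[vd].coalescedFirst = v[vs].coalescedFirst;  /* extend the chain */
      v[v[vd].coalescedFirst].effective_dead_before = v[vd].dead_before;
   }

   /* stage4_coalesced_chunk, reduced: push the head's effective_dead_before
      (and, in the real code, its spill offset) down the whole chain. */
   static void propagate(VRegM* v, int head)
   {
      short edb = v[head].effective_dead_before;
      for (int d = v[head].coalescedTo; d != INVALID; d = v[d].coalescedTo)
         v[d].effective_dead_before = edb;
   }

   int main(void)
   {
      /* v0 [0,4) --mov--> v1 [3,8) --mov--> v2 [7,12) */
      VRegM v[3] = {
         { 0,  4, INVALID, INVALID,  4 },
         { 3,  8, INVALID, INVALID,  8 },
         { 7, 12, INVALID, INVALID, 12 },
      };
      coalesce(v, 0, 1);
      coalesce(v, 1, 2);
      propagate(v, 0);
      for (int i = 0; i < 3; i++)
         printf("v%d: effective_dead_before = %d\n", i,
                v[i].effective_dead_before);
      return 0;
   }

Running this prints effective_dead_before = 12 for all three vregs: every vreg in the chain inherits the |dead_before| of the last one, which is what lets the allocator keep a single register, or a single spill slot, live across the whole chain.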