git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Refactor tracking of MOV coalescing.
authorIvo Raisr <ivosh@ivosh.net>
Fri, 22 Sep 2017 20:50:11 +0000 (22:50 +0200)
committerIvo Raisr <ivosh@ivosh.net>
Wed, 11 Oct 2017 18:56:48 +0000 (20:56 +0200)
Reg<->Reg MOV coalescing status is now a part of the HRegUsage.
This allows register allocation to query it twice without incurring
a performance penalty. This in turn allows better tracking of
vreg<->vreg MOV coalescing, so that all vregs in the coalesce chain
get the effective |dead_before| of the last vreg.

A small performance improvement has been observed because this allows
coalescing of even spilled vregs (previously only assigned ones).

19 files changed:
VEX/priv/host_amd64_defs.c
VEX/priv/host_amd64_defs.h
VEX/priv/host_arm64_defs.c
VEX/priv/host_arm64_defs.h
VEX/priv/host_arm_defs.c
VEX/priv/host_arm_defs.h
VEX/priv/host_generic_reg_alloc2.c
VEX/priv/host_generic_reg_alloc3.c
VEX/priv/host_generic_regs.c
VEX/priv/host_generic_regs.h
VEX/priv/host_mips_defs.c
VEX/priv/host_mips_defs.h
VEX/priv/host_ppc_defs.c
VEX/priv/host_ppc_defs.h
VEX/priv/host_s390_defs.c
VEX/priv/host_s390_defs.h
VEX/priv/host_x86_defs.c
VEX/priv/host_x86_defs.h
VEX/priv/main_main.c

index d9949d4fd7f7d697c04296b205ff5cd5594b4459..a554e28ed9c568f4c74c07c661b14a82f8aa4953 100644 (file)
@@ -1406,6 +1406,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
          addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
          if (i->Ain.Alu64R.op == Aalu_MOV) {
             addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
+
+            if (i->Ain.Alu64R.src->tag == Armi_Reg) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Ain.Alu64R.src->Armi.Reg.reg;
+               u->regMoveDst   = i->Ain.Alu64R.dst;
+            }
             return;
          }
          if (i->Ain.Alu64R.op == Aalu_CMP) { 
@@ -1668,6 +1674,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
             addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV 
                              ? HRmWrite : HRmModify, 
                           i->Ain.SseReRg.dst);
+
+            if (i->Ain.SseReRg.op == Asse_MOV) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Ain.SseReRg.src;
+               u->regMoveDst   = i->Ain.SseReRg.dst;
+            }
          }
          return;
       case Ain_SseCMov:
@@ -1694,6 +1706,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
       //uu       addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV 
       //uu                        ? HRmWrite : HRmModify, 
       //uu                     i->Ain.AvxReRg.dst);
+      //uu
+      //uu       if (i->Ain.AvxReRg.op == Asse_MOV) {
+      //uu          u->isRegRegMove = True;
+      //uu          u->regMoveSrc   = i->Ain.AvxReRg.src;
+      //uu          u->regMoveDst   = i->Ain.AvxReRg.dst;
+      //uu       }
       //uu    }
       //uu    return;
       case Ain_EvCheck:
@@ -1910,43 +1928,6 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst )
-{
-   switch (i->tag) {
-      case Ain_Alu64R:
-         /* Moves between integer regs */
-         if (i->Ain.Alu64R.op != Aalu_MOV)
-            return False;
-         if (i->Ain.Alu64R.src->tag != Armi_Reg)
-            return False;
-         *src = i->Ain.Alu64R.src->Armi.Reg.reg;
-         *dst = i->Ain.Alu64R.dst;
-         return True;
-      case Ain_SseReRg:
-         /* Moves between SSE regs */
-         if (i->Ain.SseReRg.op != Asse_MOV)
-            return False;
-         *src = i->Ain.SseReRg.src;
-         *dst = i->Ain.SseReRg.dst;
-         return True;
-      //uu case Ain_AvxReRg:
-      //uu    /* Moves between AVX regs */
-      //uu    if (i->Ain.AvxReRg.op != Asse_MOV)
-      //uu       return False;
-      //uu    *src = i->Ain.AvxReRg.src;
-      //uu    *dst = i->Ain.AvxReRg.dst;
-      //uu    return True;
-      default:
-         return False;
-   }
-   /*NOTREACHED*/
-}
-
-
 /* Generate amd64 spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. */
index 92730fa13adb07681c564b8db0f18bf2c4da9b9a..68e199ad342b922c49dc982e79fcc82e00df1ab6 100644 (file)
@@ -785,7 +785,6 @@ extern void ppAMD64Instr ( const AMD64Instr*, Bool );
    of the underlying instruction set. */
 extern void getRegUsage_AMD64Instr ( HRegUsage*, const AMD64Instr*, Bool );
 extern void mapRegs_AMD64Instr     ( HRegRemap*, AMD64Instr*, Bool );
-extern Bool isMove_AMD64Instr      ( const AMD64Instr*, HReg*, HReg* );
 extern Int          emit_AMD64Instr   ( /*MB_MOD*/Bool* is_profInc,
                                         UChar* buf, Int nbuf,
                                         const AMD64Instr* i, 
index 2506512adbc97f032e8160618d17fa8205b81040..4d088c77b4822575b3e9be1cd0d268ae9e91178b 100644 (file)
@@ -1958,6 +1958,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
       case ARM64in_MovI:
          addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
          addHRegUse(u, HRmRead,  i->ARM64in.MovI.src);
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->ARM64in.MovI.src;
+         u->regMoveDst   = i->ARM64in.MovI.dst;
          return;
       case ARM64in_Imm64:
          addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
@@ -2238,6 +2241,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
       case ARM64in_VMov:
          addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
          addHRegUse(u, HRmRead,  i->ARM64in.VMov.src);
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->ARM64in.VMov.src;
+         u->regMoveDst   = i->ARM64in.VMov.dst;
          return;
       case ARM64in_EvCheck:
          /* We expect both amodes only to mention x21, so this is in
@@ -2510,29 +2516,6 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
-{
-   switch (i->tag) {
-      case ARM64in_MovI:
-         *src = i->ARM64in.MovI.src;
-         *dst = i->ARM64in.MovI.dst;
-         return True;
-      case ARM64in_VMov:
-         *src = i->ARM64in.VMov.src;
-         *dst = i->ARM64in.VMov.dst;
-         return True;
-      default:
-         break;
-   }
-
-   return False;
-}
-
-
 /* Generate arm spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. */
index e7da4f90fb7ae6bf791a8ca8f09de3094ae13a15..277a55b12ca72306ca7f2973e76896783f5bfa61 100644 (file)
@@ -993,7 +993,6 @@ extern void ppARM64Instr ( const ARM64Instr* );
    of the underlying instruction set. */
 extern void getRegUsage_ARM64Instr ( HRegUsage*, const ARM64Instr*, Bool );
 extern void mapRegs_ARM64Instr     ( HRegRemap*, ARM64Instr*, Bool );
-extern Bool isMove_ARM64Instr      ( const ARM64Instr*, HReg*, HReg* );
 extern Int  emit_ARM64Instr        ( /*MB_MOD*/Bool* is_profInc,
                                      UChar* buf, Int nbuf, const ARM64Instr* i,
                                      Bool mode64,
index 9bf87cd5c30170ccb17e0a7a65debcd765d99c9e..3de6d5011644aa9e511fd0f5c7cf31fc2fc0d2ea 100644 (file)
@@ -2108,6 +2108,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
       case ARMin_Mov:
          addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
          addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
+
+         if (i->ARMin.Mov.src->tag == ARMri84_R) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.Mov.src->ARMri84.R.reg;
+            u->regMoveDst   = i->ARMin.Mov.dst;
+         }
          return;
       case ARMin_Imm32:
          addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
@@ -2256,10 +2262,22 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
       case ARMin_VUnaryD:
          addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
          addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
+
+         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.VUnaryD.src;
+            u->regMoveDst   = i->ARMin.VUnaryD.dst;
+         }
          return;
       case ARMin_VUnaryS:
          addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
          addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
+
+         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.VUnaryS.src;
+            u->regMoveDst   = i->ARMin.VUnaryS.dst;
+         }
          return;
       case ARMin_VCmpD:
          addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
@@ -2350,6 +2368,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
       case ARMin_NUnary:
          addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
          addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
+
+         if (i->ARMin.NUnary.op == ARMneon_COPY) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.NUnary.src;
+            u->regMoveDst   = i->ARMin.NUnary.dst;
+         }
          return;
       case ARMin_NUnaryS:
          addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
@@ -2620,50 +2644,6 @@ void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
-{
-   /* Moves between integer regs */
-   switch (i->tag) {
-      case ARMin_Mov:
-         if (i->ARMin.Mov.src->tag == ARMri84_R) {
-            *src = i->ARMin.Mov.src->ARMri84.R.reg;
-            *dst = i->ARMin.Mov.dst;
-            return True;
-         }
-         break;
-      case ARMin_VUnaryD:
-         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
-            *src = i->ARMin.VUnaryD.src;
-            *dst = i->ARMin.VUnaryD.dst;
-            return True;
-         }
-         break;
-      case ARMin_VUnaryS:
-         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
-            *src = i->ARMin.VUnaryS.src;
-            *dst = i->ARMin.VUnaryS.dst;
-            return True;
-         }
-         break;
-      case ARMin_NUnary:
-         if (i->ARMin.NUnary.op == ARMneon_COPY) {
-            *src = i->ARMin.NUnary.src;
-            *dst = i->ARMin.NUnary.dst;
-            return True;
-         }
-         break;
-      default:
-         break;
-   }
-
-   return False;
-}
-
-
 /* Generate arm spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. */
index 56c4ec5055495e0f6652f729661b7cdfca5eff30..b88c85a7c433e038fc161c4630634e3b1ff69f5c 100644 (file)
@@ -1056,7 +1056,6 @@ extern void ppARMInstr ( const ARMInstr* );
    of the underlying instruction set. */
 extern void getRegUsage_ARMInstr ( HRegUsage*, const ARMInstr*, Bool );
 extern void mapRegs_ARMInstr     ( HRegRemap*, ARMInstr*, Bool );
-extern Bool isMove_ARMInstr      ( const ARMInstr*, HReg*, HReg* );
 extern Int  emit_ARMInstr        ( /*MB_MOD*/Bool* is_profInc,
                                    UChar* buf, Int nbuf, const ARMInstr* i, 
                                    Bool mode64,
index eb4600e9ef30b0aedffc736b8ffd1983602a3218..166f52b2942840eb41e8525f4d30d15f94f83b81 100644 (file)
@@ -45,8 +45,6 @@
 
 /* TODO 27 Oct 04:
 
-   Better consistency checking from what isMove tells us.
-
    We can possibly do V-V coalescing even when the src is spilled,
    providing we can arrange for the dst to have the same spill slot.
 
@@ -515,6 +513,10 @@ HInstrArray* doRegisterAllocation_v2 (
    for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
 
       con->getRegUsage(&reg_usage_arr[ii], instrs_in->arr[ii], con->mode64);
+      reg_usage_arr[ii].isVregVregMove
+         = reg_usage_arr[ii].isRegRegMove
+           && hregIsVirtual(reg_usage_arr[ii].regMoveSrc)
+           && hregIsVirtual(reg_usage_arr[ii].regMoveDst);
 
       if (0) {
          vex_printf("\n%d  stage1: ", ii);
@@ -1025,12 +1027,10 @@ HInstrArray* doRegisterAllocation_v2 (
       /* If doing a reg-reg move between two vregs, and the src's live
          range ends here and the dst's live range starts here, bind
          the dst to the src's rreg, and that's all. */
-      HReg vregS = INVALID_HREG;
-      HReg vregD = INVALID_HREG;
-      if ( con->isMove(instrs_in->arr[ii], &vregS, &vregD) ) {
-         if (!hregIsVirtual(vregS)) goto cannot_coalesce;
-         if (!hregIsVirtual(vregD)) goto cannot_coalesce;
-         /* Check that *isMove is not telling us a bunch of lies ... */
+      if (reg_usage_arr[ii].isVregVregMove) {
+         HReg vregS = reg_usage_arr[ii].regMoveSrc;
+         HReg vregD = reg_usage_arr[ii].regMoveDst;
+         /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
          vassert(hregClass(vregS) == hregClass(vregD));
          Int k = hregIndex(vregS);
          Int m = hregIndex(vregD);
index 929dee5333fa588120e4e529f80a3f39e3b34ad1..9ab95497a69ad89965323552c1453c6a15cb8751 100644 (file)
@@ -72,6 +72,18 @@ typedef
       /* The "home" spill slot. The offset is relative to the beginning of
          the guest state. */
       UShort spill_offset;
+
+      /* This vreg (vregS) is coalesced to another vreg
+         if |coalescedTo| != INVALID_HREG.
+         Coalescing means that there is a MOV instruction which occurs in the
+         instruction stream right at vregS' dead_before
+         and vregD's live_after. */
+      HReg coalescedTo;    /* Which vreg it is coalesced to. */
+      HReg coalescedFirst; /* First vreg in the coalescing chain. */
+
+      /* If this vregS is coalesced to another vregD, what is the combined
+         dead_before for vregS+vregD. Used to effectively allocate registers. */
+      Short effective_dead_before;
    }
    VRegState;
 
@@ -190,13 +202,20 @@ static inline void print_state(
    const RRegLRState* rreg_lr_state,
    UShort current_ii)
 {
+#  define RIGHT_JUSTIFY(_total, _written)                   \
+      do {                                                  \
+         for (Int w = (_total) - (_written); w > 0; w--) {  \
+            vex_printf(" ");                                \
+         }                                                  \
+      } while (0)
+
    for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
       const VRegState* vreg = &vreg_state[v_idx];
 
       if (vreg->live_after == INVALID_INSTRNO) {
          continue; /* This is a dead vreg. Never comes into live. */
       }
-      vex_printf("vreg_state[%3u] \t", v_idx);
+      vex_printf("vreg_state[%3u]    ", v_idx);
 
       UInt written;
       switch (vreg->disp) {
@@ -213,15 +232,26 @@ static inline void print_state(
       default:
          vassert(0);
       }
+      RIGHT_JUSTIFY(25, written);
 
-      for (Int w = 30 - written; w > 0; w--) {
-         vex_printf(" ");
-      }
+      written = vex_printf("lr: [%d, %d) ",
+                           vreg->live_after, vreg->dead_before);
+      RIGHT_JUSTIFY(15, written);
+
+      written = vex_printf("effective lr: [%d, %d)",
+                           vreg->live_after, vreg->effective_dead_before);
+      RIGHT_JUSTIFY(25, written);
 
       if (vreg->live_after > (Short) current_ii) {
          vex_printf("[not live yet]\n");
       } else if ((Short) current_ii >= vreg->dead_before) {
-         vex_printf("[now dead]\n");
+         if (hregIsInvalid(vreg->coalescedTo)) {
+            vex_printf("[now dead]\n");
+         } else {
+            vex_printf("[now dead, coalesced to ");
+            con->ppReg(vreg->coalescedTo);
+            vex_printf("]\n");
+         }
       } else {
          vex_printf("[live]\n");
       }
@@ -232,9 +262,7 @@ static inline void print_state(
       const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
       vex_printf("rreg_state[%2u] = ", r_idx);
       UInt written = con->ppReg(con->univ->regs[r_idx]);
-      for (Int w = 10 - written; w > 0; w--) {
-         vex_printf(" ");
-      }
+      RIGHT_JUSTIFY(10, written);
 
       switch (rreg->disp) {
       case Free:
@@ -255,6 +283,8 @@ static inline void print_state(
          break;
       }
    }
+
+#  undef RIGHT_JUSTIFY
 }
 
 static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out,
@@ -383,8 +413,8 @@ static inline HReg find_vreg_to_spill(
    a callee-save register because it won't be used for parameter passing
    around helper function calls. */
 static Bool find_free_rreg(
-   VRegState* vreg_state, UInt n_vregs,
-   RRegState* rreg_state, UInt n_rregs,
+   const VRegState* vreg_state, UInt n_vregs,
+   const RRegState* rreg_state, UInt n_rregs,
    const RRegLRState* rreg_lr_state,
    UInt current_ii, HRegClass target_hregclass,
    Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found)
@@ -476,6 +506,10 @@ HInstrArray* doRegisterAllocation_v3(
    HRegUsage* reg_usage
       = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
 
+   /* Mark vreg indexes where coalesce chains start at. */
+   UInt* coalesce_heads = LibVEX_Alloc_inline(n_vregs * sizeof(UInt));
+   UInt nr_coalesce_heads = 0;
+
    /* The live range numbers are signed shorts, and so limiting the
       number of instructions to 15000 comfortably guards against them
       overflowing 32k. */
@@ -512,9 +546,9 @@ HInstrArray* doRegisterAllocation_v3(
    instruction and makes free the corresponding rreg. */
 #  define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase)      \
    ({                                                                          \
-      UInt _r_free_idx = -1;                                                   \
+      UInt _r_free_idx;                                                        \
       Bool free_rreg_found = find_free_rreg(                                   \
-                vreg_state, n_vregs,  rreg_state, n_rregs, rreg_lr_state,      \
+                vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state,       \
                 (_ii), (_reg_class), (_reserve_phase), con, &_r_free_idx);     \
       if (!free_rreg_found) {                                                  \
          HReg vreg_to_spill = find_vreg_to_spill(                              \
@@ -536,12 +570,15 @@ HInstrArray* doRegisterAllocation_v3(
 
    /* --- Stage 0. Initialize the state. --- */
    for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
-      vreg_state[v_idx].live_after   = INVALID_INSTRNO;
-      vreg_state[v_idx].dead_before  = INVALID_INSTRNO;
-      vreg_state[v_idx].reg_class    = HRcINVALID;
-      vreg_state[v_idx].disp         = Unallocated;
-      vreg_state[v_idx].rreg         = INVALID_HREG;
-      vreg_state[v_idx].spill_offset = 0;
+      vreg_state[v_idx].live_after            = INVALID_INSTRNO;
+      vreg_state[v_idx].dead_before           = INVALID_INSTRNO;
+      vreg_state[v_idx].reg_class             = HRcINVALID;
+      vreg_state[v_idx].disp                  = Unallocated;
+      vreg_state[v_idx].rreg                  = INVALID_HREG;
+      vreg_state[v_idx].spill_offset          = 0;
+      vreg_state[v_idx].coalescedTo           = INVALID_HREG;
+      vreg_state[v_idx].coalescedFirst        = INVALID_HREG;
+      vreg_state[v_idx].effective_dead_before = INVALID_INSTRNO;
    }
 
    for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
@@ -565,6 +602,10 @@ HInstrArray* doRegisterAllocation_v3(
       const HInstr* instr = instrs_in->arr[ii];
 
       con->getRegUsage(&reg_usage[ii], instr, con->mode64);
+      reg_usage[ii].isVregVregMove
+         = reg_usage[ii].isRegRegMove
+           && hregIsVirtual(reg_usage[ii].regMoveSrc)
+           && hregIsVirtual(reg_usage[ii].regMoveDst);
 
       if (0) {
          vex_printf("\n%u  stage 1: ", ii);
@@ -602,23 +643,24 @@ HInstrArray* doRegisterAllocation_v3(
             if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
                OFFENDING_VREG(v_idx, instr, "Read");
             }
-            vreg_state[v_idx].dead_before = toShort(ii + 1);
             break;
          case HRmWrite:
             if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
                vreg_state[v_idx].live_after = toShort(ii);
             }
-            vreg_state[v_idx].dead_before = toShort(ii + 1);
             break;
          case HRmModify:
             if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
                OFFENDING_VREG(v_idx, instr, "Modify");
             }
-            vreg_state[v_idx].dead_before = toShort(ii + 1);
             break;
          default:
             vassert(0);
          }
+
+         vreg_state[v_idx].dead_before = toShort(ii + 1);
+         vreg_state[v_idx].effective_dead_before
+            = vreg_state[v_idx].dead_before;
       }
 
       /* Process real registers mentioned in the instruction. */
@@ -703,7 +745,59 @@ HInstrArray* doRegisterAllocation_v3(
       }
    }
 
-   /* --- Stage 2. Allocate spill slots. --- */
+
+   /* --- Stage 2. MOV coalescing (preparation). --- */
+   /* Optimise register coalescing:
+         MOV  v <-> v   coalescing (done here).
+         MOV  v <-> r   coalescing (TODO: not yet, not here). */
+   /* If doing a reg-reg move between two vregs, and the src's live range ends
+     here and the dst's live range starts here, coalesce the src vreg
+     to the dst vreg. */
+   Bool coalesce_happened = False;
+   for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
+      if (reg_usage[ii].isVregVregMove) {
+         HReg vregS = reg_usage[ii].regMoveSrc;
+         HReg vregD = reg_usage[ii].regMoveDst;
+
+         /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
+         vassert(hregClass(vregS) == hregClass(vregD));
+         UInt vs_idx = hregIndex(vregS);
+         UInt vd_idx = hregIndex(vregD);
+         vassert(IS_VALID_VREGNO(vs_idx));
+         vassert(IS_VALID_VREGNO(vd_idx));
+         vassert(! sameHReg(vregS, vregD));
+         VRegState* vs_st = &vreg_state[vs_idx];
+         VRegState* vd_st = &vreg_state[vd_idx];
+
+         if ((vs_st->dead_before == ii + 1) && (vd_st->live_after == ii)) {
+            /* Live ranges are adjacent. */
+
+            vs_st->coalescedTo = vregD;
+            if (hregIsInvalid(vs_st->coalescedFirst)) {
+               vd_st->coalescedFirst = vregS;
+               coalesce_heads[nr_coalesce_heads] = vs_idx;
+               nr_coalesce_heads += 1;
+            } else {
+               vd_st->coalescedFirst = vs_st->coalescedFirst;
+            }
+
+            vreg_state[hregIndex(vd_st->coalescedFirst)].effective_dead_before
+               = vd_st->dead_before;
+
+            if (DEBUG_REGALLOC) {
+               vex_printf("vreg coalescing: ");
+               con->ppReg(vregS);
+               vex_printf(" -> ");
+               con->ppReg(vregD);
+               vex_printf("\n");
+            }
+
+            coalesce_happened = True;
+         }
+      }
+   }
+
+   /* --- Stage 3. Allocate spill slots. --- */
 
    /* Each spill slot is 8 bytes long. For vregs which take more than 64 bits
       to spill (for example classes Flt64 and Vec128), we have to allocate two
@@ -742,6 +836,11 @@ HInstrArray* doRegisterAllocation_v3(
          vassert(vreg_state[v_idx].reg_class == HRcINVALID);
          continue;
       }
+      if (! hregIsInvalid(vreg_state[v_idx].coalescedFirst)) {
+         /* Coalesced vregs should share the same spill slot with the first vreg
+            in the coalescing chain. But we don't have that information, yet. */
+         continue;
+      }
 
       /* The spill slots are 64 bits in size.  As per the comment on definition
          of HRegClass in host_generic_regs.h, that means, to spill a vreg of
@@ -763,8 +862,10 @@ HInstrArray* doRegisterAllocation_v3(
             if (ss_no >= N_SPILL64S - 1) {
                vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
             }
-            ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before;
-            ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before;
+            ss_busy_until_before[ss_no + 0]
+               = vreg_state[v_idx].effective_dead_before;
+            ss_busy_until_before[ss_no + 1]
+               = vreg_state[v_idx].effective_dead_before;
             break;
          default:
             /* The ordinary case -- just find a single lowest-numbered spill
@@ -777,7 +878,8 @@ HInstrArray* doRegisterAllocation_v3(
             if (ss_no == N_SPILL64S) {
                vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
             }
-            ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before;
+            ss_busy_until_before[ss_no]
+               = vreg_state[v_idx].effective_dead_before;
             break;
       }
 
@@ -798,15 +900,38 @@ HInstrArray* doRegisterAllocation_v3(
       }
    }
 
+   /* Fill in the spill offsets and effective_dead_before for coalesced vregs.*/
+   for (UInt i = 0; i < nr_coalesce_heads; i++) {
+      UInt vs_idx = coalesce_heads[i];
+      Short effective_dead_before = vreg_state[vs_idx].effective_dead_before;
+      UShort spill_offset         = vreg_state[vs_idx].spill_offset;
+      HReg vregD = vreg_state[vs_idx].coalescedTo;
+      while (! hregIsInvalid(vregD)) {
+         UInt vd_idx = hregIndex(vregD);
+         vreg_state[vd_idx].effective_dead_before = effective_dead_before;
+         vreg_state[vd_idx].spill_offset          = spill_offset;
+         vregD = vreg_state[vd_idx].coalescedTo;
+      }
+   }
+
+   if (DEBUG_REGALLOC && coalesce_happened) {
+      UInt ii = 0;
+      vex_printf("After vreg<->vreg MOV coalescing:\n");
+      PRINT_STATE;
+   }
+
    if (0) {
       vex_printf("\n\n");
-      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++)
-         vex_printf("vreg %3u    --> spill offset %u\n",
-                    v_idx, vreg_state[v_idx].spill_offset);
+      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
+         if (vreg_state[v_idx].live_after != INVALID_INSTRNO) {
+            vex_printf("vreg %3u    --> spill offset %u\n",
+                       v_idx, vreg_state[v_idx].spill_offset);
+         }
+      }
    }
 
 
-   /* --- State 3. Process instructions. --- */
+   /* --- State 4. Process instructions. --- */
    for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
       HInstr* instr = instrs_in->arr[ii];
 
@@ -873,65 +998,82 @@ HInstrArray* doRegisterAllocation_v3(
                vassert((Short) ii < rreg_lrs->lr_current->dead_before);
             }
          }
+
+         /* Sanity check: if vregS has been marked as coalesced to vregD,
+            then the effective live range of vregS must also cover live range
+            of vregD. */
+         /* The following sanity check is quite expensive. Some basic blocks
+            contain very lengthy coalescing chains... */
+         if (SANITY_CHECKS_EVERY_INSTR) {
+            for (UInt vs_idx = 0; vs_idx < n_vregs; vs_idx++) {
+               const VRegState* vS_st = &vreg_state[vs_idx];
+               HReg vregD = vS_st->coalescedTo;
+               while (! hregIsInvalid(vregD)) {
+                  const VRegState* vD_st = &vreg_state[hregIndex(vregD)];
+                  vassert(vS_st->live_after <= vD_st->live_after);
+                  vassert(vS_st->effective_dead_before >= vD_st->dead_before);
+                  vregD = vD_st->coalescedTo;
+               }
+            }
+         }
       }
 
 
-      /* --- MOV coalescing --- */
+      /* --- MOV coalescing (finishing) --- */
       /* Optimise register coalescing:
-            MOV  v <-> v   coalescing (done here).
+            MOV  v <-> v   coalescing (finished here).
             MOV  v <-> r   coalescing (TODO: not yet). */
-      /* If doing a reg-reg move between two vregs, and the src's live
-         range ends here and the dst's live range starts here, bind the dst
-         to the src's rreg, and that's all. */
-      HReg vregS = INVALID_HREG;
-      HReg vregD = INVALID_HREG;
-      if (con->isMove(instr, &vregS, &vregD)) {
-         if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) {
-            /* Check that |isMove| is not telling us a bunch of lies ... */
-            vassert(hregClass(vregS) == hregClass(vregD));
-            UInt vs_idx = hregIndex(vregS);
-            UInt vd_idx = hregIndex(vregD);
-            vassert(IS_VALID_VREGNO(vs_idx));
-            vassert(IS_VALID_VREGNO(vd_idx));
-
-            if ((vreg_state[vs_idx].dead_before == ii + 1)
-                && (vreg_state[vd_idx].live_after == ii)
-                && (vreg_state[vs_idx].disp == Assigned)) {
-
-               /* Live ranges are adjacent and source vreg is bound.
-                  Finally we can do the coalescing.  */
-               HReg rreg = vreg_state[vs_idx].rreg;
-               vreg_state[vd_idx].disp = Assigned;
+      if (reg_usage[ii].isVregVregMove) {
+         HReg vregS = reg_usage[ii].regMoveSrc;
+         HReg vregD = reg_usage[ii].regMoveDst;
+         UInt vs_idx = hregIndex(vregS);
+         UInt vd_idx = hregIndex(vregD);
+
+         if (sameHReg(vreg_state[vs_idx].coalescedTo, vregD)) {
+            /* Finally do the coalescing. */
+
+            HReg rreg = vreg_state[vs_idx].rreg;
+            switch (vreg_state[vs_idx].disp) {
+            case Assigned:
                vreg_state[vd_idx].rreg = rreg;
-               FREE_VREG(&vreg_state[vs_idx]);
-
                UInt r_idx = hregIndex(rreg);
                vassert(rreg_state[r_idx].disp == Bound);
-               rreg_state[r_idx].vreg          = vregD;
-               rreg_state[r_idx].eq_spill_slot = False;
+               rreg_state[r_idx].vreg = vregD;
+               break;
+            case Spilled:
+               vassert(hregIsInvalid(vreg_state[vs_idx].rreg));
+               break;
+            default:
+               vassert(0);
+            }
 
-               if (DEBUG_REGALLOC) {
-                  vex_printf("coalesced: ");
-                  con->ppReg(vregS);
-                  vex_printf(" -> ");
-                  con->ppReg(vregD);
-                  vex_printf("\n\n");
-               }
+            vreg_state[vd_idx].disp = vreg_state[vs_idx].disp;
+            FREE_VREG(&vreg_state[vs_idx]);
+
+            if (DEBUG_REGALLOC) {
+               vex_printf("coalesced: ");
+               con->ppReg(vregS);
+               vex_printf(" -> ");
+               con->ppReg(vregD);
+               vex_printf("\n\n");
+            }
 
-               /* In rare cases it can happen that vregD's live range ends
-                  here. Check and eventually free the vreg and rreg.
-                  This effectively means that either the translated program
-                  contained dead code (but VEX iropt passes are pretty good
-                  at eliminating it) or the VEX backend generated dead code. */
-               if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
-                  FREE_VREG(&vreg_state[vd_idx]);
+            /* In rare cases it can happen that vregD's live range ends here.
+               Check and eventually free the vreg and rreg.
+               This effectively means that either the translated program
+               contained dead code (but VEX iropt passes are pretty good
+               at eliminating it) or the VEX backend generated dead code. */
+            if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
+               if (vreg_state[vd_idx].disp == Assigned) {
+                  UInt r_idx = hregIndex(rreg);
                   FREE_RREG(&rreg_state[r_idx]);
                }
-
-               /* Move on to the next instruction. We skip the post-instruction
-                  stuff because all required house-keeping was done here. */
-               continue;
+               FREE_VREG(&vreg_state[vd_idx]);
             }
+
+            /* Move on to the next instruction. We skip the post-instruction
+               stuff because all required house-keeping was done here. */
+            continue;
          }
       }
 
index 67d2ea2eef9e29ceb97dca71be32bec40b546138..cd5d222f5e72f13e5f3c04eeed99381187a4e433 100644 (file)
@@ -184,6 +184,9 @@ void ppHRegUsage ( const RRegUniverse* univ, HRegUsage* tab )
       ppHReg(tab->vRegs[i]);
       vex_printf("\n");
    }
+   if (tab->isRegRegMove) {
+      vex_printf("   (is a reg-reg move)\n");
+   }
    vex_printf("}\n");
 }
 
index 3db9ea0813262f5da9c12fc71608c5b7ce6c220e..8f6b2d6c4724c0fc5bf19670ab2f32e44782b42a 100644 (file)
@@ -300,6 +300,16 @@ typedef
       HReg     vRegs[N_HREGUSAGE_VREGS];
       HRegMode vMode[N_HREGUSAGE_VREGS];
       UInt     n_vRegs;
+
+      /* Hint to the register allocator: this instruction is actually a move
+         between two registers: regMoveSrc -> regMoveDst. */
+      Bool     isRegRegMove;
+      HReg     regMoveSrc;
+      HReg     regMoveDst;
+
+      /* Used internally by the register allocator. The reg-reg move is
+         actually a vreg-vreg move. */
+      Bool     isVregVregMove;
    }
    HRegUsage;
 
@@ -307,9 +317,10 @@ extern void ppHRegUsage ( const RRegUniverse*, HRegUsage* );
 
 static inline void initHRegUsage ( HRegUsage* tab )
 {
-   tab->rRead    = 0;
-   tab->rWritten = 0;
-   tab->n_vRegs  = 0;
+   tab->rRead        = 0;
+   tab->rWritten     = 0;
+   tab->n_vRegs      = 0;
+   tab->isRegRegMove = False;
 }
 
 /* Add a register to a usage table.  Combine incoming read uses with
@@ -471,10 +482,6 @@ typedef
          allocation. */
       const RRegUniverse* univ;
 
-      /* Return True iff the given insn is a reg-reg move, in which case also
-         return the src and dst regs. */
-      Bool (*isMove)(const HInstr*, HReg*, HReg*);
-
       /* Get info about register usage in this insn. */
       void (*getRegUsage)(HRegUsage*, const HInstr*, Bool);
 
index 66c226dbee3c982daf4b2e56b3b48904192ffa1c..35a293b7227d6e04f65ee08943e7b23726b71a63 100644 (file)
@@ -1606,6 +1606,15 @@ void getRegUsage_MIPSInstr(HRegUsage * u, const MIPSInstr * i, Bool mode64)
          addHRegUse(u, HRmRead, i->Min.Alu.srcL);
          addRegUsage_MIPSRH(u, i->Min.Alu.srcR);
          addHRegUse(u, HRmWrite, i->Min.Alu.dst);
+
+         /* or Rd,Rs,Rs == mr Rd,Rs */
+         if ((i->Min.Alu.op == Malu_OR)
+             && (i->Min.Alu.srcR->tag == Mrh_Reg)
+             && sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL)) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->Min.Alu.srcL;
+            u->regMoveDst   = i->Min.Alu.dst;
+         }
          return;
       case Min_Shft:
          addHRegUse(u, HRmRead, i->Min.Shft.srcL);
@@ -1990,28 +1999,6 @@ void mapRegs_MIPSInstr(HRegRemap * m, MIPSInstr * i, Bool mode64)
 
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing.
-*/
-Bool isMove_MIPSInstr(const MIPSInstr * i, HReg * src, HReg * dst)
-{
-   /* Moves between integer regs */
-   if (i->tag == Min_Alu) {
-      /* or Rd,Rs,Rs == mr Rd,Rs */
-      if (i->Min.Alu.op != Malu_OR)
-         return False;
-      if (i->Min.Alu.srcR->tag != Mrh_Reg)
-         return False;
-      if (!sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL))
-         return False;
-      *src = i->Min.Alu.srcL;
-      *dst = i->Min.Alu.dst;
-      return True;
-   }
-   return False;
-}
-
 /* Generate mips spill/reload instructions under the direction of the
    register allocator. */
 void genSpill_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg,
index be1e3a80c742402a89a699402465e70916ca338f..fb681ac4de2c1cde64f0c5e403c19475a3f9dda4 100644 (file)
@@ -701,7 +701,6 @@ extern void ppMIPSInstr(const MIPSInstr *, Bool mode64);
    of the underlying instruction set. */
 extern void getRegUsage_MIPSInstr (HRegUsage *, const MIPSInstr *, Bool);
 extern void mapRegs_MIPSInstr     (HRegRemap *, MIPSInstr *, Bool mode64);
-extern Bool isMove_MIPSInstr      (const MIPSInstr *, HReg *, HReg *);
 extern Int        emit_MIPSInstr (/*MB_MOD*/Bool* is_profInc,
                                   UChar* buf, Int nbuf, const MIPSInstr* i,
                                   Bool mode64,
index 1ef9c5c3412dcd17b9dcf4892329ee5730f571ea..b073c1d79fc0bf12a1d186cc911faf41727ca3b8 100644 (file)
@@ -2362,6 +2362,15 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
       addHRegUse(u, HRmRead,  i->Pin.Alu.srcL);
       addRegUsage_PPCRH(u,    i->Pin.Alu.srcR);
       addHRegUse(u, HRmWrite, i->Pin.Alu.dst);
+
+      // or Rd,Rs,Rs == mr Rd,Rs
+      if ((i->Pin.Alu.op == Palu_OR)
+          && (i->Pin.Alu.srcR->tag == Prh_Reg)
+          && sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL)) {
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->Pin.Alu.srcL;
+         u->regMoveDst   = i->Pin.Alu.dst;
+      }
       return;
    case Pin_Shft:
       addHRegUse(u, HRmRead,  i->Pin.Shft.srcL);
@@ -2489,6 +2498,12 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
    case Pin_FpUnary:
       addHRegUse(u, HRmWrite, i->Pin.FpUnary.dst);
       addHRegUse(u, HRmRead,  i->Pin.FpUnary.src);
+
+      if (i->Pin.FpUnary.op == Pfp_MOV) {
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->Pin.FpUnary.src;
+         u->regMoveDst   = i->Pin.FpUnary.dst;
+      }
       return;
    case Pin_FpBinary:
       addHRegUse(u, HRmWrite, i->Pin.FpBinary.dst);
@@ -3119,37 +3134,6 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_PPCInstr ( const PPCInstr* i, HReg* src, HReg* dst )
-{
-   /* Moves between integer regs */
-   if (i->tag == Pin_Alu) {
-      // or Rd,Rs,Rs == mr Rd,Rs
-      if (i->Pin.Alu.op != Palu_OR)
-         return False;
-      if (i->Pin.Alu.srcR->tag != Prh_Reg)
-         return False;
-      if (! sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL))
-         return False;
-      *src = i->Pin.Alu.srcL;
-      *dst = i->Pin.Alu.dst;
-      return True;
-   }
-   /* Moves between FP regs */
-   if (i->tag == Pin_FpUnary) {
-      if (i->Pin.FpUnary.op != Pfp_MOV)
-         return False;
-      *src = i->Pin.FpUnary.src;
-      *dst = i->Pin.FpUnary.dst;
-      return True;
-   }
-   return False;
-}
-
-
 /* Generate ppc spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. */
index 27b3b38df2efe5fb357ca687ee4a572e42f32e7e..17baff59045176d45fe354b140385957b3d7edd7 100644 (file)
@@ -1201,7 +1201,6 @@ extern void ppPPCInstr(const PPCInstr*, Bool mode64);
    of the underlying instruction set. */
 extern void getRegUsage_PPCInstr ( HRegUsage*, const PPCInstr*, Bool mode64 );
 extern void mapRegs_PPCInstr     ( HRegRemap*, PPCInstr* , Bool mode64);
-extern Bool isMove_PPCInstr      ( const PPCInstr*, HReg*, HReg* );
 extern Int          emit_PPCInstr   ( /*MB_MOD*/Bool* is_profInc,
                                       UChar* buf, Int nbuf, const PPCInstr* i, 
                                       Bool mode64,
index 327674acaba7fe5f52a609891b085289a6f9b35b..f9a95576170ef44edeebb2ee21b1e034654ada98 100644 (file)
@@ -48,7 +48,6 @@
 /*--- Forward declarations                                 ---*/
 /*------------------------------------------------------------*/
 
-static Bool s390_insn_is_reg_reg_move(const s390_insn *, HReg *src, HReg *dst);
 static void s390_insn_map_regs(HRegRemap *, s390_insn *);
 static void s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *);
 static UInt s390_tchain_load64_len(void);
@@ -467,16 +466,6 @@ mapRegs_S390Instr(HRegRemap *m, s390_insn *insn, Bool mode64)
 }
 
 
-/* Figure out if the given insn represents a reg-reg move, and if so
-   assign the source and destination to *src and *dst.  If in doubt say No.
-   Used by the register allocator to do move coalescing. */
-Bool
-isMove_S390Instr(const s390_insn *insn, HReg *src, HReg *dst)
-{
-   return s390_insn_is_reg_reg_move(insn, src, dst);
-}
-
-
 /* Generate s390 spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. This is like an Ist_Put */
@@ -587,6 +576,12 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn)
    case S390_INSN_MOVE:
       addHRegUse(u, HRmRead,  insn->variant.move.src);
       addHRegUse(u, HRmWrite, insn->variant.move.dst);
+
+      if (hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) {
+         u->isRegRegMove = True;
+         u->regMoveSrc   = insn->variant.move.src;
+         u->regMoveDst   = insn->variant.move.dst;
+      }
       break;
 
    case S390_INSN_MEMCPY:
@@ -1218,23 +1213,6 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn)
 }
 
 
-/* Return True, if INSN is a move between two registers of the same class.
-   In that case assign the source and destination registers to SRC and DST,
-   respectively. */
-static Bool
-s390_insn_is_reg_reg_move(const s390_insn *insn, HReg *src, HReg *dst)
-{
-   if (insn->tag == S390_INSN_MOVE &&
-       hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) {
-      *src = insn->variant.move.src;
-      *dst = insn->variant.move.dst;
-      return True;
-   }
-
-   return False;
-}
-
-
 /*------------------------------------------------------------*/
 /*--- Functions to emit a sequence of bytes                ---*/
 /*------------------------------------------------------------*/
index 937829cd8fe1786bc9c6560bd064ee8a9a6dffce..254275a58d03b6f53a195bf3a872d0090bbe8959 100644 (file)
@@ -742,7 +742,6 @@ UInt ppHRegS390(HReg);
    of the underlying instruction set. */
 void  getRegUsage_S390Instr( HRegUsage *, const s390_insn *, Bool );
 void  mapRegs_S390Instr    ( HRegRemap *, s390_insn *, Bool );
-Bool  isMove_S390Instr     ( const s390_insn *, HReg *, HReg * );
 Int   emit_S390Instr       ( Bool *, UChar *, Int, const s390_insn *, Bool,
                              VexEndness, const void *, const void *,
                              const void *, const void *);
index 2457cc19f4777bcc7df8a1aefc2c3b02363622b7..eb8e020e3de21f9926d59632a53c4702c56b4f66 100644 (file)
@@ -1234,6 +1234,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
          addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
          if (i->Xin.Alu32R.op == Xalu_MOV) {
             addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
+
+            if (i->Xin.Alu32R.src->tag == Xrmi_Reg) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Xin.Alu32R.src->Xrmi.Reg.reg;
+               u->regMoveDst   = i->Xin.Alu32R.dst;
+            }
             return;
          }
          if (i->Xin.Alu32R.op == Xalu_CMP) { 
@@ -1374,6 +1380,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
       case Xin_FpUnary:
          addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
          addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
+
+         if (i->Xin.FpUnary.op == Xfp_MOV) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->Xin.FpUnary.src;
+            u->regMoveDst   = i->Xin.FpUnary.dst;
+         }
          return;
       case Xin_FpBinary:
          addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
@@ -1469,6 +1481,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
             addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV 
                              ? HRmWrite : HRmModify, 
                           i->Xin.SseReRg.dst);
+
+            if (i->Xin.SseReRg.op == Xsse_MOV) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Xin.SseReRg.src;
+               u->regMoveDst   = i->Xin.SseReRg.dst;
+            }
          }
          return;
       case Xin_SseCMov:
@@ -1668,41 +1686,6 @@ void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst )
-{
-   /* Moves between integer regs */
-   if (i->tag == Xin_Alu32R) {
-      if (i->Xin.Alu32R.op != Xalu_MOV)
-         return False;
-      if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
-         return False;
-      *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
-      *dst = i->Xin.Alu32R.dst;
-      return True;
-   }
-   /* Moves between FP regs */
-   if (i->tag == Xin_FpUnary) {
-      if (i->Xin.FpUnary.op != Xfp_MOV)
-         return False;
-      *src = i->Xin.FpUnary.src;
-      *dst = i->Xin.FpUnary.dst;
-      return True;
-   }
-   if (i->tag == Xin_SseReRg) {
-      if (i->Xin.SseReRg.op != Xsse_MOV)
-         return False;
-      *src = i->Xin.SseReRg.src;
-      *dst = i->Xin.SseReRg.dst;
-      return True;
-   }
-   return False;
-}
-
-
 /* Generate x86 spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. */
index e1a57671cb3f273f8f54dd9b0e35750d3b8a477c..6812d5fe39ef10d53fd905d1d488a655d9ef144c 100644 (file)
@@ -716,7 +716,6 @@ extern void ppX86Instr ( const X86Instr*, Bool );
    of the underlying instruction set. */
 extern void         getRegUsage_X86Instr ( HRegUsage*, const X86Instr*, Bool );
 extern void         mapRegs_X86Instr     ( HRegRemap*, X86Instr*, Bool );
-extern Bool         isMove_X86Instr      ( const X86Instr*, HReg*, HReg* );
 extern Int          emit_X86Instr   ( /*MB_MOD*/Bool* is_profInc,
                                       UChar* buf, Int nbuf, const X86Instr* i, 
                                       Bool mode64,
index b27d6ca9eada3128f95375956013a8c43c5b7452..107a6a67b1bbd16adff999845acd5e238af32d7b 100644 (file)
@@ -709,7 +709,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
    /* This the bundle of functions we need to do the back-end stuff
       (insn selection, reg-alloc, assembly) whilst being insulated
       from the target instruction set. */
-   Bool         (*isMove)       ( const HInstr*, HReg*, HReg* );
    void         (*getRegUsage)  ( HRegUsage*, const HInstr*, Bool );
    void         (*mapRegs)      ( HRegRemap*, HInstr*, Bool );
    void         (*genSpill)     ( HInstr**, HInstr**, HReg, Int, Bool );
@@ -739,7 +738,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
    HInstrArray*    vcode;
    HInstrArray*    rcode;
 
-   isMove                  = NULL;
    getRegUsage             = NULL;
    mapRegs                 = NULL;
    genSpill                = NULL;
@@ -857,7 +855,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchX86:
          mode64       = False;
          rRegUniv     = X86FN(getRRegUniverse_X86());
-         isMove       = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr);
@@ -875,7 +872,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchAMD64:
          mode64       = True;
          rRegUniv     = AMD64FN(getRRegUniverse_AMD64());
-         isMove       = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr);
@@ -893,7 +889,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchPPC32:
          mode64       = False;
          rRegUniv     = PPC32FN(getRRegUniverse_PPC(mode64));
-         isMove       = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr);
@@ -910,7 +905,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchPPC64:
          mode64       = True;
          rRegUniv     = PPC64FN(getRRegUniverse_PPC(mode64));
-         isMove       = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr);
@@ -928,7 +922,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchS390X:
          mode64       = True;
          rRegUniv     = S390FN(getRRegUniverse_S390());
-         isMove       = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr);
@@ -946,7 +939,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchARM:
          mode64       = False;
          rRegUniv     = ARMFN(getRRegUniverse_ARM());
-         isMove       = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr);
@@ -963,7 +955,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchARM64:
          mode64       = True;
          rRegUniv     = ARM64FN(getRRegUniverse_ARM64());
-         isMove       = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr);
@@ -980,7 +971,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchMIPS32:
          mode64       = False;
          rRegUniv     = MIPS32FN(getRRegUniverse_MIPS(mode64));
-         isMove       = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr);
@@ -998,7 +988,6 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
       case VexArchMIPS64:
          mode64       = True;
          rRegUniv     = MIPS64FN(getRRegUniverse_MIPS(mode64));
-         isMove       = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr);
@@ -1082,11 +1071,10 @@ static void libvex_BackEnd ( const VexTranslateArgs *vta,
 
    /* Register allocate. */
    RegAllocControl con = {
-      .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage,
-      .mapRegs = mapRegs, .genSpill = genSpill, .genReload = genReload,
-      .genMove = genMove, .directReload = directReload,
-      .guest_sizeB = guest_sizeB, .ppInstr = ppInstr, .ppReg = ppReg,
-      .mode64 = mode64};
+      .univ = rRegUniv, .getRegUsage = getRegUsage, .mapRegs = mapRegs,
+      .genSpill = genSpill, .genReload = genReload, .genMove = genMove,
+      .directReload = directReload, .guest_sizeB = guest_sizeB,
+      .ppInstr = ppInstr, .ppReg = ppReg, .mode64 = mode64};
    switch (vex_control.regalloc_version) {
    case 2:
       rcode = doRegisterAllocation_v2(vcode, &con);