git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Cherry pick 83cabd32492e6d19d483a63522e4e874fa64b617 from master.
author: Ivo Raisr <ivosh@ivosh.net>
Fri, 22 Sep 2017 20:50:11 +0000 (22:50 +0200)
committer: Ivo Raisr <ivosh@ivosh.net>
Thu, 12 Oct 2017 22:46:17 +0000 (00:46 +0200)
Refactor tracking of MOV coalescing.

Reg<->Reg MOV coalescing status is now a part of the HRegUsage.
This allows the register allocator to query it twice without incurring
a performance penalty. This in turn allows better tracking of
vreg<->vreg MOV coalescing, so that all vregs in a coalesce chain
get the effective |dead_before| of the last vreg.

A small performance improvement has been observed because this makes it
possible to coalesce even spilled vregs (previously only assigned ones).

18 files changed:
VEX/priv/host_amd64_defs.c
VEX/priv/host_amd64_defs.h
VEX/priv/host_arm64_defs.c
VEX/priv/host_arm64_defs.h
VEX/priv/host_arm_defs.c
VEX/priv/host_arm_defs.h
VEX/priv/host_generic_reg_alloc3.c
VEX/priv/host_generic_regs.c
VEX/priv/host_generic_regs.h
VEX/priv/host_mips_defs.c
VEX/priv/host_mips_defs.h
VEX/priv/host_ppc_defs.c
VEX/priv/host_ppc_defs.h
VEX/priv/host_s390_defs.c
VEX/priv/host_s390_defs.h
VEX/priv/host_x86_defs.c
VEX/priv/host_x86_defs.h
VEX/priv/main_main.c

index d9949d4fd7f7d697c04296b205ff5cd5594b4459..a554e28ed9c568f4c74c07c661b14a82f8aa4953 100644 (file)
@@ -1406,6 +1406,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
          addRegUsage_AMD64RMI(u, i->Ain.Alu64R.src);
          if (i->Ain.Alu64R.op == Aalu_MOV) {
             addHRegUse(u, HRmWrite, i->Ain.Alu64R.dst);
+
+            if (i->Ain.Alu64R.src->tag == Armi_Reg) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Ain.Alu64R.src->Armi.Reg.reg;
+               u->regMoveDst   = i->Ain.Alu64R.dst;
+            }
             return;
          }
          if (i->Ain.Alu64R.op == Aalu_CMP) { 
@@ -1668,6 +1674,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
             addHRegUse(u, i->Ain.SseReRg.op == Asse_MOV 
                              ? HRmWrite : HRmModify, 
                           i->Ain.SseReRg.dst);
+
+            if (i->Ain.SseReRg.op == Asse_MOV) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Ain.SseReRg.src;
+               u->regMoveDst   = i->Ain.SseReRg.dst;
+            }
          }
          return;
       case Ain_SseCMov:
@@ -1694,6 +1706,12 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
       //uu       addHRegUse(u, i->Ain.AvxReRg.op == Asse_MOV 
       //uu                        ? HRmWrite : HRmModify, 
       //uu                     i->Ain.AvxReRg.dst);
+      //uu
+      //uu       if (i->Ain.AvxReRg.op == Asse_MOV) {
+      //uu          u->isRegRegMove = True;
+      //uu          u->regMoveSrc   = i->Ain.AvxReRg.src;
+      //uu          u->regMoveDst   = i->Ain.AvxReRg.dst;
+      //uu       }
       //uu    }
       //uu    return;
       case Ain_EvCheck:
@@ -1910,43 +1928,6 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_AMD64Instr ( const AMD64Instr* i, HReg* src, HReg* dst )
-{
-   switch (i->tag) {
-      case Ain_Alu64R:
-         /* Moves between integer regs */
-         if (i->Ain.Alu64R.op != Aalu_MOV)
-            return False;
-         if (i->Ain.Alu64R.src->tag != Armi_Reg)
-            return False;
-         *src = i->Ain.Alu64R.src->Armi.Reg.reg;
-         *dst = i->Ain.Alu64R.dst;
-         return True;
-      case Ain_SseReRg:
-         /* Moves between SSE regs */
-         if (i->Ain.SseReRg.op != Asse_MOV)
-            return False;
-         *src = i->Ain.SseReRg.src;
-         *dst = i->Ain.SseReRg.dst;
-         return True;
-      //uu case Ain_AvxReRg:
-      //uu    /* Moves between AVX regs */
-      //uu    if (i->Ain.AvxReRg.op != Asse_MOV)
-      //uu       return False;
-      //uu    *src = i->Ain.AvxReRg.src;
-      //uu    *dst = i->Ain.AvxReRg.dst;
-      //uu    return True;
-      default:
-         return False;
-   }
-   /*NOTREACHED*/
-}
-
-
 /* Generate amd64 spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. */
index 349e43c74c46db92e7cbf39864d4eda6b0836d90..299d002febcc81d418319d3583e02345e31d2a63 100644 (file)
@@ -785,7 +785,6 @@ extern void ppAMD64Instr ( const AMD64Instr*, Bool );
    of the underlying instruction set. */
 extern void getRegUsage_AMD64Instr ( HRegUsage*, const AMD64Instr*, Bool );
 extern void mapRegs_AMD64Instr     ( HRegRemap*, AMD64Instr*, Bool );
-extern Bool isMove_AMD64Instr      ( const AMD64Instr*, HReg*, HReg* );
 extern Int          emit_AMD64Instr   ( /*MB_MOD*/Bool* is_profInc,
                                         UChar* buf, Int nbuf,
                                         const AMD64Instr* i, 
index 2506512adbc97f032e8160618d17fa8205b81040..4d088c77b4822575b3e9be1cd0d268ae9e91178b 100644 (file)
@@ -1958,6 +1958,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
       case ARM64in_MovI:
          addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
          addHRegUse(u, HRmRead,  i->ARM64in.MovI.src);
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->ARM64in.MovI.src;
+         u->regMoveDst   = i->ARM64in.MovI.dst;
          return;
       case ARM64in_Imm64:
          addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
@@ -2238,6 +2241,9 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
       case ARM64in_VMov:
          addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
          addHRegUse(u, HRmRead,  i->ARM64in.VMov.src);
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->ARM64in.VMov.src;
+         u->regMoveDst   = i->ARM64in.VMov.dst;
          return;
       case ARM64in_EvCheck:
          /* We expect both amodes only to mention x21, so this is in
@@ -2510,29 +2516,6 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_ARM64Instr ( const ARM64Instr* i, HReg* src, HReg* dst )
-{
-   switch (i->tag) {
-      case ARM64in_MovI:
-         *src = i->ARM64in.MovI.src;
-         *dst = i->ARM64in.MovI.dst;
-         return True;
-      case ARM64in_VMov:
-         *src = i->ARM64in.VMov.src;
-         *dst = i->ARM64in.VMov.dst;
-         return True;
-      default:
-         break;
-   }
-
-   return False;
-}
-
-
 /* Generate arm spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. */
index 840e0aabc99689bf52a98259f8367b31dbf7dd92..9d8cfb0cff10d77959b07aab792ac2fb3d48b35b 100644 (file)
@@ -993,7 +993,6 @@ extern void ppARM64Instr ( const ARM64Instr* );
    of the underlying instruction set. */
 extern void getRegUsage_ARM64Instr ( HRegUsage*, const ARM64Instr*, Bool );
 extern void mapRegs_ARM64Instr     ( HRegRemap*, ARM64Instr*, Bool );
-extern Bool isMove_ARM64Instr      ( const ARM64Instr*, HReg*, HReg* );
 extern Int  emit_ARM64Instr        ( /*MB_MOD*/Bool* is_profInc,
                                      UChar* buf, Int nbuf, const ARM64Instr* i,
                                      Bool mode64,
index 9bf87cd5c30170ccb17e0a7a65debcd765d99c9e..3de6d5011644aa9e511fd0f5c7cf31fc2fc0d2ea 100644 (file)
@@ -2108,6 +2108,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
       case ARMin_Mov:
          addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
          addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
+
+         if (i->ARMin.Mov.src->tag == ARMri84_R) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.Mov.src->ARMri84.R.reg;
+            u->regMoveDst   = i->ARMin.Mov.dst;
+         }
          return;
       case ARMin_Imm32:
          addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
@@ -2256,10 +2262,22 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
       case ARMin_VUnaryD:
          addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
          addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
+
+         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.VUnaryD.src;
+            u->regMoveDst   = i->ARMin.VUnaryD.dst;
+         }
          return;
       case ARMin_VUnaryS:
          addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
          addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
+
+         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.VUnaryS.src;
+            u->regMoveDst   = i->ARMin.VUnaryS.dst;
+         }
          return;
       case ARMin_VCmpD:
          addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
@@ -2350,6 +2368,12 @@ void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
       case ARMin_NUnary:
          addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
          addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
+
+         if (i->ARMin.NUnary.op == ARMneon_COPY) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->ARMin.NUnary.src;
+            u->regMoveDst   = i->ARMin.NUnary.dst;
+         }
          return;
       case ARMin_NUnaryS:
          addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
@@ -2620,50 +2644,6 @@ void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_ARMInstr ( const ARMInstr* i, HReg* src, HReg* dst )
-{
-   /* Moves between integer regs */
-   switch (i->tag) {
-      case ARMin_Mov:
-         if (i->ARMin.Mov.src->tag == ARMri84_R) {
-            *src = i->ARMin.Mov.src->ARMri84.R.reg;
-            *dst = i->ARMin.Mov.dst;
-            return True;
-         }
-         break;
-      case ARMin_VUnaryD:
-         if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
-            *src = i->ARMin.VUnaryD.src;
-            *dst = i->ARMin.VUnaryD.dst;
-            return True;
-         }
-         break;
-      case ARMin_VUnaryS:
-         if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
-            *src = i->ARMin.VUnaryS.src;
-            *dst = i->ARMin.VUnaryS.dst;
-            return True;
-         }
-         break;
-      case ARMin_NUnary:
-         if (i->ARMin.NUnary.op == ARMneon_COPY) {
-            *src = i->ARMin.NUnary.src;
-            *dst = i->ARMin.NUnary.dst;
-            return True;
-         }
-         break;
-      default:
-         break;
-   }
-
-   return False;
-}
-
-
 /* Generate arm spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. */
index ec6358ee15badc95607f6bcfdc48e26b9bf16e80..d92ad876bea7c2ab20e8555b3f5a18d6e45a149f 100644 (file)
@@ -1056,7 +1056,6 @@ extern void ppARMInstr ( const ARMInstr* );
    of the underlying instruction set. */
 extern void getRegUsage_ARMInstr ( HRegUsage*, const ARMInstr*, Bool );
 extern void mapRegs_ARMInstr     ( HRegRemap*, ARMInstr*, Bool );
-extern Bool isMove_ARMInstr      ( const ARMInstr*, HReg*, HReg* );
 extern Int  emit_ARMInstr        ( /*MB_MOD*/Bool* is_profInc,
                                    UChar* buf, Int nbuf, const ARMInstr* i, 
                                    Bool mode64,
index 18eb91741c29b76a299de926e4cdc4c6d7c33488..5a604a63e566739e065949f00f48e5a589913a04 100644 (file)
     Avoids the situation where registers are allocated somehow
     in the fall-through leg and need to be spilled just a few instructions
     after the merge (because of a helper call, for example).
+
+   TODO-JIT: Investigate extending MOV coalesce chains across If-Then-Else
+   legs. Perhaps phi node merging could also be considered as a sort of MOV
+   coalescing?
 */
 
 /* Set to 1 for lots of debugging output. */
@@ -115,6 +119,18 @@ typedef
       /* The "home" spill slot. The offset is relative to the beginning of
          the guest state. */
       UShort spill_offset;
+
+      /* This vreg (vregS) is coalesced to another vreg
+         if |coalescedTo| != INVALID_HREG.
+         Coalescing means that there is a MOV instruction which occurs in the
+         instruction stream right at vregS' dead_before
+         and vregD's live_after. */
+      HReg coalescedTo;    /* Which vreg it is coalesced to. */
+      HReg coalescedFirst; /* First vreg in the coalescing chain. */
+
+      /* If this vregS is coalesced to another vregD, what is the combined
+         dead_before for vregS+vregD. Used to effectively allocate registers. */
+      Short effective_dead_before;
    }
    VRegState;
 
@@ -247,6 +263,12 @@ typedef
          next chunk: not NULL                     |    x    |      -
                          NULL                     |    x    |      x
        */
+
+      /* Mark vreg indexes where coalesce chains start at.
+         Used internally by MOV coalescing algorithm, to convey information
+         across different stages. */
+      UInt* coalesce_heads;
+      UInt  nr_coalesce_heads;
    }
    RegAllocChunk;
 
@@ -260,7 +282,8 @@ static void init_rreg_lr_state(RRegLRState* rreg_lrs)
    rreg_lrs->lr_current_idx = 0;
 }
 
-static RegAllocChunk* new_chunk(HInstrVec* instrs_in, UInt n_rregs)
+static RegAllocChunk* new_chunk(HInstrVec* instrs_in, UInt n_vregs,
+                                UInt n_rregs)
 {
    RegAllocChunk* chunk  = LibVEX_Alloc_inline(sizeof(RegAllocChunk));
    chunk->n_rregs        = n_rregs;
@@ -269,14 +292,16 @@ static RegAllocChunk* new_chunk(HInstrVec* instrs_in, UInt n_rregs)
    for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
       init_rreg_lr_state(&chunk->rreg_lr_state[r_idx]);
    }
-   chunk->instrs_in      = instrs_in;
-   chunk->ii_vec_start   = INVALID_INSTRNO;
-   chunk->ii_vec_len     = 0;
-   chunk->ii_total_start = INVALID_INSTRNO;
-   chunk->reg_usage      = NULL;
-   chunk->instrs_out     = NULL;
-   chunk->isIfThenElse   = False;
-   chunk->next           = NULL;
+   chunk->instrs_in         = instrs_in;
+   chunk->ii_vec_start      = INVALID_INSTRNO;
+   chunk->ii_vec_len        = 0;
+   chunk->ii_total_start    = INVALID_INSTRNO;
+   chunk->reg_usage         = NULL;
+   chunk->instrs_out        = NULL;
+   chunk->isIfThenElse      = False;
+   chunk->next              = NULL;
+   chunk->coalesce_heads    = LibVEX_Alloc_inline(n_vregs);
+   chunk->nr_coalesce_heads = 0;
 
    return chunk;
 }
@@ -347,15 +372,16 @@ static inline void enlarge_rreg_lrs(RRegLRState* rreg_lrs)
       print_state(chunk, state, INSTRNO_TOTAL, depth, con, what); \
    } while (0)
 
-static inline void print_state(
-   const RegAllocChunk* chunk, const RegAllocState* state,
-   Short ii_total_current, UInt depth, const RegAllocControl* con,
-   const HChar* comment)
-{
-   print_depth(depth);
-   vex_printf("%s (current instruction total #%d):\n",
-              comment, ii_total_current);
+#define RIGHT_JUSTIFY(_total, _written)                   \
+   do {                                                  \
+      for (Int w = (_total) - (_written); w > 0; w--) {  \
+         vex_printf(" ");                                \
+      }                                                  \
+   } while (0)
 
+static inline void print_vregs(const RegAllocState* state,
+  Short ii_total_current, UInt depth, const RegAllocControl* con)
+{
    for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) {
       const VRegState* vreg = &state->vregs[v_idx];
 
@@ -380,20 +406,43 @@ static inline void print_state(
       default:
          vassert(0);
       }
+      RIGHT_JUSTIFY(25, written);
 
-      for (Int w = 30 - written; w > 0; w--) {
-         vex_printf(" ");
-      }
+      written = vex_printf("lr: [%d, %d) ",
+                           vreg->live_after, vreg->dead_before);
+      RIGHT_JUSTIFY(15, written);
+
+      written = vex_printf("effective lr: [%d, %d)",
+                           vreg->live_after, vreg->effective_dead_before);
+      RIGHT_JUSTIFY(25, written);
 
       if (vreg->live_after > ii_total_current) {
          vex_printf("[not live yet]");
       } else if (ii_total_current >= vreg->dead_before) {
-         vex_printf("[now dead]");
+         if (hregIsInvalid(vreg->coalescedTo)) {
+            vex_printf("[now dead]\n");
+         } else {
+            vex_printf("[now dead, coalesced to ");
+            con->ppReg(vreg->coalescedTo);
+            vex_printf("]\n");
+         }
       } else {
          vex_printf("[live]");
       }
       vex_printf(" [%d - %d)\n", vreg->live_after, vreg->dead_before);
    }
+}
+static inline void print_state(
+   const RegAllocChunk* chunk, const RegAllocState* state,
+   Short ii_total_current, UInt depth, const RegAllocControl* con,
+   const HChar* comment)
+{
+
+   print_depth(depth);
+   vex_printf("%s (current instruction total #%d):\n",
+              comment, ii_total_current);
+
+   print_vregs(state, ii_total_current, depth, con);
 
    for (UInt r_idx = 0; r_idx < chunk->n_rregs; r_idx++) {
       const RRegState* rreg = &state->rregs[r_idx];
@@ -401,9 +450,7 @@ static inline void print_state(
       print_depth(depth);
       vex_printf("rreg_state[%2u] = ", r_idx);
       UInt written = con->ppReg(con->univ->regs[r_idx]);
-      for (Int w = 10 - written; w > 0; w--) {
-         vex_printf(" ");
-      }
+      RIGHT_JUSTIFY(10, written);
 
       switch (rreg->disp) {
       case Free:
@@ -423,6 +470,8 @@ static inline void print_state(
          break;
       }
    }
+
+#  undef RIGHT_JUSTIFY
 }
 
 static RegAllocState* clone_state(const RegAllocState* orig)
@@ -582,7 +631,7 @@ static inline HReg find_vreg_to_spill(
    a callee-save register because it won't be used for parameter passing
    around helper function calls. */
 static inline Bool find_free_rreg(
-   const RegAllocChunk* chunk, RegAllocState* state,
+   const RegAllocChunk* chunk, const RegAllocState* state,
    Short ii_chunk_current, HRegClass target_hregclass,
    Bool reserve_phase, const RegAllocControl* con, UInt* r_idx_found)
 {
@@ -682,12 +731,12 @@ static inline void assign_vreg(RegAllocChunk* chunk, RegAllocState* state,
 /* --- Stage 1. ---
    Determine total ordering of instructions and structure of HInstrIfThenElse.
    Build similar structure of RegAllocChunk's. */
-static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_rregs,
-                   RegAllocChunk** first_chunk, const RegAllocControl* con)
+static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_vregs,
+          UInt n_rregs, RegAllocChunk** first_chunk, const RegAllocControl* con)
 {
    Short ii_vec_start = 0;
 
-   RegAllocChunk* chunk  = new_chunk(instrs_in, n_rregs);
+   RegAllocChunk* chunk  = new_chunk(instrs_in, n_vregs, n_rregs);
    chunk->ii_vec_start   = ii_vec_start;
    chunk->ii_total_start = ii_total_start;
    chunk->instrs_out     = newHInstrVec();
@@ -716,10 +765,12 @@ static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_rregs,
       if (hite != NULL) {
          RegAllocChunk* chunk_fallThrough;
          UInt ii_total_fallThrough = stage1(hite->fallThrough, ii_total_start,
-                                            n_rregs, &chunk_fallThrough, con);
+                                            n_vregs, n_rregs,
+                                            &chunk_fallThrough, con);
          RegAllocChunk* chunk_outOfLine;
          UInt ii_total_outOfLine = stage1(hite->outOfLine, ii_total_start,
-                                          n_rregs, &chunk_outOfLine, con);
+                                          n_vregs, n_rregs,
+                                          &chunk_outOfLine, con);
 
          chunk->isIfThenElse           = True;
          chunk->IfThenElse.ccOOL       = hite->ccOOL;
@@ -733,7 +784,7 @@ static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_rregs,
 
       if (ii_vec < instrs_in->insns_used - 1) {
          RegAllocChunk* previous = chunk;
-         chunk                   = new_chunk(instrs_in, n_rregs);
+         chunk                   = new_chunk(instrs_in, n_vregs, n_rregs);
          chunk->ii_vec_start     = ii_vec_start;
          chunk->ii_total_start   = toShort(ii_total_start);
          chunk->instrs_out       = (*first_chunk)->instrs_out;
@@ -744,7 +795,6 @@ static UInt stage1(HInstrVec* instrs_in, UInt ii_total_start, UInt n_rregs,
    return ii_total_start;
 }
 
-
 /* --- Stage 2. ---
    Scan the incoming instructions.
    Note: state->vregs is initially global (shared across all chunks).
@@ -784,7 +834,12 @@ static void stage2_chunk(RegAllocChunk* chunk, RegAllocState* state,
         ii_vec++, ii_chunk++) {
       const HInstr* instr = chunk->instrs_in->insns[ii_vec];
 
+
       con->getRegUsage(&chunk->reg_usage[ii_chunk], instr, con->mode64);
+      chunk->reg_usage[ii_chunk].isVregVregMove
+         = chunk->reg_usage[ii_chunk].isRegRegMove
+            && hregIsVirtual(chunk->reg_usage[ii_chunk].regMoveSrc)
+            && hregIsVirtual(chunk->reg_usage[ii_chunk].regMoveDst);
 
       if (0) {
          vex_printf("\n");
@@ -847,6 +902,9 @@ static void stage2_chunk(RegAllocChunk* chunk, RegAllocState* state,
          if (state->vregs[v_idx].dead_before < INSTRNO_TOTAL + 1) {
             state->vregs[v_idx].dead_before = INSTRNO_TOTAL + 1;
          }
+         if (state->vregs[v_idx].effective_dead_before < INSTRNO_TOTAL + 1) {
+            state->vregs[v_idx].effective_dead_before = INSTRNO_TOTAL + 1;
+         }
       }
 
       /* Process real registers mentioned in the instruction. */
@@ -1002,14 +1060,87 @@ static void stage2_debug_rregs(RegAllocChunk* chunk, UInt depth,
                ;);
 }
 
+
+/* Preparation for MOV coalescing. Establish MOV coalescing chains. */
+static void stage3_chunk(RegAllocChunk* chunk, RegAllocState* state,
+                Bool* coalesce_happened, UInt depth, const RegAllocControl* con)
+{
+   /* Optimise register coalescing:
+         MOV  v <-> v   coalescing (done here).
+         MOV  v <-> r   coalescing (TODO: not yet, not here). */
+   /* If doing a reg-reg move between two vregs, and the src's live range ends
+     here and the dst's live range starts here, coalesce the src vreg
+     to the dst vreg. */
+   Short ii_chunk = 0;
+   for (Short ii_vec = chunk->ii_vec_start;
+        ii_vec < chunk->ii_vec_start + chunk->ii_vec_len;
+        ii_vec++, ii_chunk++) {
+      if (chunk->reg_usage[ii_chunk].isVregVregMove) {
+         HReg vregS = chunk->reg_usage[ii_chunk].regMoveSrc;
+         HReg vregD = chunk->reg_usage[ii_chunk].regMoveDst;
+
+         /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
+         vassert(hregClass(vregS) == hregClass(vregD));
+         UInt vs_idx = hregIndex(vregS);
+         UInt vd_idx = hregIndex(vregD);
+         vassert(IS_VALID_VREGNO(vs_idx));
+         vassert(IS_VALID_VREGNO(vd_idx));
+         vassert(! sameHReg(vregS, vregD));
+         VRegState* vs_st = &state->vregs[vs_idx];
+         VRegState* vd_st = &state->vregs[vd_idx];
+
+         if ((vs_st->dead_before == INSTRNO_TOTAL + 1)
+             && (vd_st->live_after == INSTRNO_TOTAL)) {
+            /* Live ranges are adjacent. */
+
+            vs_st->coalescedTo = vregD;
+            if (hregIsInvalid(vs_st->coalescedFirst)) {
+               vd_st->coalescedFirst = vregS;
+               chunk->coalesce_heads[chunk->nr_coalesce_heads] = vs_idx;
+               chunk->nr_coalesce_heads += 1;
+            } else {
+               vd_st->coalescedFirst = vs_st->coalescedFirst;
+            }
+
+            state->vregs[hregIndex(vd_st->coalescedFirst)].effective_dead_before
+               = vd_st->dead_before;
+
+            if (DEBUG_REGALLOC) {
+               print_depth(depth);
+               vex_printf("vreg coalescing: ");
+               con->ppReg(vregS);
+               vex_printf(" -> ");
+               con->ppReg(vregD);
+               vex_printf("\n");
+            }
+
+            *coalesce_happened = True;
+         }
+      }
+   }
+}
+
+static void stage3(RegAllocChunk* chunk, RegAllocState* state,
+                Bool* coalesce_happened, UInt depth, const RegAllocControl* con)
+{
+   WALK_CHUNKS(stage3_chunk(chunk, state, coalesce_happened, depth, con),
+               ;,
+               stage3(chunk->IfThenElse.fallThrough, state, coalesce_happened,
+                      depth + 1, con),
+               stage3(chunk->IfThenElse.outOfLine, state, coalesce_happened,
+                      depth + 1, con),
+               ;);
+}
+
+
 /* Allocates spill slots. Because VRegState is initially global, spill slots
    are also initially global. This might have the adverse effect that spill
    slots will eventually run out if there are too many nested If-Then-Else legs. In that
    case, VRegState must not be initially global but rather local to every leg;
    and vregs will need to eventually have extended their live ranges after legs
    merge. */
-static void stage3(VRegState* vreg_state, UInt n_vregs,
-                   const RegAllocControl* con)
+static void stage4_main(VRegState* vreg_state, UInt n_vregs,
+                        const RegAllocControl* con)
 {
 #  define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
    STATIC_ASSERT((N_SPILL64S % 2) == 0);
@@ -1048,6 +1179,11 @@ static void stage3(VRegState* vreg_state, UInt n_vregs,
          vassert(vreg_state[v_idx].reg_class == HRcINVALID);
          continue;
       }
+      if (! hregIsInvalid(vreg_state[v_idx].coalescedFirst)) {
+         /* Coalesced vregs should share the same spill slot with the first vreg
+            in the coalescing chain. But we don't have that information, yet. */
+         continue;
+      }
 
       /* The spill slots are 64 bits in size.  As per the comment on definition
          of HRegClass in host_generic_regs.h, that means, to spill a vreg of
@@ -1070,8 +1206,10 @@ static void stage3(VRegState* vreg_state, UInt n_vregs,
             if (ss_no >= N_SPILL64S - 1) {
                vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
             }
-            ss_busy_until_before[ss_no + 0] = vreg_state[v_idx].dead_before;
-            ss_busy_until_before[ss_no + 1] = vreg_state[v_idx].dead_before;
+            ss_busy_until_before[ss_no + 0]
+               = vreg_state[v_idx].effective_dead_before;
+            ss_busy_until_before[ss_no + 1]
+               = vreg_state[v_idx].effective_dead_before;
             break;
          default:
             /* The ordinary case -- just find a single lowest-numbered spill
@@ -1084,7 +1222,8 @@ static void stage3(VRegState* vreg_state, UInt n_vregs,
             if (ss_no == N_SPILL64S) {
                vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
             }
-            ss_busy_until_before[ss_no] = vreg_state[v_idx].dead_before;
+            ss_busy_until_before[ss_no]
+               = vreg_state[v_idx].effective_dead_before;
             break;
       }
 
@@ -1105,18 +1244,42 @@ static void stage3(VRegState* vreg_state, UInt n_vregs,
       }
    }
 
-   if (0) {
-      vex_printf("\n\n");
-      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++)
-         vex_printf("vreg %3u    --> spill offset %u\n",
-                    v_idx, vreg_state[v_idx].spill_offset);
+#  undef N_SPILL64S
+}
+
+static void stage4_coalesced_chunk(RegAllocChunk* chunk, RegAllocState* state,
+                                   UInt depth, const RegAllocControl* con)
+{
+   /* Fill in the spill offsets and effective_dead_before for coalesced vregs.*/
+   for (UInt i = 0; i < chunk->nr_coalesce_heads; i++) {
+      UInt vs_idx = chunk->coalesce_heads[i];
+      Short effective_dead_before = state->vregs[vs_idx].effective_dead_before;
+      UShort spill_offset         = state->vregs[vs_idx].spill_offset;
+
+      HReg vregD = state->vregs[vs_idx].coalescedTo;
+      while (! hregIsInvalid(vregD)) {
+         UInt vd_idx = hregIndex(vregD);
+         state->vregs[vd_idx].effective_dead_before = effective_dead_before;
+         state->vregs[vd_idx].spill_offset          = spill_offset;
+         vregD = state->vregs[vd_idx].coalescedTo;
+      }
    }
+}
 
-#  undef N_SPILL64S
+static void stage4_coalesced(RegAllocChunk* chunk, RegAllocState* state,
+                             UInt depth, const RegAllocControl* con)
+{
+   WALK_CHUNKS(stage4_coalesced_chunk(chunk, state, depth, con),
+               ;,
+               stage4_coalesced(chunk->IfThenElse.fallThrough, state,
+                                depth + 1, con),
+               stage4_coalesced(chunk->IfThenElse.outOfLine, state,
+                                depth + 1, con),
+               ;);
 }
 
 
-static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state,
+static void stage5_chunk(RegAllocChunk* chunk, RegAllocState* state,
                          UInt depth, const RegAllocControl* con)
 {
 /* Finds an rreg of the correct class.
@@ -1124,7 +1287,7 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state,
    instruction and makes free the corresponding rreg. */
 #  define FIND_OR_MAKE_FREE_RREG(_v_idx, _reg_class, _reserve_phase)           \
    ({                                                                          \
-      UInt _r_free_idx = -1;                                                   \
+      UInt _r_free_idx;                                                        \
       Bool free_rreg_found = find_free_rreg(chunk, state,                      \
                                      ii_chunk, (_reg_class), (_reserve_phase), \
                                      con, &_r_free_idx);                       \
@@ -1218,66 +1381,83 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state,
                vassert(ii_chunk < rreg_lrs->lr_current->dead_before);
             }
          }
+
+         /* Sanity check: if vregS has been marked as coalesced to vregD,
+            then the effective live range of vregS must also cover live range
+            of vregD. */
+         /* The following sanity check is quite expensive. Some basic blocks
+            contain very lengthy coalescing chains... */
+         if (SANITY_CHECKS_EVERY_INSTR) {
+            for (UInt vs_idx = 0; vs_idx < state->n_vregs; vs_idx++) {
+               const VRegState* vS_st = &state->vregs[vs_idx];
+               HReg vregD = vS_st->coalescedTo;
+               while (! hregIsInvalid(vregD)) {
+                  const VRegState* vD_st = &state->vregs[hregIndex(vregD)];
+                  vassert(vS_st->live_after <= vD_st->live_after);
+                  vassert(vS_st->effective_dead_before >= vD_st->dead_before);
+                  vregD = vD_st->coalescedTo;
+               }
+            }
+         }
       }
 
 
-      /* --- MOV coalescing --- */
+      /* --- MOV coalescing (finishing) --- */
       /* Optimise register coalescing:
-            MOV  v <-> v   coalescing (done here).
+            MOV  v <-> v   coalescing (finished here).
             MOV  v <-> r   coalescing (TODO: not yet). */
-      /* If doing a reg-reg move between two vregs, and the src's live
-         range ends here and the dst's live range starts here, bind the dst
-         to the src's rreg, and that's all. */
-      HReg vregS = INVALID_HREG;
-      HReg vregD = INVALID_HREG;
-      if (con->isMove(instr, &vregS, &vregD)) {
-         if (hregIsVirtual(vregS) && hregIsVirtual(vregD)) {
-            /* Check that |isMove| is not telling us a bunch of lies ... */
-            vassert(hregClass(vregS) == hregClass(vregD));
-            UInt vs_idx = hregIndex(vregS);
-            UInt vd_idx = hregIndex(vregD);
-            vassert(IS_VALID_VREGNO(vs_idx));
-            vassert(IS_VALID_VREGNO(vd_idx));
-
-            if ((state->vregs[vs_idx].dead_before == INSTRNO_TOTAL + 1)
-                && (state->vregs[vd_idx].live_after == INSTRNO_TOTAL)
-                && (state->vregs[vs_idx].disp == Assigned)) {
-
-               /* Live ranges are adjacent and source vreg is bound.
-                  Finally we can do the coalescing.  */
-               HReg rreg = state->vregs[vs_idx].rreg;
-               state->vregs[vd_idx].disp = Assigned;
+      if (chunk->reg_usage[ii_chunk].isVregVregMove) {
+         HReg vregS = chunk->reg_usage[ii_chunk].regMoveSrc;
+         HReg vregD = chunk->reg_usage[ii_chunk].regMoveDst;
+         UInt vs_idx = hregIndex(vregS);
+         UInt vd_idx = hregIndex(vregD);
+
+         if (sameHReg(state->vregs[vs_idx].coalescedTo, vregD)) {
+            /* Finally do the coalescing. */
+
+            HReg rreg = state->vregs[vs_idx].rreg;
+            switch (state->vregs[vs_idx].disp) {
+            case Assigned:
                state->vregs[vd_idx].rreg = rreg;
-               FREE_VREG(&state->vregs[vs_idx]);
-
                UInt r_idx = hregIndex(rreg);
                vassert(state->rregs[r_idx].disp == Bound);
-               state->rregs[r_idx].vreg          = vregD;
-               state->rregs[r_idx].eq_spill_slot = False;
+               state->rregs[r_idx].vreg = vregD;
+               break;
+            case Spilled:
+               vassert(hregIsInvalid(state->vregs[vs_idx].rreg));
+               break;
+            default:
+               vassert(0);
+            }
 
-               if (DEBUG_REGALLOC) {
-                  print_depth(depth);
-                  vex_printf("coalesced: ");
-                  con->ppReg(vregS);
-                  vex_printf(" -> ");
-                  con->ppReg(vregD);
-                  vex_printf("\n\n");
-               }
+            state->vregs[vd_idx].disp = state->vregs[vs_idx].disp;
+            FREE_VREG(&state->vregs[vs_idx]);
+
+            if (DEBUG_REGALLOC) {
+               print_depth(depth);
+               vex_printf("coalesced: ");
+               con->ppReg(vregS);
+               vex_printf(" -> ");
+               con->ppReg(vregD);
+               vex_printf("\n\n");
+            }
 
-               /* In rare cases it can happen that vregD's live range ends
-                  here. Check and eventually free the vreg and rreg.
-                  This effectively means that either the translated program
-                  contained dead code (although VEX iropt passes are pretty good
-                  at eliminating it) or the VEX backend generated dead code. */
-               if (state->vregs[vd_idx].dead_before <= INSTRNO_TOTAL + 1) {
-                  FREE_VREG(&state->vregs[vd_idx]);
+            /* In rare cases it can happen that vregD's live range ends here.
+               Check and, if necessary, free the vreg and rreg.
+               This effectively means that either the translated program
+               contained dead code (but VEX iropt passes are pretty good
+               at eliminating it) or the VEX backend generated dead code. */
+            if (state->vregs[vd_idx].dead_before <= INSTRNO_TOTAL + 1) {
+               if (state->vregs[vd_idx].disp == Assigned) {
+                  UInt r_idx = hregIndex(rreg);
                   FREE_RREG(&state->rregs[r_idx]);
                }
-
-               /* Move on to the next instruction. We skip the post-instruction
-                  stuff because all required house-keeping was done here. */
-               continue;
+               FREE_VREG(&state->vregs[vd_idx]);
             }
+
+            /* Move on to the next instruction. We skip the post-instruction
+               stuff because all required house-keeping was done here. */
+            continue;
          }
       }
 
@@ -1547,7 +1727,7 @@ static void stage4_chunk(RegAllocChunk* chunk, RegAllocState* state,
 #  undef FIND_OR_MAKE_FREE_RREG
 }
 
-static void stage4_emit_HInstrIfThenElse(RegAllocChunk* chunk, UInt depth,
+static void stage5_emit_HInstrIfThenElse(RegAllocChunk* chunk, UInt depth,
                                          const RegAllocControl* con)
 {
    vassert(chunk->isIfThenElse);
@@ -1759,7 +1939,7 @@ static void merge_vreg_states(RegAllocChunk* chunk,
 
 /* Merges |cloned| state from out-of-line leg back into the main |state|,
    modified by fall-through leg since the legs fork. */
-static void stage4_merge_states(RegAllocChunk* chunk,
+static void stage5_merge_states(RegAllocChunk* chunk,
    RegAllocState* state, RegAllocState* cloned,
    UInt depth, const RegAllocControl* con)
 {
@@ -1849,15 +2029,15 @@ static void stage4_merge_states(RegAllocChunk* chunk,
    }
 }
 
-static void stage4(RegAllocChunk* chunk, RegAllocState* state,
+static void stage5(RegAllocChunk* chunk, RegAllocState* state,
                    UInt depth, const RegAllocControl* con)
 {
-   WALK_CHUNKS(stage4_chunk(chunk, state, depth, con),
-               stage4_emit_HInstrIfThenElse(chunk, depth, con);
+   WALK_CHUNKS(stage5_chunk(chunk, state, depth, con),
+               stage5_emit_HInstrIfThenElse(chunk, depth, con);
                RegAllocState* cloned_state = clone_state(state),
-               stage4(chunk->IfThenElse.fallThrough, state, depth + 1, con),
-               stage4(chunk->IfThenElse.outOfLine, cloned_state, depth + 1, con),
-               stage4_merge_states(chunk, state, cloned_state, depth, con));
+               stage5(chunk->IfThenElse.fallThrough, state, depth + 1, con),
+               stage5(chunk->IfThenElse.outOfLine, cloned_state, depth + 1, con),
+               stage5_merge_states(chunk, state, cloned_state, depth, con));
 }
 
 
@@ -1900,12 +2080,15 @@ HInstrSB* doRegisterAllocation(
    /* --- Stage 0. --- */
    /* Initialize the vreg state. It is initially global. --- */
    for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) {
-      state->vregs[v_idx].live_after   = INVALID_INSTRNO;
-      state->vregs[v_idx].dead_before  = INVALID_INSTRNO;
-      state->vregs[v_idx].reg_class    = HRcINVALID;
-      state->vregs[v_idx].disp         = Unallocated;
-      state->vregs[v_idx].rreg         = INVALID_HREG;
-      state->vregs[v_idx].spill_offset = 0;
+      state->vregs[v_idx].live_after            = INVALID_INSTRNO;
+      state->vregs[v_idx].dead_before           = INVALID_INSTRNO;
+      state->vregs[v_idx].reg_class             = HRcINVALID;
+      state->vregs[v_idx].disp                  = Unallocated;
+      state->vregs[v_idx].rreg                  = INVALID_HREG;
+      state->vregs[v_idx].spill_offset          = 0;
+      state->vregs[v_idx].coalescedTo           = INVALID_HREG;
+      state->vregs[v_idx].coalescedFirst        = INVALID_HREG;
+      state->vregs[v_idx].effective_dead_before = INVALID_INSTRNO;
    }
 
   /* Initialize redundant rreg -> vreg state. A snapshot is taken for
@@ -1920,7 +2103,7 @@ HInstrSB* doRegisterAllocation(
    /* --- Stage 1. Determine total ordering of instructions and structure
       of HInstrIfThenElse. --- */
    RegAllocChunk* first_chunk;
-   UInt ii_total_last = stage1(sb_in->insns, 0, state->n_rregs,
+   UInt ii_total_last = stage1(sb_in->insns, 0, state->n_vregs, state->n_rregs,
                                &first_chunk, con);
 
    /* The live range numbers are signed shorts, and so limiting the number
@@ -1936,11 +2119,30 @@ HInstrSB* doRegisterAllocation(
       stage2_debug_rregs(first_chunk, 0, con);
    }
 
-   /* --- Stage 3. Allocate spill slots. --- */
-   stage3(state->vregs, state->n_vregs, con);
+   /* --- Stage 3. MOV coalescing (preparation). --- */
+   Bool coalesce_happened = False;
+   stage3(first_chunk, state, &coalesce_happened, 0, con);
+
+   /* --- Stage 4. Allocate spill slots. --- */
+   stage4_main(state->vregs, state->n_vregs, con);
+   stage4_coalesced(first_chunk, state, 0, con);
+   if (DEBUG_REGALLOC && coalesce_happened) {
+      vex_printf("\nAfter vreg<->vreg MOV coalescing:\n");
+      print_vregs(state, 0, 0, con);
+   }
+
+   if (0) {
+      vex_printf("\n\n");
+      for (UInt v_idx = 0; v_idx < state->n_vregs; v_idx++) {
+         if (state->vregs[v_idx].live_after != INVALID_INSTRNO) {
+            vex_printf("vreg %3u    --> spill offset %u\n",
+                       v_idx, state->vregs[v_idx].spill_offset);
+         }
+      }
+   }
 
-   /* --- Stage 4. Process the instructions and allocate registers. --- */
-   stage4(first_chunk, state, 0, con);
+   /* --- Stage 5. Process the instructions and allocate registers. --- */
+   stage5(first_chunk, state, 0, con);
 
    /* The output SB of instructions. */
    HInstrSB* sb_out = LibVEX_Alloc_inline(sizeof(HInstrSB));
index f21255b0f176afd9ff33694f6293e84d2bac4ba0..fc92f544478a03bf729f5372380c5a38e6e4b4bc 100644 (file)
@@ -184,6 +184,9 @@ void ppHRegUsage ( const RRegUniverse* univ, HRegUsage* tab )
       ppHReg(tab->vRegs[i]);
       vex_printf("\n");
    }
+   if (tab->isRegRegMove) {
+      vex_printf("   (is a reg-reg move)\n");
+   }
    vex_printf("}\n");
 }
 
index add30e2e9a00ae2ebb87ba423aa9a36628a4188e..b729c770889db77e23c92574d24fa800b8c8e565 100644 (file)
@@ -300,6 +300,16 @@ typedef
       HReg     vRegs[N_HREGUSAGE_VREGS];
       HRegMode vMode[N_HREGUSAGE_VREGS];
       UInt     n_vRegs;
+
+      /* Hint to the register allocator: this instruction is actually a move
+         between two registers: regMoveSrc -> regMoveDst. */
+      Bool     isRegRegMove;
+      HReg     regMoveSrc;
+      HReg     regMoveDst;
+
+      /* Used internally by the register allocator. The reg-reg move is
+         actually a vreg-vreg move. */
+      Bool     isVregVregMove;
    }
    HRegUsage;
 
@@ -307,9 +317,10 @@ extern void ppHRegUsage ( const RRegUniverse*, HRegUsage* );
 
 static inline void initHRegUsage ( HRegUsage* tab )
 {
-   tab->rRead    = 0;
-   tab->rWritten = 0;
-   tab->n_vRegs  = 0;
+   tab->rRead        = 0;
+   tab->rWritten     = 0;
+   tab->n_vRegs      = 0;
+   tab->isRegRegMove = False;
 }
 
 /* Add a register to a usage table.  Combine incoming read uses with
@@ -515,10 +526,6 @@ typedef
          allocation. */
       const RRegUniverse* univ;
 
-      /* Return True iff the given insn is a reg-reg move, in which case also
-         return the src and dst regs. */
-      Bool (*isMove)(const HInstr*, HReg*, HReg*);
-
       /* Get info about register usage in this insn. */
       void (*getRegUsage)(HRegUsage*, const HInstr*, Bool);
 
index 9a6993eda0573ed0851963b60acb3f8f09595ded..88906b85f6d0965d05365a4916aec87f6953de41 100644 (file)
@@ -1578,6 +1578,15 @@ void getRegUsage_MIPSInstr(HRegUsage * u, const MIPSInstr * i, Bool mode64)
          addHRegUse(u, HRmRead, i->Min.Alu.srcL);
          addRegUsage_MIPSRH(u, i->Min.Alu.srcR);
          addHRegUse(u, HRmWrite, i->Min.Alu.dst);
+
+         /* or Rd,Rs,Rs == mr Rd,Rs */
+         if ((i->Min.Alu.op == Malu_OR)
+             && (i->Min.Alu.srcR->tag == Mrh_Reg)
+             && sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL)) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->Min.Alu.srcL;
+            u->regMoveDst   = i->Min.Alu.dst;
+         }
          return;
       case Min_Shft:
          addHRegUse(u, HRmRead, i->Min.Shft.srcL);
@@ -1942,28 +1951,6 @@ void mapRegs_MIPSInstr(HRegRemap * m, MIPSInstr * i, Bool mode64)
 
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing.
-*/
-Bool isMove_MIPSInstr(const MIPSInstr * i, HReg * src, HReg * dst)
-{
-   /* Moves between integer regs */
-   if (i->tag == Min_Alu) {
-      /* or Rd,Rs,Rs == mr Rd,Rs */
-      if (i->Min.Alu.op != Malu_OR)
-         return False;
-      if (i->Min.Alu.srcR->tag != Mrh_Reg)
-         return False;
-      if (!sameHReg(i->Min.Alu.srcR->Mrh.Reg.reg, i->Min.Alu.srcL))
-         return False;
-      *src = i->Min.Alu.srcL;
-      *dst = i->Min.Alu.dst;
-      return True;
-   }
-   return False;
-}
-
 /* Generate mips spill/reload instructions under the direction of the
    register allocator. */
 void genSpill_MIPS( /*OUT*/ HInstr ** i1, /*OUT*/ HInstr ** i2, HReg rreg,
index 45fff16fd0306b67de9fbe572be939c907725084..5bb5c1388fec299a8059a2b0129067a4f9befe39 100644 (file)
@@ -686,7 +686,6 @@ extern void ppMIPSInstr(const MIPSInstr *, Bool mode64);
    of the underlying instruction set. */
 extern void getRegUsage_MIPSInstr (HRegUsage *, const MIPSInstr *, Bool);
 extern void mapRegs_MIPSInstr     (HRegRemap *, MIPSInstr *, Bool mode64);
-extern Bool isMove_MIPSInstr      (const MIPSInstr *, HReg *, HReg *);
 extern Int        emit_MIPSInstr (/*MB_MOD*/Bool* is_profInc,
                                   UChar* buf, Int nbuf, const MIPSInstr* i,
                                   Bool mode64,
index 1ef9c5c3412dcd17b9dcf4892329ee5730f571ea..b073c1d79fc0bf12a1d186cc911faf41727ca3b8 100644 (file)
@@ -2362,6 +2362,15 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
       addHRegUse(u, HRmRead,  i->Pin.Alu.srcL);
       addRegUsage_PPCRH(u,    i->Pin.Alu.srcR);
       addHRegUse(u, HRmWrite, i->Pin.Alu.dst);
+
+      // or Rd,Rs,Rs == mr Rd,Rs
+      if ((i->Pin.Alu.op == Palu_OR)
+          && (i->Pin.Alu.srcR->tag == Prh_Reg)
+          && sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL)) {
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->Pin.Alu.srcL;
+         u->regMoveDst   = i->Pin.Alu.dst;
+      }
       return;
    case Pin_Shft:
       addHRegUse(u, HRmRead,  i->Pin.Shft.srcL);
@@ -2489,6 +2498,12 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
    case Pin_FpUnary:
       addHRegUse(u, HRmWrite, i->Pin.FpUnary.dst);
       addHRegUse(u, HRmRead,  i->Pin.FpUnary.src);
+
+      if (i->Pin.FpUnary.op == Pfp_MOV) {
+         u->isRegRegMove = True;
+         u->regMoveSrc   = i->Pin.FpUnary.src;
+         u->regMoveDst   = i->Pin.FpUnary.dst;
+      }
       return;
    case Pin_FpBinary:
       addHRegUse(u, HRmWrite, i->Pin.FpBinary.dst);
@@ -3119,37 +3134,6 @@ void mapRegs_PPCInstr ( HRegRemap* m, PPCInstr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_PPCInstr ( const PPCInstr* i, HReg* src, HReg* dst )
-{
-   /* Moves between integer regs */
-   if (i->tag == Pin_Alu) {
-      // or Rd,Rs,Rs == mr Rd,Rs
-      if (i->Pin.Alu.op != Palu_OR)
-         return False;
-      if (i->Pin.Alu.srcR->tag != Prh_Reg)
-         return False;
-      if (! sameHReg(i->Pin.Alu.srcR->Prh.Reg.reg, i->Pin.Alu.srcL))
-         return False;
-      *src = i->Pin.Alu.srcL;
-      *dst = i->Pin.Alu.dst;
-      return True;
-   }
-   /* Moves between FP regs */
-   if (i->tag == Pin_FpUnary) {
-      if (i->Pin.FpUnary.op != Pfp_MOV)
-         return False;
-      *src = i->Pin.FpUnary.src;
-      *dst = i->Pin.FpUnary.dst;
-      return True;
-   }
-   return False;
-}
-
-
 /* Generate ppc spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. */
index f8fcbf99be17a512567a3e8d60a9144c6427ad16..7932cdf6c136cf20d02741cbfbfda37fc67d8f34 100644 (file)
@@ -1201,7 +1201,6 @@ extern void ppPPCInstr(const PPCInstr*, Bool mode64);
    of the underlying instruction set. */
 extern void getRegUsage_PPCInstr ( HRegUsage*, const PPCInstr*, Bool mode64 );
 extern void mapRegs_PPCInstr     ( HRegRemap*, PPCInstr* , Bool mode64);
-extern Bool isMove_PPCInstr      ( const PPCInstr*, HReg*, HReg* );
 extern Int          emit_PPCInstr   ( /*MB_MOD*/Bool* is_profInc,
                                       UChar* buf, Int nbuf, const PPCInstr* i, 
                                       Bool mode64,
index 327674acaba7fe5f52a609891b085289a6f9b35b..f9a95576170ef44edeebb2ee21b1e034654ada98 100644 (file)
@@ -48,7 +48,6 @@
 /*--- Forward declarations                                 ---*/
 /*------------------------------------------------------------*/
 
-static Bool s390_insn_is_reg_reg_move(const s390_insn *, HReg *src, HReg *dst);
 static void s390_insn_map_regs(HRegRemap *, s390_insn *);
 static void s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *);
 static UInt s390_tchain_load64_len(void);
@@ -467,16 +466,6 @@ mapRegs_S390Instr(HRegRemap *m, s390_insn *insn, Bool mode64)
 }
 
 
-/* Figure out if the given insn represents a reg-reg move, and if so
-   assign the source and destination to *src and *dst.  If in doubt say No.
-   Used by the register allocator to do move coalescing. */
-Bool
-isMove_S390Instr(const s390_insn *insn, HReg *src, HReg *dst)
-{
-   return s390_insn_is_reg_reg_move(insn, src, dst);
-}
-
-
 /* Generate s390 spill/reload instructions under the direction of the
    register allocator.  Note it's critical these don't write the
    condition codes. This is like an Ist_Put */
@@ -587,6 +576,12 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn)
    case S390_INSN_MOVE:
       addHRegUse(u, HRmRead,  insn->variant.move.src);
       addHRegUse(u, HRmWrite, insn->variant.move.dst);
+
+      if (hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) {
+         u->isRegRegMove = True;
+         u->regMoveSrc   = insn->variant.move.src;
+         u->regMoveDst   = insn->variant.move.dst;
+      }
       break;
 
    case S390_INSN_MEMCPY:
@@ -1218,23 +1213,6 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn)
 }
 
 
-/* Return True, if INSN is a move between two registers of the same class.
-   In that case assign the source and destination registers to SRC and DST,
-   respectively. */
-static Bool
-s390_insn_is_reg_reg_move(const s390_insn *insn, HReg *src, HReg *dst)
-{
-   if (insn->tag == S390_INSN_MOVE &&
-       hregClass(insn->variant.move.src) == hregClass(insn->variant.move.dst)) {
-      *src = insn->variant.move.src;
-      *dst = insn->variant.move.dst;
-      return True;
-   }
-
-   return False;
-}
-
-
 /*------------------------------------------------------------*/
 /*--- Functions to emit a sequence of bytes                ---*/
 /*------------------------------------------------------------*/
index 41b6ecd67234483027c704f354f795652e0bea90..d9df83c22b05426eee7d737da144882de240e32b 100644 (file)
@@ -742,7 +742,6 @@ UInt ppHRegS390(HReg);
    of the underlying instruction set. */
 void  getRegUsage_S390Instr( HRegUsage *, const s390_insn *, Bool );
 void  mapRegs_S390Instr    ( HRegRemap *, s390_insn *, Bool );
-Bool  isMove_S390Instr     ( const s390_insn *, HReg *, HReg * );
 Int   emit_S390Instr       ( Bool *, UChar *, Int, const s390_insn *, Bool,
                              VexEndness, const void *, const void *,
                              const void *, const void *);
index 5f47bdb8138c830347b39ad9813541f42bbe2669..56f9d1d31b0f00bb0a16d04c82f74359c2f297b0 100644 (file)
@@ -1275,6 +1275,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
          addRegUsage_X86RMI(u, i->Xin.Alu32R.src);
          if (i->Xin.Alu32R.op == Xalu_MOV) {
             addHRegUse(u, HRmWrite, i->Xin.Alu32R.dst);
+
+            if (i->Xin.Alu32R.src->tag == Xrmi_Reg) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Xin.Alu32R.src->Xrmi.Reg.reg;
+               u->regMoveDst   = i->Xin.Alu32R.dst;
+            }
             return;
          }
          if (i->Xin.Alu32R.op == Xalu_CMP) { 
@@ -1415,6 +1421,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
       case Xin_FpUnary:
          addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
          addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
+
+         if (i->Xin.FpUnary.op == Xfp_MOV) {
+            u->isRegRegMove = True;
+            u->regMoveSrc   = i->Xin.FpUnary.src;
+            u->regMoveDst   = i->Xin.FpUnary.dst;
+         }
          return;
       case Xin_FpBinary:
          addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
@@ -1510,6 +1522,12 @@ void getRegUsage_X86Instr (HRegUsage* u, const X86Instr* i, Bool mode64)
             addHRegUse(u, i->Xin.SseReRg.op == Xsse_MOV 
                              ? HRmWrite : HRmModify, 
                           i->Xin.SseReRg.dst);
+
+            if (i->Xin.SseReRg.op == Xsse_MOV) {
+               u->isRegRegMove = True;
+               u->regMoveSrc   = i->Xin.SseReRg.src;
+               u->regMoveDst   = i->Xin.SseReRg.dst;
+            }
          }
          return;
       case Xin_SseCMov:
@@ -1709,40 +1727,6 @@ void mapRegs_X86Instr ( HRegRemap* m, X86Instr* i, Bool mode64 )
    }
 }
 
-/* Figure out if i represents a reg-reg move, and if so assign the
-   source and destination to *src and *dst.  If in doubt say No.  Used
-   by the register allocator to do move coalescing. 
-*/
-Bool isMove_X86Instr ( const X86Instr* i, HReg* src, HReg* dst )
-{
-   /* Moves between integer regs */
-   if (i->tag == Xin_Alu32R) {
-      if (i->Xin.Alu32R.op != Xalu_MOV)
-         return False;
-      if (i->Xin.Alu32R.src->tag != Xrmi_Reg)
-         return False;
-      *src = i->Xin.Alu32R.src->Xrmi.Reg.reg;
-      *dst = i->Xin.Alu32R.dst;
-      return True;
-   }
-   /* Moves between FP regs */
-   if (i->tag == Xin_FpUnary) {
-      if (i->Xin.FpUnary.op != Xfp_MOV)
-         return False;
-      *src = i->Xin.FpUnary.src;
-      *dst = i->Xin.FpUnary.dst;
-      return True;
-   }
-   if (i->tag == Xin_SseReRg) {
-      if (i->Xin.SseReRg.op != Xsse_MOV)
-         return False;
-      *src = i->Xin.SseReRg.src;
-      *dst = i->Xin.SseReRg.dst;
-      return True;
-   }
-   return False;
-}
-
 extern HInstrIfThenElse* isIfThenElse_X86Instr(X86Instr* i)
 {
    if (UNLIKELY(i->tag == Xin_IfThenElse)) {
index 1f1855088699a4ef063987bc286a47fafeecad30..f9b52ff949642101458fe98655a1161d1d53aa87 100644 (file)
@@ -742,7 +742,6 @@ extern void ppX86CondCode(X86CondCode);
    of the underlying instruction set. */
 extern void         getRegUsage_X86Instr ( HRegUsage*, const X86Instr*, Bool );
 extern void         mapRegs_X86Instr     ( HRegRemap*, X86Instr*, Bool );
-extern Bool         isMove_X86Instr      ( const X86Instr*, HReg*, HReg* );
 extern HInstrIfThenElse* isIfThenElse_X86Instr(X86Instr*);
 extern UInt         emit_X86Instr   ( /*MB_MOD*/Bool* is_profInc,
                                       UChar* buf, UInt nbuf,
index df568507edf19c8e57dce035ee9dc1c1f3974fbf..7d57443fd9c1927c600e7c35f10d475ed5963c1c 100644 (file)
@@ -1264,7 +1264,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
    /* This the bundle of functions we need to do the back-end stuff
       (insn selection, reg-alloc, assembly) whilst being insulated
       from the target instruction set. */
-   Bool         (*isMove)       ( const HInstr*, HReg*, HReg* );
    void         (*getRegUsage)  ( HRegUsage*, const HInstr*, Bool );
    void         (*mapRegs)      ( HRegRemap*, HInstr*, Bool );
    HInstrIfThenElse* (*isIfThenElse)( const HInstr* );
@@ -1298,7 +1297,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
    HInstrSB* vcode;
    HInstrSB* rcode;
 
-   isMove                  = NULL;
    getRegUsage             = NULL;
    mapRegs                 = NULL;
    isIfThenElse            = NULL;
@@ -1422,7 +1420,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
       case VexArchX86:
          mode64       = False;
          rRegUniv     = X86FN(getRRegUniverse_X86());
-         isMove       = CAST_TO_TYPEOF(isMove) X86FN(isMove_X86Instr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) X86FN(getRegUsage_X86Instr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) X86FN(mapRegs_X86Instr);
@@ -1449,7 +1446,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
       case VexArchAMD64:
          mode64       = True;
          rRegUniv     = AMD64FN(getRRegUniverse_AMD64());
-         isMove       = CAST_TO_TYPEOF(isMove) AMD64FN(isMove_AMD64Instr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) AMD64FN(getRegUsage_AMD64Instr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) AMD64FN(mapRegs_AMD64Instr);
@@ -1467,7 +1463,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
       case VexArchPPC32:
          mode64       = False;
          rRegUniv     = PPC32FN(getRRegUniverse_PPC(mode64));
-         isMove       = CAST_TO_TYPEOF(isMove) PPC32FN(isMove_PPCInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) PPC32FN(getRegUsage_PPCInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) PPC32FN(mapRegs_PPCInstr);
@@ -1484,7 +1479,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
       case VexArchPPC64:
          mode64       = True;
          rRegUniv     = PPC64FN(getRRegUniverse_PPC(mode64));
-         isMove       = CAST_TO_TYPEOF(isMove) PPC64FN(isMove_PPCInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) PPC64FN(getRegUsage_PPCInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) PPC64FN(mapRegs_PPCInstr);
@@ -1502,7 +1496,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
       case VexArchS390X:
          mode64       = True;
          rRegUniv     = S390FN(getRRegUniverse_S390());
-         isMove       = CAST_TO_TYPEOF(isMove) S390FN(isMove_S390Instr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) S390FN(getRegUsage_S390Instr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) S390FN(mapRegs_S390Instr);
@@ -1520,7 +1513,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
       case VexArchARM:
          mode64       = False;
          rRegUniv     = ARMFN(getRRegUniverse_ARM());
-         isMove       = CAST_TO_TYPEOF(isMove) ARMFN(isMove_ARMInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) ARMFN(getRegUsage_ARMInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) ARMFN(mapRegs_ARMInstr);
@@ -1537,7 +1529,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
       case VexArchARM64:
          mode64       = True;
          rRegUniv     = ARM64FN(getRRegUniverse_ARM64());
-         isMove       = CAST_TO_TYPEOF(isMove) ARM64FN(isMove_ARM64Instr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) ARM64FN(getRegUsage_ARM64Instr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) ARM64FN(mapRegs_ARM64Instr);
@@ -1554,7 +1545,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
       case VexArchMIPS32:
          mode64       = False;
          rRegUniv     = MIPS32FN(getRRegUniverse_MIPS(mode64));
-         isMove       = CAST_TO_TYPEOF(isMove) MIPS32FN(isMove_MIPSInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) MIPS32FN(getRegUsage_MIPSInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) MIPS32FN(mapRegs_MIPSInstr);
@@ -1572,7 +1562,6 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
       case VexArchMIPS64:
          mode64       = True;
          rRegUniv     = MIPS64FN(getRRegUniverse_MIPS(mode64));
-         isMove       = CAST_TO_TYPEOF(isMove) MIPS64FN(isMove_MIPSInstr);
          getRegUsage  
             = CAST_TO_TYPEOF(getRegUsage) MIPS64FN(getRegUsage_MIPSInstr);
          mapRegs      = CAST_TO_TYPEOF(mapRegs) MIPS64FN(mapRegs_MIPSInstr);
@@ -1651,8 +1640,8 @@ static void libvex_BackEnd ( const VexTranslateArgs* vta,
 
    /* Register allocate. */
    RegAllocControl con = {
-      .univ = rRegUniv, .isMove = isMove, .getRegUsage = getRegUsage,
-      .mapRegs = mapRegs, .isIfThenElse = isIfThenElse, .genSpill = genSpill,
+      .univ = rRegUniv, .getRegUsage = getRegUsage, .mapRegs = mapRegs,
+      .isIfThenElse = isIfThenElse, .genSpill = genSpill,
       .genReload = genReload, .genMove = genMove, .genHInstrITE = genHInstrITE,
       .directReload = directReload, .guest_sizeB = guest_sizeB,
       .ppInstr = ppInstr, .ppCondCode = ppCondCode, .ppReg = ppReg,