Merge from CGTUNE branch, code generation improvements for amd64:

author Julian Seward <jseward@acm.org>

Sat, 25 Aug 2007 23:21:08 +0000 (23:21 +0000)

committer Julian Seward <jseward@acm.org>

Sat, 25 Aug 2007 23:21:08 +0000 (23:21 +0000)
author Julian Seward <jseward@acm.org>
Sat, 25 Aug 2007 23:21:08 +0000 (23:21 +0000)
committer Julian Seward <jseward@acm.org>
Sat, 25 Aug 2007 23:21:08 +0000 (23:21 +0000)
diff --git a/VEX/priv/host-amd64/hdefs.c b/VEX/priv/host-amd64/hdefs.c

index a45550debe7bd5feef0b6f97c48e43455e6c62d5..401dc46e2fb57705d38094e1d3dfab2df8f0d171 100644 (file)
--- a/VEX/priv/host-amd64/hdefs.c
+++ b/VEX/priv/host-amd64/hdefs.c
@@ -1991,6 +1991,17 @@ static Bool fits8bits ( UInt w32 )
     Int i32 = (Int)w32;
     return toBool(i32 == ((i32 << 24) >> 24));
  }
+/* Can the lower 32 bits be signedly widened to produce the whole
+   64-bit value?  In other words, are the top 33 bits either all 0 or
+   all 1 ?  Computed purely in unsigned arithmetic, since shifting a
+   negative signed value left is undefined behaviour in C: biasing x
+   by 2^31 (mod 2^64) maps exactly the sign-extendable values onto
+   the range 0 .. 2^32-1. */
+static Bool fitsIn32Bits ( ULong x )
+{
+   ULong biased = x + 0x80000000ULL;
+   return toBool(biased < 0x100000000ULL);
+}
  
  
  /* Forming mod-reg-rm bytes and scale-index-base bytes.
@@ -2601,25 +2612,36 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
              goto bad;
        }
  
-   case Ain_Call:
+   case Ain_Call: {
        /* As per detailed comment for Ain_Call in
           getRegUsage_AMD64Instr above, %r11 is used as an address
           temporary. */
        /* jump over the following two insns if the condition does not
           hold */
+      Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
        if (i->Ain.Call.cond != Acc_ALWAYS) {
           *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
-         *p++ = 13; /* 13 bytes in the next two insns */
+         *p++ = shortImm ? 10 : 13;
+         /* 10 or 13 bytes in the next two insns */
+      }
+      if (shortImm) {
+         /* 7 bytes: movq $imm32, %r11 (imm32 is sign-extended) */
+         *p++ = 0x49;
+         *p++ = 0xC7;
+         *p++ = 0xC3;
+         p = emit32(p, (UInt)i->Ain.Call.target);
+      } else {
+         /* 10 bytes: movabsq $target, %r11 */
+         *p++ = 0x49;
+         *p++ = 0xBB;
+         p = emit64(p, i->Ain.Call.target);
        }
-      /* movabsq $target, %r11 */
-      *p++ = 0x49;
-      *p++ = 0xBB;
-      p = emit64(p, i->Ain.Call.target);
-      /* call *%r11 */
+      /* 3 bytes: call *%r11 */
        *p++ = 0x41;
        *p++ = 0xFF;
        *p++ = 0xD3;
        goto done;
+   }
  
     case Ain_Goto:
        /* Use ptmp for backpatching conditional jumps. */
@@ -2701,11 +2723,19 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
           destined for %rax immediately prior to this Ain_Goto. */
        vassert(sizeof(ULong) == sizeof(void*));
        vassert(dispatch != NULL);
-      /* movabsq $imm64, %rdx */
-      *p++ = 0x48;
-      *p++ = 0xBA;
-      p = emit64(p, Ptr_to_ULong(dispatch));
  
+      if (fitsIn32Bits(Ptr_to_ULong(dispatch))) {
+         /* movq $imm32, %rdx (imm32 is sign-extended to 64 bits) */
+         *p++ = 0x48;
+         *p++ = 0xC7;
+         *p++ = 0xC2;
+         p = emit32(p, (UInt)Ptr_to_ULong(dispatch));
+      } else {
+         /* movabsq $imm64, %rdx */
+         *p++ = 0x48;
+         *p++ = 0xBA;
+         p = emit64(p, Ptr_to_ULong(dispatch));
+      }
        /* jmp *%rdx */
        *p++ = 0xFF;
        *p++ = 0xE2;
diff --git a/VEX/priv/host-amd64/isel.c b/VEX/priv/host-amd64/isel.c

index 6f25c9fb9c5ec44df5222b712708bc59f7283c81..265e7c1060f30befdefb91ce4081136486e21e29 100644 (file)
--- a/VEX/priv/host-amd64/isel.c
+++ b/VEX/priv/host-amd64/isel.c
@@ -372,20 +372,54 @@ static void sub_from_rsp ( ISelEnv* env, Int n )
  //.. }
  
  
-/* Used only in doHelperCall.  See big comment in doHelperCall re
-   handling of register-parameter args.  This function figures out
-   whether evaluation of an expression might require use of a fixed
-   register.  If in doubt return True (safe but suboptimal).
-*/
-static
-Bool mightRequireFixedRegs ( IRExpr* e )
+/* Used only in doHelperCall.  If possible, produce a single
+   instruction which computes 'e' into 'dst'.  If not possible, return
+   NULL.  'e' must have type Ity_I64 (asserted).  The forms handled
+   are: a 64-bit constant (a MOV of a 32-bit immediate when
+   fitsIn32Bits says so, otherwise an Imm64 load), a read of an IR
+   temporary (register-to-register move), a 64-bit Get (MOV from
+   memory at the given offset from hregAMD64_RBP()), and Iop_32Uto64
+   applied directly to an IR temporary (MovZLQ).  The instruction is
+   only constructed and returned, not added to 'env' here. */
+
+static AMD64Instr* iselIntExpr_single_instruction ( ISelEnv* env,
+                                                    HReg     dst,
+                                                    IRExpr*  e )
  {
-   switch (e->tag) {
-      case Iex_RdTmp: case Iex_Const: case Iex_Get: 
-         return False;
-      default:
-         return True;
+   vassert(typeOfIRExpr(env->type_env, e) == Ity_I64);
+
+   if (e->tag == Iex_Const) {
+      vassert(e->Iex.Const.con->tag == Ico_U64);
+      if (fitsIn32Bits(e->Iex.Const.con->Ico.U64)) {
+         return AMD64Instr_Alu64R(
+                   Aalu_MOV,
+                   AMD64RMI_Imm(toUInt(e->Iex.Const.con->Ico.U64)),
+                   dst
+                );
+      } else {
+         return AMD64Instr_Imm64(e->Iex.Const.con->Ico.U64, dst);
+      }
+   }
+
+   if (e->tag == Iex_RdTmp) {
+      HReg src = lookupIRTemp(env, e->Iex.RdTmp.tmp);
+      return mk_iMOVsd_RR(src, dst);
     }
+
+   if (e->tag == Iex_Get) {
+      vassert(e->Iex.Get.ty == Ity_I64);
+      return AMD64Instr_Alu64R(
+                Aalu_MOV,
+                AMD64RMI_Mem(
+                   AMD64AMode_IR(e->Iex.Get.offset,
+                                 hregAMD64_RBP())),
+                dst);
+   }
+
+   if (e->tag == Iex_Unop 
+       && e->Iex.Unop.op == Iop_32Uto64 
+       && e->Iex.Unop.arg->tag == Iex_RdTmp) {
+      HReg src = lookupIRTemp(env, e->Iex.Unop.arg->Iex.RdTmp.tmp);
+      return AMD64Instr_MovZLQ(src, dst);
+   }
+
+   if (0) { ppIRExpr(e); vex_printf("\n"); }
+
+   return NULL;
  }
  
  
@@ -401,7 +435,7 @@ void doHelperCall ( ISelEnv* env,
     AMD64CondCode cc;
     HReg          argregs[6];
     HReg          tmpregs[6];
-   Bool          go_fast;
+   AMD64Instr*   fastinstrs[6];
     Int           n_args, i, argreg;
  
     /* Marshal args for a call and do the call.
@@ -471,12 +505,13 @@ void doHelperCall ( ISelEnv* env,
     tmpregs[0] = tmpregs[1] = tmpregs[2] =
     tmpregs[3] = tmpregs[4] = tmpregs[5] = INVALID_HREG;
  
+   fastinstrs[0] = fastinstrs[1] = fastinstrs[2] =
+   fastinstrs[3] = fastinstrs[4] = fastinstrs[5] = NULL;
+
     /* First decide which scheme (slow or fast) is to be used.  First
        assume the fast scheme, and select slow if any contraindications
        (wow) appear. */
  
-   go_fast = True;
-
     if (guard) {
        if (guard->tag == Iex_Const 
            && guard->Iex.Const.con->tag == Ico_U1
@@ -484,91 +519,94 @@ void doHelperCall ( ISelEnv* env,
           /* unconditional */
        } else {
           /* Not manifestly unconditional -- be conservative. */
-         go_fast = False;
+         goto slowscheme;
        }
     }
  
-   if (go_fast) {
-      for (i = 0; i < n_args; i++) {
-         if (mightRequireFixedRegs(args[i])) {
-            go_fast = False;
-            break;
-         }
-      }
+   /* Ok, let's try for the fast scheme.  If it doesn't pan out, we'll
+      use the slow scheme.  Because this is tentative, we can't call
+      addInstr (that is, commit to) any instructions until we've
+      handled all the arguments.  So park the resulting instructions
+      in a buffer and emit that if we're successful. */
+
+   /* FAST SCHEME */
+   argreg = 0;
+   if (passBBP) {
+      fastinstrs[argreg] = mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]);
+      argreg++;
     }
  
-   /* At this point the scheme to use has been established.  Generate
-      code to get the arg values into the argument rregs. */
+   for (i = 0; i < n_args; i++) {
+      vassert(argreg < 6);
+      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
+      fastinstrs[argreg] 
+         = iselIntExpr_single_instruction( env, argregs[argreg], args[i] );
+      if (fastinstrs[argreg] == NULL)
+         goto slowscheme;
+      argreg++;
+   }
  
-   if (go_fast) {
+   /* Looks like we're in luck.  Emit the accumulated instructions and
+      move on to doing the call itself. */
+   vassert(argreg <= 6);
+   for (i = 0; i < argreg; i++)
+      addInstr(env, fastinstrs[i]);
  
-      /* FAST SCHEME */
-      argreg = 0;
-      if (passBBP) {
-         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), argregs[argreg]));
-         argreg++;
-      }
+   /* Fast scheme only applies for unconditional calls.  Hence: */
+   cc = Acc_ALWAYS;
  
-      for (i = 0; i < n_args; i++) {
-         vassert(argreg < 6);
-         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
-         addInstr(env, AMD64Instr_Alu64R(
-                          Aalu_MOV, 
-                          iselIntExpr_RMI(env, args[i]),
-                          argregs[argreg]
-                       )
-                 );
-         argreg++;
-      }
-
-      /* Fast scheme only applies for unconditional calls.  Hence: */
-      cc = Acc_ALWAYS;
-
-   } else {
-
-      /* SLOW SCHEME; move via temporaries */
-      argreg = 0;
-
-      if (passBBP) {
-         /* This is pretty stupid; better to move directly to rdi
-            after the rest of the args are done. */
-         tmpregs[argreg] = newVRegI(env);
-         addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
-         argreg++;
-      }
-
-      for (i = 0; i < n_args; i++) {
-         vassert(argreg < 6);
-         vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
-         tmpregs[argreg] = iselIntExpr_R(env, args[i]);
-         argreg++;
-      }
-
-      /* Now we can compute the condition.  We can't do it earlier
-         because the argument computations could trash the condition
-         codes.  Be a bit clever to handle the common case where the
-         guard is 1:Bit. */
-      cc = Acc_ALWAYS;
-      if (guard) {
-         if (guard->tag == Iex_Const 
-             && guard->Iex.Const.con->tag == Ico_U1
-             && guard->Iex.Const.con->Ico.U1 == True) {
-            /* unconditional -- do nothing */
-         } else {
-            cc = iselCondCode( env, guard );
-         }
-      }
+   goto handle_call;
+
+
+   /* SLOW SCHEME; move via temporaries */
+  slowscheme:
+#if 0
+if (n_args > 0) {for (i = 0; args[i]; i++) {
+ppIRExpr(args[i]); vex_printf(" "); }
+vex_printf("\n");}
+#endif
+   argreg = 0;
  
-      /* Move the args to their final destinations. */
-      for (i = 0; i < argreg; i++) {
-         /* None of these insns, including any spill code that might
-            be generated, may alter the condition codes. */
-         addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
+   if (passBBP) {
+      /* This is pretty stupid; better to move directly to rdi
+         after the rest of the args are done. */
+      tmpregs[argreg] = newVRegI(env);
+      addInstr(env, mk_iMOVsd_RR( hregAMD64_RBP(), tmpregs[argreg]));
+      argreg++;
+   }
+
+   for (i = 0; i < n_args; i++) {
+      vassert(argreg < 6);
+      vassert(typeOfIRExpr(env->type_env, args[i]) == Ity_I64);
+      tmpregs[argreg] = iselIntExpr_R(env, args[i]);
+      argreg++;
+   }
+
+   /* Now we can compute the condition.  We can't do it earlier
+      because the argument computations could trash the condition
+      codes.  Be a bit clever to handle the common case where the
+      guard is 1:Bit. */
+   cc = Acc_ALWAYS;
+   if (guard) {
+      if (guard->tag == Iex_Const 
+          && guard->Iex.Const.con->tag == Ico_U1
+          && guard->Iex.Const.con->Ico.U1 == True) {
+         /* unconditional -- do nothing */
+      } else {
+         cc = iselCondCode( env, guard );
        }
+   }
  
+   /* Move the args to their final destinations. */
+   for (i = 0; i < argreg; i++) {
+      /* None of these insns, including any spill code that might
+         be generated, may alter the condition codes. */
+      addInstr( env, mk_iMOVsd_RR( tmpregs[i], argregs[i] ) );
     }
  
+
     /* Finally, the call itself. */
+  handle_call:
     addInstr(env, AMD64Instr_Call( 
                      cc, 
                      Ptr_to_ULong(cee->addr),
author	Julian Seward <jseward@acm.org>
	Sat, 25 Aug 2007 23:21:08 +0000 (23:21 +0000)
committer	Julian Seward <jseward@acm.org>
	Sat, 25 Aug 2007 23:21:08 +0000 (23:21 +0000)
VEX/priv/host-amd64/hdefs.c		patch \| blob \| blame \| history
VEX/priv/host-amd64/isel.c		patch \| blob \| blame \| history