From: Julian Seward
Date: Thu, 2 Jan 2020 08:23:46 +0000 (+0100)
Subject: amd64 back end: generate 32-bit shift instructions for 32-bit IR shifts.
X-Git-Tag: VALGRIND_3_16_0~155
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4eaa80103df9d1d396cc4b7427ea99faac11329d;p=thirdparty%2Fvalgrind.git

amd64 back end: generate 32-bit shift instructions for 32-bit IR shifts.

Until now these have been handled by possibly widening the value to 64 bits,
if necessary, followed by a 64-bit shift.  That wastes instructions and code
space.
---

diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index 29127c1258..3d237e112d 100644
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -626,6 +626,14 @@ AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp op, UInt src, HReg dst ) {
    i->Ain.Sh64.dst = dst;
    return i;
 }
+AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp op, UInt src, HReg dst ) {
+   AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
+   i->tag = Ain_Sh32;
+   i->Ain.Sh32.op = op;
+   i->Ain.Sh32.src = src;
+   i->Ain.Sh32.dst = dst;
+   return i;
+}
 AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst ) {
    AMD64Instr* i = LibVEX_Alloc_inline(sizeof(AMD64Instr));
    i->tag = Ain_Test64;
@@ -1090,6 +1098,14 @@ void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
          vex_printf("$%d,", (Int)i->Ain.Sh64.src);
          ppHRegAMD64(i->Ain.Sh64.dst);
          return;
+      case Ain_Sh32:
+         vex_printf("%sl ", showAMD64ShiftOp(i->Ain.Sh32.op));
+         if (i->Ain.Sh32.src == 0)
+            vex_printf("%%cl,");
+         else
+            vex_printf("$%d,", (Int)i->Ain.Sh32.src);
+         ppHRegAMD64_lo32(i->Ain.Sh32.dst);
+         return;
       case Ain_Test64:
          vex_printf("testq $%d,", (Int)i->Ain.Test64.imm32);
          ppHRegAMD64(i->Ain.Test64.dst);
@@ -1471,6 +1487,11 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
          if (i->Ain.Sh64.src == 0)
            addHRegUse(u, HRmRead, hregAMD64_RCX());
         return;
+      case Ain_Sh32:
+         addHRegUse(u, HRmModify, i->Ain.Sh32.dst);
+         if (i->Ain.Sh32.src == 0)
+           addHRegUse(u, HRmRead, hregAMD64_RCX());
+        return;
       case Ain_Test64:
         addHRegUse(u, HRmRead, i->Ain.Test64.dst);
         return;
@@ -1808,6 +1829,9 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
       case Ain_Sh64:
         mapReg(m, &i->Ain.Sh64.dst);
         return;
+      case Ain_Sh32:
+        mapReg(m, &i->Ain.Sh32.dst);
+        return;
       case Ain_Test64:
         mapReg(m, &i->Ain.Test64.dst);
         return;
@@ -2762,6 +2786,30 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
       }
       break;

+   case Ain_Sh32:
+      opc_cl = opc_imm = subopc = 0;
+      switch (i->Ain.Sh32.op) {
+         case Ash_SHR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 5; break;
+         case Ash_SAR: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 7; break;
+         case Ash_SHL: opc_cl = 0xD3; opc_imm = 0xC1; subopc = 4; break;
+         default: goto bad;
+      }
+      if (i->Ain.Sh32.src == 0) {
+         rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
+         if (rex != 0x40) *p++ = rex;
+         *p++ = toUChar(opc_cl);
+         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
+         goto done;
+      } else {
+         rex = clearWBit( rexAMode_R_enc_reg(0, i->Ain.Sh32.dst) );
+         if (rex != 0x40) *p++ = rex;
+         *p++ = toUChar(opc_imm);
+         p = doAMode_R_enc_reg(p, subopc, i->Ain.Sh32.dst);
+         *p++ = (UChar)(i->Ain.Sh32.src);
+         goto done;
+      }
+      break;
+
    case Ain_Test64:
       /* testq sign-extend($imm32), %reg */
       *p++ = rexAMode_R_enc_reg(0, i->Ain.Test64.dst);
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
index 3dfa9fbf5c..e2ed2613bf 100644
--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h
@@ -359,7 +359,8 @@ typedef
       Ain_Imm64,       /* Generate 64-bit literal to register */
       Ain_Alu64R,      /* 64-bit mov/arith/logical, dst=REG */
       Ain_Alu64M,      /* 64-bit mov/arith/logical, dst=MEM */
-      Ain_Sh64,        /* 64-bit shift/rotate, dst=REG or MEM */
+      Ain_Sh64,        /* 64-bit shift, dst=REG */
+      Ain_Sh32,        /* 32-bit shift, dst=REG */
       Ain_Test64,      /* 64-bit test (AND, set flags, discard result) */
       Ain_Unary64,     /* 64-bit not and neg */
       Ain_Lea64,       /* 64-bit compute EA into a reg */
@@ -441,6 +442,11 @@ typedef
             UInt src; /* shift amount, or 0 means %cl */
             HReg dst;
          } Sh64;
+         struct {
+            AMD64ShiftOp op;
+            UInt src; /* shift amount, or 0 means %cl */
+            HReg dst;
+         } Sh32;
          struct {
             UInt imm32;
             HReg dst;
@@ -744,6 +750,7 @@ extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, HReg dst );
 extern AMD64Instr* AMD64Instr_Lea64 ( AMD64AMode* am, HReg dst );
 extern AMD64Instr* AMD64Instr_Alu32R ( AMD64AluOp, AMD64RMI*, HReg );
 extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, HReg );
+extern AMD64Instr* AMD64Instr_Sh32 ( AMD64ShiftOp, UInt, HReg );
 extern AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst );
 extern AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* );
 extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* );
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
index dfaabb4689..6b70e54789 100644
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -1030,9 +1030,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
          HReg regL = iselIntExpr_R(env, e->Iex.Binop.arg1);
          addInstr(env, mk_iMOVsd_RR(regL,dst));

-         /* Do any necessary widening for 32/16/8 bit operands */
+         /* Do any necessary widening for 16/8 bit operands.  Also decide on the
+            final width at which the shift is to be done. */
+         Bool shift64 = False;
          switch (e->Iex.Binop.op) {
             case Iop_Shr64: case Iop_Shl64: case Iop_Sar64:
+               shift64 = True;
                break;
             case Iop_Shl32: case Iop_Shl16: case Iop_Shl8:
                break;
@@ -1045,18 +1048,16 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
                                    Aalu_AND, AMD64RMI_Imm(0xFFFF), dst));
                break;
             case Iop_Shr32:
-               addInstr(env, AMD64Instr_MovxLQ(False, dst, dst));
                break;
             case Iop_Sar8:
-               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 56, dst));
-               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 56, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SHL, 24, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SAR, 24, dst));
                break;
             case Iop_Sar16:
-               addInstr(env, AMD64Instr_Sh64(Ash_SHL, 48, dst));
-               addInstr(env, AMD64Instr_Sh64(Ash_SAR, 48, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SHL, 16, dst));
+               addInstr(env, AMD64Instr_Sh32(Ash_SAR, 16, dst));
                break;
             case Iop_Sar32:
-               addInstr(env, AMD64Instr_MovxLQ(True, dst, dst));
                break;
             default:
                ppIROp(e->Iex.Binop.op);
@@ -1071,14 +1072,23 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, const IRExpr* e )
             vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
             nshift = e->Iex.Binop.arg2->Iex.Const.con->Ico.U8;
             vassert(nshift >= 0);
-            if (nshift > 0)
+            if (nshift > 0) {
                /* Can't allow nshift==0 since that means %cl */
-               addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
+               if (shift64) {
+                  addInstr(env, AMD64Instr_Sh64(shOp, nshift, dst));
+               } else {
+                  addInstr(env, AMD64Instr_Sh32(shOp, nshift, dst));
+               }
+            }
          } else {
             /* General case; we have to force the amount into %cl. */
             HReg regR = iselIntExpr_R(env, e->Iex.Binop.arg2);
             addInstr(env, mk_iMOVsd_RR(regR,hregAMD64_RCX()));
-            addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
+            if (shift64) {
+               addInstr(env, AMD64Instr_Sh64(shOp, 0/* %cl */, dst));
+            } else {
+               addInstr(env, AMD64Instr_Sh32(shOp, 0/* %cl */, dst));
+            }
          }
          return dst;
      }
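
As a rough illustration of the saving described in the commit message, the
standalone sketch below re-creates, outside of VEX, the byte sequence the new
Ain_Sh32 emit case produces for a shift-by-immediate: an optional REX prefix
with the W bit cleared, the 0xC1 group-2 opcode, a register-direct ModRM byte
carrying the sub-opcode (5 for SHR, 7 for SAR, 4 for SHL, as in the patch),
and an 8-bit count.  The helper name encodeSh32Imm, the choice of %ecx and
the shift count of 3 are made up for the example; this is not code from the
patch itself.

/* Standalone sketch only: mirrors the encoding scheme of the Ain_Sh32
   case in emit_AMD64Instr, for a register-direct destination. */
#include <stdio.h>

typedef unsigned char UChar;

static int encodeSh32Imm ( UChar* p, int subopc, int regno, int count )
{
   int   n   = 0;
   UChar rex = 0x40 | ((regno >> 3) & 1);        /* REX.B only; W stays clear */
   if (rex != 0x40) p[n++] = rex;                /* omit a redundant 0x40 */
   p[n++] = 0xC1;                                /* Grp2 Ev,Ib opcode */
   p[n++] = 0xC0 | (subopc << 3) | (regno & 7);  /* ModRM, register-direct */
   p[n++] = (UChar)count;                        /* 8-bit shift amount */
   return n;
}

int main ( void )
{
   UChar buf[8];
   int   n = encodeSh32Imm(buf, 5/*SHR*/, 1/*%ecx*/, 3);
   /* Prints "c1 e9 03", i.e. shrl $3,%ecx -- three bytes, one insn. */
   for (int j = 0; j < n; j++) printf("%02x ", buf[j]);
   printf("\n");
   return 0;
}

By comparison, the previous lowering of Iop_Shr32 went through
AMD64Instr_MovxLQ plus a 64-bit Sh64, i.e. roughly movl %ecx,%ecx followed by
shrq $3,%rcx: two instructions and six bytes instead of one instruction and
three bytes.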