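AMD64 front end: translate AVX2 PMASKMOV load instructions (vector ... [subject truncated in this listing; the diff below replaces the nested-ITE load with IRStmt_LoadG guarded loads and adds the AMD64 back-end Ain_CLoad instruction]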

author Julian Seward <jseward@acm.org>

Tue, 27 Jan 2015 23:17:02 +0000 (23:17 +0000)

committer Julian Seward <jseward@acm.org>

Tue, 27 Jan 2015 23:17:02 +0000 (23:17 +0000)
author Julian Seward <jseward@acm.org>
Tue, 27 Jan 2015 23:17:02 +0000 (23:17 +0000)
committer Julian Seward <jseward@acm.org>
Tue, 27 Jan 2015 23:17:02 +0000 (23:17 +0000)
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c

index f17f41bb7ac277dd7f9eaa51745775f3f7426686..d0c223324aae6b4dcacef9eb6308c9c530964a8e 100644 (file)
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -27255,6 +27255,9 @@ static ULong dis_VMASKMOV_load ( Bool *uses_vvvv, const VexAbiInfo* vbi,
     }
     delta += alen;
  
+   for (i = 0; i < sizeof(res)/sizeof(res[0]); i++)
+      res[i] = IRTemp_INVALID;
+
     for (i = 0; i < 2 * (isYMM ? 2 : 1) * (ty == Ity_I32 ? 2 : 1); i++) {
        res[i] = newTemp(ty);
        cond = newTemp(Ity_I1);
@@ -27263,19 +27266,15 @@ static ULong dis_VMASKMOV_load ( Bool *uses_vvvv, const VexAbiInfo* vbi,
                      ty == Ity_I32 ? getYMMRegLane32( rV, i )
                                    : getYMMRegLane64( rV, i ),
                      mkU(ty, 0) ));
-      assign( res[i],
-              IRExpr_ITE(
-                 mkexpr(cond),
-                 loadLE(ty, IRExpr_ITE(
-                               mkexpr(cond),
-                               binop(Iop_Add64, mkexpr(addr),
-                                     mkU64(i*(ty == Ity_I32 ? 4 : 8))),
-                               getIReg64(R_RSP)
-                            )
-                       ),
-                 mkU(ty, 0)
-              )
-            );
+      stmt(
+         IRStmt_LoadG(
+            Iend_LE,
+            ty == Ity_I32 ? ILGop_Ident32 : ILGop_Ident64,
+            res[i], 
+            binop(Iop_Add64, mkexpr(addr), mkU64(i * (ty == Ity_I32 ? 4 : 8))),
+            ty == Ity_I32 ? mkU32(0) : mkU64(0),
+            mkexpr(cond)
+      ));
     }
     switch (ty) {
        case Ity_I32:
diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c

index 7118e2ba8e6a9db664bce36cea2bda03ab5b222f..13925fab3f78bd4e89c495ca01b5dc09e79d8914 100644 (file)
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -745,6 +745,17 @@ AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
     vassert(cond != Acc_ALWAYS);
     return i;
  }
+AMD64Instr* AMD64Instr_CLoad ( AMD64CondCode cond, UChar szB,
+                               AMD64AMode* addr, HReg dst ) {   /* Build an Ain_CLoad (conditional load to int reg) instruction record. */
+   AMD64Instr* i     = LibVEX_Alloc(sizeof(AMD64Instr));   /* record comes from LibVEX_Alloc; nothing here frees it */
+   i->tag            = Ain_CLoad;
+   i->Ain.CLoad.cond = cond;   /* condition under which the load is performed */
+   i->Ain.CLoad.szB  = szB;   /* load size in bytes; not checked here -- the emitter asserts it is 4 or 8 */
+   i->Ain.CLoad.addr = addr;   /* memory operand to load from */
+   i->Ain.CLoad.dst  = dst;   /* destination register; left unchanged when cond does not hold */
+   vassert(cond != Acc_ALWAYS);   /* an unconditional "conditional load" is rejected at construction time */
+   return i;
+}
  AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst ) {
     AMD64Instr* i       = LibVEX_Alloc(sizeof(AMD64Instr));
     i->tag              = Ain_MovxLQ;
@@ -1121,6 +1132,16 @@ void ppAMD64Instr ( const AMD64Instr* i, Bool mode64 )
           vex_printf(",");
           ppHRegAMD64(i->Ain.CMov64.dst);
           return;
+      case Ain_CLoad:
+         vex_printf("if (%%rflags.%s) { ",
+                    showAMD64CondCode(i->Ain.CLoad.cond));
+         vex_printf("mov%c (", i->Ain.CLoad.szB == 4 ? 'l' : 'q');
+         ppAMD64AMode(i->Ain.CLoad.addr);
+         vex_printf("), ");
+         (i->Ain.CLoad.szB == 4 ? ppHRegAMD64_lo32 : ppHRegAMD64)
+            (i->Ain.CLoad.dst);
+         vex_printf(" }");
+         return;
        case Ain_MovxLQ:
           vex_printf("mov%clq ", i->Ain.MovxLQ.syned ? 's' : 'z');
           ppHRegAMD64_lo32(i->Ain.MovxLQ.src);
@@ -1463,6 +1484,10 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, const AMD64Instr* i, Bool mode64 )
           addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
           addHRegUse(u, HRmModify, i->Ain.CMov64.dst);
           return;
+      case Ain_CLoad:
+         addRegUsage_AMD64AMode(u, i->Ain.CLoad.addr);
+         addHRegUse(u, HRmModify, i->Ain.CLoad.dst);
+         return;
        case Ain_MovxLQ:
           addHRegUse(u, HRmRead,  i->Ain.MovxLQ.src);
           addHRegUse(u, HRmWrite, i->Ain.MovxLQ.dst);
@@ -1695,6 +1720,10 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i, Bool mode64 )
           mapRegs_AMD64RM(m, i->Ain.CMov64.src);
           mapReg(m, &i->Ain.CMov64.dst);
           return;
+      case Ain_CLoad:
+         mapRegs_AMD64AMode(m, i->Ain.CLoad.addr);
+         mapReg(m, &i->Ain.CLoad.dst);
+         return;
        case Ain_MovxLQ:
           mapReg(m, &i->Ain.MovxLQ.src);
           mapReg(m, &i->Ain.MovxLQ.dst);
@@ -2671,43 +2700,113 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
        }
  
     case Ain_Call: {
-      if (i->Ain.Call.cond != Acc_ALWAYS
-          && i->Ain.Call.rloc.pri != RLPri_None) {
-         /* The call might not happen (it isn't unconditional) and it
-            returns a result.  In this case we will need to generate a
-            control flow diamond to put 0x555..555 in the return
-            register(s) in the case where the call doesn't happen.  If
-            this ever becomes necessary, maybe copy code from the ARM
-            equivalent.  Until that day, just give up. */
-         goto bad;
-      }
-      /* As per detailed comment for Ain_Call in
-         getRegUsage_AMD64Instr above, %r11 is used as an address
-         temporary. */
-      /* jump over the following two insns if the condition does not
-         hold */
-      Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
-      if (i->Ain.Call.cond != Acc_ALWAYS) {
-         *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
-         *p++ = shortImm ? 10 : 13;
-         /* 10 or 13 bytes in the next two insns */
-      }
-      if (shortImm) {
-         /* 7 bytes: movl sign-extend(imm32), %r11 */
-         *p++ = 0x49;
-         *p++ = 0xC7;
-         *p++ = 0xC3;
-         p = emit32(p, (UInt)i->Ain.Call.target);
+      /* As per detailed comment for Ain_Call in getRegUsage_AMD64Instr
+         above, %r11 is used as an address temporary. */
+      /* If we don't need to do any fixup actions in the case that the
+         call doesn't happen, just do the simple thing and emit
+         straight-line code.  This is usually the case. */
+      if (i->Ain.Call.cond == Acc_ALWAYS/*call always happens*/
+          || i->Ain.Call.rloc.pri == RLPri_None/*no fixup action*/) {
+         /* jump over the following two insns if the condition does
+            not hold */
+         Bool shortImm = fitsIn32Bits(i->Ain.Call.target);
+         if (i->Ain.Call.cond != Acc_ALWAYS) {
+            *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
+            *p++ = shortImm ? 10 : 13;
+            /* 10 or 13 bytes in the next two insns */
+         }
+         if (shortImm) {
+            /* 7 bytes: movl sign-extend(imm32), %r11 */
+            *p++ = 0x49;
+            *p++ = 0xC7;
+            *p++ = 0xC3;
+            p = emit32(p, (UInt)i->Ain.Call.target);
+         } else {
+            /* 10 bytes: movabsq $target, %r11 */
+            *p++ = 0x49;
+            *p++ = 0xBB;
+            p = emit64(p, i->Ain.Call.target);
+         }
+         /* 3 bytes: call *%r11 */
+         *p++ = 0x41;
+         *p++ = 0xFF;
+         *p++ = 0xD3;
        } else {
-         /* 10 bytes: movabsq $target, %r11 */
+         Int delta;
+         /* Complex case.  We have to generate an if-then-else diamond. */
+         // before:
+         //   j{!cond} else:
+         //   movabsq $target, %r11
+         //   call* %r11
+         // preElse:
+         //   jmp after:
+         // else:
+         //   movabsq $0x5555555555555555, %rax  // possibly
+         //   movq %rax, %rdx                    // possibly
+         // after:
+
+         // before:
+         UChar* pBefore = p;
+
+         //   j{!cond} else:
+         *p++ = toUChar(0x70 + (0xF & (i->Ain.Call.cond ^ 1)));
+         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+         //   movabsq $target, %r11
           *p++ = 0x49;
           *p++ = 0xBB;
           p = emit64(p, i->Ain.Call.target);
+
+         //   call* %r11
+         *p++ = 0x41;
+         *p++ = 0xFF;
+         *p++ = 0xD3;
+
+         // preElse:
+         UChar* pPreElse = p;
+
+         //   jmp after:
+         *p++ = 0xEB;
+         *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+         // else:
+         UChar* pElse = p;
+
+         /* Do the 'else' actions: the call did not happen, so fill the
+            return register(s) with the 0x5555..5555 pattern.  Each
+            valid result kind must end in 'break' so that control
+            reaches the "after:" fixups below. */
+         switch (i->Ain.Call.rloc.pri) {
+            case RLPri_Int:
+               // movabsq $0x5555555555555555, %rax
+               *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
+               break;
+            case RLPri_2Int:
+               vassert(0); //ATC
+               // movabsq $0x5555555555555555, %rax
+               *p++ = 0x48; *p++ = 0xB8; p = emit64(p, 0x5555555555555555ULL);
+               // movq %rax, %rdx
+               *p++ = 0x48; *p++ = 0x89; *p++ = 0xC2;
+               /* Without this break the case fell through into the
+                  vassert(0) below, so it would still have aborted
+                  once the ATC guard above is removed. */
+               break;
+            case RLPri_None: case RLPri_INVALID: default:
+               /* RLPri_None is handled by the straight-line path
+                  above; anything else is not a valid result kind
+                  here. */
+               vassert(0);
+         }
+
+         // after:
+         UChar* pAfter = p;
+
+         // Fix up the branch offsets.  The +2s in the offset
+         // calculations are there because x86 requires conditional
+         // branches to have their offset stated relative to the
+         // instruction immediately following the branch insn.  And in
+         // both cases the branch insns are 2 bytes long.
+
+         // First, the "j{!cond} else:" at pBefore.
+         delta = (Int)(Long)(pElse - (pBefore + 2));
+         vassert(delta >= 0 && delta < 100/*arbitrary*/);
+         *(pBefore+1) = (UChar)delta;
+
+         // And secondly, the "jmp after:" at pPreElse.
+         delta = (Int)(Long)(pAfter - (pPreElse + 2));
+         vassert(delta >= 0 && delta < 100/*arbitrary*/);
+         *(pPreElse+1) = (UChar)delta;
        }
-      /* 3 bytes: call *%r11 */
-      *p++ = 0x41;
-      *p++ = 0xFF;
-      *p++ = 0xD3;
        goto done;
     }
  
@@ -2917,6 +3016,35 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
        }
        break;
  
+   case Ain_CLoad: {
+      vassert(i->Ain.CLoad.cond != Acc_ALWAYS);
+
+      /* Only 32- or 64-bit variants are allowed. */
+      vassert(i->Ain.CLoad.szB == 4 || i->Ain.CLoad.szB == 8);
+
+      /* Use ptmp for backpatching conditional jumps. */
+      ptmp = NULL;
+
+      /* jmp fwds if !condition */
+      *p++ = toUChar(0x70 + (0xF & (i->Ain.CLoad.cond ^ 1)));
+      ptmp = p; /* fill in this bit later */
+      *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+
+      /* Now the load.  Either a normal 64 bit load or a normal 32 bit
+         load, which, by the default zero-extension rule, zeroes out
+         the upper half of the destination, as required. */
+      rex = rexAMode_M(i->Ain.CLoad.dst, i->Ain.CLoad.addr);
+      *p++ = i->Ain.CLoad.szB == 4 ? clearWBit(rex) : rex;
+      *p++ = 0x8B;
+      p = doAMode_M(p, i->Ain.CLoad.dst, i->Ain.CLoad.addr);
+
+      /* Fix up the conditional branch */
+      Int delta = p - ptmp;
+      vassert(delta > 0 && delta < 40);
+      *ptmp = toUChar(delta-1);
+      goto done;
+   }
+
     case Ain_MovxLQ:
        /* No, _don't_ ask me why the sense of the args has to be
           different in the S vs Z case.  I don't know. */
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h

index 09abfe0f7f323d755427238ae0822f28834f6625..02c89e2434590acc6026d182aa2a7077dd35e2c4 100644 (file)
--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h
@@ -368,6 +368,7 @@ typedef
        Ain_XIndir,      /* indirect transfer to GA */
        Ain_XAssisted,   /* assisted transfer to GA */
        Ain_CMov64,      /* conditional move */
+      Ain_CLoad,       /* cond. load to int reg, 32 bit ZX or 64 bit only */
        Ain_MovxLQ,      /* reg-reg move, zx-ing/sx-ing top half */
        Ain_LoadEX,      /* mov{s,z}{b,w,l}q from mem to reg */
        Ain_Store,       /* store 32/16/8 bit value in memory */
@@ -505,6 +506,14 @@ typedef
              AMD64RM*      src;
              HReg          dst;
           } CMov64;
+         /* conditional load to int reg, 32 bit ZX or 64 bit only.
+            cond may not be Acc_ALWAYS. */
+         struct {
+            AMD64CondCode cond;
+            UChar         szB; /* 4 or 8 only */
+            AMD64AMode*   addr;
+            HReg          dst;
+         } CLoad;
           /* reg-reg move, sx-ing/zx-ing top half */
           struct {
              Bool syned;
@@ -710,6 +719,8 @@ extern AMD64Instr* AMD64Instr_XIndir     ( HReg dstGA, AMD64AMode* amRIP,
  extern AMD64Instr* AMD64Instr_XAssisted  ( HReg dstGA, AMD64AMode* amRIP,
                                             AMD64CondCode cond, IRJumpKind jk );
  extern AMD64Instr* AMD64Instr_CMov64     ( AMD64CondCode, AMD64RM* src, HReg dst );
+extern AMD64Instr* AMD64Instr_CLoad      ( AMD64CondCode cond, UChar szB,
+                                           AMD64AMode* addr, HReg dst );
  extern AMD64Instr* AMD64Instr_MovxLQ     ( Bool syned, HReg src, HReg dst );
  extern AMD64Instr* AMD64Instr_LoadEX     ( UChar szSmall, Bool syned,
                                             AMD64AMode* src, HReg dst );
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c

index e6ca6f11f4bf42e30d7fc4d9bc44c118d0c4e3fb..792629455e424585d030bbddced378c898b2cc96 100644 (file)
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -4288,6 +4288,32 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
  
     switch (stmt->tag) {
  
+   /* --------- LOADG (guarded load) --------- */
+   case Ist_LoadG: {
+      IRLoadG* lg = stmt->Ist.LoadG.details;
+      if (lg->end != Iend_LE)
+         goto stmt_fail;
+
+      UChar szB = 0; /* invalid */
+      switch (lg->cvt) {
+         case ILGop_Ident32: szB = 4; break;
+         case ILGop_Ident64: szB = 8; break;
+         default: break;
+      }
+      if (szB == 0)
+         goto stmt_fail;
+
+      AMD64AMode* amAddr = iselIntExpr_AMode(env, lg->addr);
+      HReg rAlt  = iselIntExpr_R(env, lg->alt);
+      HReg rDst  = lookupIRTemp(env, lg->dst);
+      /* Get the alt value into the dst.  We'll do a conditional load
+         which overwrites it -- or not -- with loaded data. */
+      addInstr(env, mk_iMOVsd_RR(rAlt, rDst));
+      AMD64CondCode cc = iselCondCode(env, lg->guard);
+      addInstr(env, AMD64Instr_CLoad(cc, szB, amAddr, rDst));
+      return;
+   }
+
     /* --------- STORE --------- */
     case Ist_Store: {
        IRType    tya   = typeOfIRExpr(env->type_env, stmt->Ist.Store.addr);
author	Julian Seward <jseward@acm.org>
	Tue, 27 Jan 2015 23:17:02 +0000 (23:17 +0000)
committer	Julian Seward <jseward@acm.org>
	Tue, 27 Jan 2015 23:17:02 +0000 (23:17 +0000)
VEX/priv/guest_amd64_toIR.c		patch \| blob \| blame \| history
VEX/priv/host_amd64_defs.c		patch \| blob \| blame \| history
VEX/priv/host_amd64_defs.h		patch \| blob \| blame \| history
VEX/priv/host_amd64_isel.c		patch \| blob \| blame \| history