From: Julian Seward
Date: Thu, 21 Jun 2012 08:53:48 +0000 (+0000)
Subject: Add support for
X-Git-Tag: svn/VALGRIND_3_8_1^2~80
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6db26d38793f2ed4eff6cb8f92faee29d24c2129;p=thirdparty%2Fvalgrind.git

Add support for
VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r
VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r
VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r
VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r
VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r
VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r
(Jakub Jelinek, jakub@redhat.com), #273475 comment 127.
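
For reference, the Z/C flag semantics that the new dis_xTESTy_* and
finish_xTESTy helpers implement can be modelled in plain C roughly as
below.  This is only an illustrative sketch added for this note, not
part of the patch; the function name xtesty_model and its argument
layout are invented here.  It covers the 128-bit case; the 256-bit
helpers simply OR the two 128-bit halves first.

/* Illustrative model: the Z and C bits produced by PTEST (sign==0),
   VTESTPS (sign==32) and VTESTPD (sign==64) for a 128-bit E (rm)
   operand and G (reg) operand, each passed as two 64-bit halves. */
#include <stdint.h>
#include <stdio.h>

static void xtesty_model ( uint64_t e_lo, uint64_t e_hi,
                           uint64_t g_lo, uint64_t g_hi,
                           int sign, int* z, int* c )
{
   /* Fold E&G and E&~G down to 64 bits by OR-ing the two halves,
      as finish_xTESTy does with InterleaveLO/HI64x2 + OrV128. */
   uint64_t and64  = (e_lo & g_lo)  | (e_hi & g_hi);
   uint64_t andn64 = (e_lo & ~g_lo) | (e_hi & ~g_hi);

   if (sign == 32) {
      /* VTESTPS: only the sign bit of each 32-bit lane matters. */
      and64  &= 0x8000000080000000ULL;
      andn64 &= 0x8000000080000000ULL;
   } else if (sign == 64) {
      /* VTESTPD: only the sign bit of each 64-bit lane matters. */
      and64  &= 0x8000000000000000ULL;
      andn64 &= 0x8000000000000000ULL;
   }

   /* Z=1 iff (E & G) is all zeroes; C=1 iff (E & ~G) is all zeroes.
      finish_xTESTy computes the same predicates branch-free, using
      Not64((x | -x) >>s 63) to turn "x == 0" into an all-ones mask. */
   *z = (and64  == 0);
   *c = (andn64 == 0);
}

int main ( void )
{
   int z, c;
   /* E and G share no bits -> Z=1; E has bits outside G -> C=0. */
   xtesty_model(0x00FFULL, 0, 0xFF00ULL, 0, 0/*PTEST*/, &z, &c);
   printf("Z=%d C=%d\n", z, c);
   return 0;
}
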

git-svn-id: svn://svn.valgrind.org/vex/trunk@2398
---
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index e2ea02b223..dba5697c75 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -15071,6 +15071,215 @@ static IRTemp math_PBLENDVB_128 ( IRTemp vecE, IRTemp vecG,
    return res;
 }
 
+static void finish_xTESTy ( IRTemp andV, IRTemp andnV, Int sign )
+{
+   /* Set Z=1 iff (vecE & vecG) == 0
+      Set C=1 iff (vecE & not vecG) == 0
+   */
+
+   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
+
+   /* andV resp. andnV, reduced to 64-bit values, by or-ing the top
+      and bottom 64-bits together.  It relies on this trick:
+
+      InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
+
+      InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
+      InterleaveHI64x2([a,b],[a,b]) == [a,a]
+
+      and so the OR of the above 2 exprs produces
+      [a OR b, a OR b], from which we simply take the lower half.
+   */
+   IRTemp and64  = newTemp(Ity_I64);
+   IRTemp andn64 = newTemp(Ity_I64);
+
+   assign(and64,
+          unop(Iop_V128to64,
+               binop(Iop_OrV128,
+                     binop(Iop_InterleaveLO64x2,
+                           mkexpr(andV), mkexpr(andV)),
+                     binop(Iop_InterleaveHI64x2,
+                           mkexpr(andV), mkexpr(andV)))));
+
+   assign(andn64,
+          unop(Iop_V128to64,
+               binop(Iop_OrV128,
+                     binop(Iop_InterleaveLO64x2,
+                           mkexpr(andnV), mkexpr(andnV)),
+                     binop(Iop_InterleaveHI64x2,
+                           mkexpr(andnV), mkexpr(andnV)))));
+
+   IRTemp z64 = newTemp(Ity_I64);
+   IRTemp c64 = newTemp(Ity_I64);
+   if (sign == 64) {
+      /* When only interested in the most significant bit, just shift
+         arithmetically right and negate.  */
+      assign(z64,
+             unop(Iop_Not64,
+                  binop(Iop_Sar64, mkexpr(and64), mkU8(63))));
+
+      assign(c64,
+             unop(Iop_Not64,
+                  binop(Iop_Sar64, mkexpr(andn64), mkU8(63))));
+   } else {
+      if (sign == 32) {
+         /* When interested in bit 31 and bit 63, mask those bits and
+            fallthrough into the PTEST handling.  */
+         IRTemp t0 = newTemp(Ity_I64);
+         IRTemp t1 = newTemp(Ity_I64);
+         IRTemp t2 = newTemp(Ity_I64);
+         assign(t0, mkU64(0x8000000080000000ULL));
+         assign(t1, binop(Iop_And64, mkexpr(and64), mkexpr(t0)));
+         assign(t2, binop(Iop_And64, mkexpr(andn64), mkexpr(t0)));
+         and64 = t1;
+         andn64 = t2;
+      }
+      /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
+         slice out the Z and C bits conveniently.  We use the standard
+         trick all-zeroes -> all-zeroes, anything-else -> all-ones
+         done by "(x | -x) >>s (word-size - 1)".
+      */
+      assign(z64,
+             unop(Iop_Not64,
+                  binop(Iop_Sar64,
+                        binop(Iop_Or64,
+                              binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
+                              mkexpr(and64)), mkU8(63))));
+
+      assign(c64,
+             unop(Iop_Not64,
+                  binop(Iop_Sar64,
+                        binop(Iop_Or64,
+                              binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
+                              mkexpr(andn64)), mkU8(63))));
+   }
+
+   /* And finally, slice out the Z and C flags and set the flags
+      thunk to COPY for them.  OSAP are set to zero. */
+   IRTemp newOSZACP = newTemp(Ity_I64);
+   assign(newOSZACP,
+          binop(Iop_Or64,
+                binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
+                binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))));
+
+   stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
+   stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
+   stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
+   stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
+}
+
+
+/* Handles 128 bit versions of PTEST, VTESTPS or VTESTPD.
+   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
+static Long dis_xTESTy_128 ( VexAbiInfo* vbi, Prefix pfx,
+                             Long delta, Bool isAvx, Int sign )
+{
+   IRTemp addr   = IRTemp_INVALID;
+   Int    alen   = 0;
+   HChar  dis_buf[50];
+   UChar  modrm  = getUChar(delta);
+   UInt   rG     = gregOfRexRM(pfx, modrm);
+   IRTemp vecE = newTemp(Ity_V128);
+   IRTemp vecG = newTemp(Ity_V128);
+
+   if ( epartIsReg(modrm) ) {
+      UInt rE = eregOfRexRM(pfx, modrm);
+      assign(vecE, getXMMReg(rE));
+      delta += 1;
+      DIP( "%s%stest%s %s,%s\n",
+           isAvx ? "v" : "", sign == 0 ? "p" : "",
+           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
+           nameXMMReg(rE), nameXMMReg(rG) );
+   } else {
+      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
+      if (!isAvx)
+         gen_SEGV_if_not_16_aligned( addr );
+      assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
+      delta += alen;
+      DIP( "%s%stest%s %s,%s\n",
+           isAvx ? "v" : "", sign == 0 ? "p" : "",
+           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
+           dis_buf, nameXMMReg(rG) );
+   }
+
+   assign(vecG, getXMMReg(rG));
+
+   /* Set Z=1 iff (vecE & vecG) == 0
+      Set C=1 iff (vecE & not vecG) == 0
+   */
+
+   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
+   IRTemp andV  = newTemp(Ity_V128);
+   IRTemp andnV = newTemp(Ity_V128);
+   assign(andV,  binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
+   assign(andnV, binop(Iop_AndV128,
+                       mkexpr(vecE),
+                       binop(Iop_XorV128, mkexpr(vecG),
+                                          mkV128(0xFFFF))));
+
+   finish_xTESTy ( andV, andnV, sign );
+   return delta;
+}
+
+
+/* Handles 256 bit versions of PTEST, VTESTPS or VTESTPD.
+   sign is 0 for PTEST insn, 32 for VTESTPS and 64 for VTESTPD. */
+static Long dis_xTESTy_256 ( VexAbiInfo* vbi, Prefix pfx,
+                             Long delta, Int sign )
+{
+   IRTemp addr   = IRTemp_INVALID;
+   Int    alen   = 0;
+   HChar  dis_buf[50];
+   UChar  modrm  = getUChar(delta);
+   UInt   rG     = gregOfRexRM(pfx, modrm);
+   IRTemp vecE = newTemp(Ity_V256);
+   IRTemp vecG = newTemp(Ity_V256);
+
+   if ( epartIsReg(modrm) ) {
+      UInt rE = eregOfRexRM(pfx, modrm);
+      assign(vecE, getYMMReg(rE));
+      delta += 1;
+      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
+           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
+           nameYMMReg(rE), nameYMMReg(rG) );
+   } else {
+      addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
+      assign(vecE, loadLE( Ity_V256, mkexpr(addr) ));
+      delta += alen;
+      DIP( "v%stest%s %s,%s\n", sign == 0 ? "p" : "",
+           sign == 0 ? "" : sign == 32 ? "ps" : "pd",
+           dis_buf, nameYMMReg(rG) );
+   }
+
+   assign(vecG, getYMMReg(rG));
+
+   /* Set Z=1 iff (vecE & vecG) == 0
+      Set C=1 iff (vecE & not vecG) == 0
+   */
+
+   /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
+   IRTemp andV  = newTemp(Ity_V256);
+   IRTemp andnV = newTemp(Ity_V256);
+   assign(andV,  binop(Iop_AndV256, mkexpr(vecE), mkexpr(vecG)));
+   assign(andnV, binop(Iop_AndV256,
+                       mkexpr(vecE), unop(Iop_NotV256, mkexpr(vecG))));
+
+   IRTemp andVhi  = IRTemp_INVALID;
+   IRTemp andVlo  = IRTemp_INVALID;
+   IRTemp andnVhi = IRTemp_INVALID;
+   IRTemp andnVlo = IRTemp_INVALID;
+   breakupV256toV128s( andV, &andVhi, &andVlo );
+   breakupV256toV128s( andnV, &andnVhi, &andnVlo );
+
+   IRTemp andV128  = newTemp(Ity_V128);
+   IRTemp andnV128 = newTemp(Ity_V128);
+   assign( andV128,  binop( Iop_OrV128, mkexpr(andVhi),  mkexpr(andVlo) ) );
+   assign( andnV128, binop( Iop_OrV128, mkexpr(andnVhi), mkexpr(andnVlo) ) );
+
+   finish_xTESTy ( andV128, andnV128, sign );
+   return delta;
+}
+
 
 /* Handles 128 bit versions of PMOVZXBW and PMOVSXBW. */
 static Long dis_PMOVxXBW_128 ( VexAbiInfo* vbi, Prefix pfx,
@@ -15363,116 +15572,7 @@ Long dis_ESC_0F38__SSE4 ( Bool* decode_OK,
          Logical compare (set ZF and CF from AND/ANDN of the operands) */
       if (have66noF2noF3(pfx)
           && (sz == 2 || /* ignore redundant REX.W */ sz == 8)) {
-         modrm = getUChar(delta);
-         IRTemp vecE = newTemp(Ity_V128);
-         IRTemp vecG = newTemp(Ity_V128);
-
-         if ( epartIsReg(modrm) ) {
-            assign(vecE, getXMMReg(eregOfRexRM(pfx, modrm)));
-            delta += 1;
-            DIP( "ptest %s,%s\n",
-                 nameXMMReg( eregOfRexRM(pfx, modrm) ),
-                 nameXMMReg( gregOfRexRM(pfx, modrm) ) );
-         } else {
-            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
-            gen_SEGV_if_not_16_aligned( addr );
-            assign(vecE, loadLE( Ity_V128, mkexpr(addr) ));
-            delta += alen;
-            DIP( "ptest %s,%s\n",
-                 dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
-         }
-
-         assign(vecG, getXMMReg(gregOfRexRM(pfx, modrm)));
-
-         /* Set Z=1 iff (vecE & vecG) == 0
-            Set C=1 iff (vecE & not vecG) == 0
-         */
-
-         /* andV, andnV:  vecE & vecG,  vecE and not(vecG) */
-         IRTemp andV = newTemp(Ity_V128);
-         IRTemp andnV = newTemp(Ity_V128);
-         assign(andV, binop(Iop_AndV128, mkexpr(vecE), mkexpr(vecG)));
-         assign(andnV, binop(Iop_AndV128,
-                             mkexpr(vecE),
-                             binop(Iop_XorV128, mkexpr(vecG),
-                                                mkV128(0xFFFF))));
-
-         /* The same, but reduced to 64-bit values, by or-ing the top
-            and bottom 64-bits together.  It relies on this trick:
-
-            InterleaveLO64x2([a,b],[c,d]) == [b,d] hence
-
-            InterleaveLO64x2([a,b],[a,b]) == [b,b] and similarly
-            InterleaveHI64x2([a,b],[a,b]) == [a,a]
-
-            and so the OR of the above 2 exprs produces
-            [a OR b, a OR b], from which we simply take the lower half.
-         */
-         IRTemp and64 = newTemp(Ity_I64);
-         IRTemp andn64 = newTemp(Ity_I64);
-
-         assign(
-            and64,
-            unop(Iop_V128to64,
-                 binop(Iop_OrV128,
-                       binop(Iop_InterleaveLO64x2, mkexpr(andV), mkexpr(andV)),
-                       binop(Iop_InterleaveHI64x2, mkexpr(andV), mkexpr(andV))
-                 )
-            )
-         );
-
-         assign(
-            andn64,
-            unop(Iop_V128to64,
-                 binop(Iop_OrV128,
-                       binop(Iop_InterleaveLO64x2, mkexpr(andnV), mkexpr(andnV)),
-                       binop(Iop_InterleaveHI64x2, mkexpr(andnV), mkexpr(andnV))
-                 )
-            )
-         );
-
-         /* Now convert and64, andn64 to all-zeroes or all-1s, so we can
-            slice out the Z and C bits conveniently.  We use the standard
-            trick all-zeroes -> all-zeroes, anything-else -> all-ones
-            done by "(x | -x) >>s (word-size - 1)".
-         */
-         IRTemp z64 = newTemp(Ity_I64);
-         IRTemp c64 = newTemp(Ity_I64);
-         assign(z64,
-                unop(Iop_Not64,
-                     binop(Iop_Sar64,
-                           binop(Iop_Or64,
-                                 binop(Iop_Sub64, mkU64(0), mkexpr(and64)),
-                                 mkexpr(and64)
-                           ),
-                           mkU8(63)))
-         );
-
-         assign(c64,
-                unop(Iop_Not64,
-                     binop(Iop_Sar64,
-                           binop(Iop_Or64,
-                                 binop(Iop_Sub64, mkU64(0), mkexpr(andn64)),
-                                 mkexpr(andn64)
-                           ),
-                           mkU8(63)))
-         );
-
-         /* And finally, slice out the Z and C flags and set the flags
-            thunk to COPY for them.  OSAP are set to zero. */
-         IRTemp newOSZACP = newTemp(Ity_I64);
-         assign(newOSZACP,
-                binop(Iop_Or64,
-                      binop(Iop_And64, mkexpr(z64), mkU64(AMD64G_CC_MASK_Z)),
-                      binop(Iop_And64, mkexpr(c64), mkU64(AMD64G_CC_MASK_C))
-                )
-         );
-
-         stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(newOSZACP)));
-         stmt( IRStmt_Put( OFFB_CC_OP,   mkU64(AMD64G_CC_OP_COPY) ));
-         stmt( IRStmt_Put( OFFB_CC_DEP2, mkU64(0) ));
-         stmt( IRStmt_Put( OFFB_CC_NDEP, mkU64(0) ));
-
+         delta = dis_xTESTy_128( vbi, pfx, delta, False/*!isAvx*/, 0 );
          goto decode_success;
       }
       break;
@@ -23236,6 +23336,45 @@ Long dis_ESC_0F38__VEX (
       }
       break;
 
+   case 0x0E:
+      /* VTESTPS xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0E /r */
+      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
+         delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 32 );
+         goto decode_success;
+      }
+      /* VTESTPS ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0E /r */
+      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
+         delta = dis_xTESTy_256( vbi, pfx, delta, 32 );
+         goto decode_success;
+      }
+      break;
+
+   case 0x0F:
+      /* VTESTPD xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 0F /r */
+      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
+         delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 64 );
+         goto decode_success;
+      }
+      /* VTESTPD ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 0F /r */
+      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
+         delta = dis_xTESTy_256( vbi, pfx, delta, 64 );
+         goto decode_success;
+      }
+      break;
+
+   case 0x17:
+      /* VPTEST xmm2/m128, xmm1 = VEX.128.66.0F38.WIG 17 /r */
+      if (have66noF2noF3(pfx) && 0==getVexL(pfx)/*128*/) {
+         delta = dis_xTESTy_128( vbi, pfx, delta, True/*isAvx*/, 0 );
+         goto decode_success;
+      }
+      /* VPTEST ymm2/m256, ymm1 = VEX.256.66.0F38.WIG 17 /r */
+      if (have66noF2noF3(pfx) && 1==getVexL(pfx)/*256*/) {
+         delta = dis_xTESTy_256( vbi, pfx, delta, 0 );
+         goto decode_success;
+      }
+      break;
+
    case 0x18:
       /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
       if (have66noF2noF3(pfx)