git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Implement the SSE4.1 insn PCMPEQQ. n-i-bz. (VEX side changes)
author Julian Seward <jseward@acm.org>
Wed, 19 Oct 2011 15:24:01 +0000 (15:24 +0000)
committer Julian Seward <jseward@acm.org>
Wed, 19 Oct 2011 15:24:01 +0000 (15:24 +0000)
** MERGE TO AVX **

git-svn-id: svn://svn.valgrind.org/vex/trunk@2218

VEX/priv/guest_amd64_toIR.c
VEX/priv/host_amd64_isel.c
VEX/priv/host_generic_simd128.c
VEX/priv/host_generic_simd128.h
VEX/priv/ir_defs.c
VEX/pub/libvex_ir.h

index c6ad6a1ed46f87e5d75fc6cbe7225cf0c76f3116..9fbe6647249306fc818816b2e759fc790df097ac 100644 (file)
@@ -16135,6 +16135,17 @@ DisResult disInstr_AMD64_WRK (
       goto decode_success;
    }
 
+   /* 66 0F 38 29 = PCMPEQQ
+      64x2 equality comparison
+   */
+   if ( have66noF2noF3( pfx ) && sz == 2 
+        && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x29) {
+      /* FIXME: this needs an alignment check */
+      delta = dis_SSEint_E_to_G( vbi, pfx, delta+3, 
+                                 "pcmpeqq", Iop_CmpEQ64x2, False );
+      goto decode_success;
+   }
+
    /* ---------------------------------------------------- */
    /* --- end of the SSE4 decoder                      --- */
    /* ---------------------------------------------------- */
index c887ca3a9024a718b81f0e85f503c1fb620493d0..86c089d1a3d8ff31265da3ca88f0ca34d6dac605 100644 (file)
@@ -3658,6 +3658,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
                            goto do_SseAssistedBinary;
       case Iop_Min8Sx16:   fn = (HWord)h_generic_calc_Min8Sx16;
                            goto do_SseAssistedBinary;
+      case Iop_CmpEQ64x2:  fn = (HWord)h_generic_calc_CmpEQ64x2;
+                           goto do_SseAssistedBinary;
       case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                            goto do_SseAssistedBinary;
       case Iop_QNarrowBin32Sto16Ux8:
index 4fd1df58abef08d27a27eed257ed8ee49702a043..e29a2eb554a4ca411b742bc9da9ef470c83995d0 100644 (file)
@@ -88,6 +88,12 @@ static inline UChar min8S ( Char xx, Char yy )
    return toUChar((xx < yy) ? xx : yy);
 }
 
+static inline ULong cmpEQ64 ( Long xx, Long yy )
+{
+   return (((Long)xx) == ((Long)yy))
+             ? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
+}
+
 static inline ULong cmpGT64S ( Long xx, Long yy )
 {
    return (((Long)xx) > ((Long)yy))
@@ -225,6 +231,13 @@ void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
    res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
 }
 
+void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
+                                V128* argL, V128* argR )
+{
+   res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]);
+   res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]);
+}
+
 void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
                                  V128* argL, V128* argR )
 {
index ed8a7db68a856f23b11ae3353e16ec03fe01ea31..797617823cb74bddcf63f042614c2c6697b5f0e6 100644 (file)
@@ -57,6 +57,7 @@ extern void h_generic_calc_Max16Ux8   ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_Min16Ux8   ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_Max8Sx16   ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_Min8Sx16   ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_CmpEQ64x2  ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
 extern void h_generic_calc_SarN64x2   ( /*OUT*/V128*, V128*, UInt );
 extern void h_generic_calc_SarN8x16   ( /*OUT*/V128*, V128*, UInt );
index c604ad20c4621d74d531aefadd64190e34f73540..13667d7c79e88a4f5ffb2ddaeb5bf54e89329b9c 100644 (file)
@@ -777,6 +777,7 @@ void ppIROp ( IROp op )
       case Iop_CmpEQ8x16:  vex_printf("CmpEQ8x16"); return;
       case Iop_CmpEQ16x8:  vex_printf("CmpEQ16x8"); return;
       case Iop_CmpEQ32x4:  vex_printf("CmpEQ32x4"); return;
+      case Iop_CmpEQ64x2:  vex_printf("CmpEQ64x2"); return;
       case Iop_CmpGT8Sx16: vex_printf("CmpGT8Sx16"); return;
       case Iop_CmpGT16Sx8: vex_printf("CmpGT16Sx8"); return;
       case Iop_CmpGT32Sx4: vex_printf("CmpGT32Sx4"); return;
@@ -2434,6 +2435,7 @@ void typeOfPrimop ( IROp op,
       case Iop_Min8Sx16: case Iop_Min16Sx8: case Iop_Min32Sx4:
       case Iop_Min8Ux16: case Iop_Min16Ux8: case Iop_Min32Ux4:
       case Iop_CmpEQ8x16:  case Iop_CmpEQ16x8:  case Iop_CmpEQ32x4:
+      case Iop_CmpEQ64x2:
       case Iop_CmpGT8Sx16: case Iop_CmpGT16Sx8: case Iop_CmpGT32Sx4:
       case Iop_CmpGT64Sx2:
       case Iop_CmpGT8Ux16: case Iop_CmpGT16Ux8: case Iop_CmpGT32Ux4:
index 435090fc2094d47d2356f83557110e6268644feb..f8324697f0a86be2f5e2b3bb50a3f21dec5c92b4 100644 (file)
@@ -1170,7 +1170,7 @@ typedef
       Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
 
       /* COMPARISON */
-      Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,
+      Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2,
       Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
       Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,