From: Julian Seward
Date: Sat, 22 Oct 2011 09:32:16 +0000 (+0000)
Subject: VEX side fixes to match r12190, which is a fix for #279698 (incorrect
X-Git-Tag: svn/VALGRIND_3_7_0^2~5
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=578a006a4aebbace309554b523f00a0041b34f2b;p=thirdparty%2Fvalgrind.git

VEX side fixes to match r12190, which is a fix for #279698 (incorrect
Memcheck handling of saturating narrowing operations.)

git-svn-id: svn://svn.valgrind.org/vex/trunk@2223
---
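Note on semantics: the NarrowBin* primops and helpers added here truncate
each lane (simply dropping the high bits), in contrast to the existing
QNarrowBin* ops, which saturate.  A minimal standalone sketch of the
per-lane difference (illustrative only; qnarrow32Sto16U's body is not
shown in this patch, so the saturating model below is an assumption):

   #include <stdint.h>

   /* Truncating, as in the new narrow32to16: keep the low 16 bits. */
   static uint16_t trunc32to16 ( uint32_t xx )
   {
      return (uint16_t)xx;
   }

   /* Saturating signed-to-unsigned, modelling qnarrow32Sto16U
      (assumed behaviour): clamp the signed lane into [0, 65535]. */
   static uint16_t sat32Sto16U ( uint32_t xx0 )
   {
      int32_t xx = (int32_t)xx0;
      if (xx < 0)     xx = 0;
      if (xx > 65535) xx = 65535;
      return (uint16_t)xx;
   }

   /* For the lane 0x00012345: truncating gives 0x2345, saturating
      gives 0xFFFF.  For 0xFFFF8000 (-32768 signed): truncating gives
      0x8000, saturating gives 0. */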
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
index 86c089d1a3..ca8bdb4e64 100644
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -1100,6 +1100,10 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
          case Iop_QNarrowBin16Sto8Ux8:
             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
+         case Iop_NarrowBin16to8x8:
+            fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
+         case Iop_NarrowBin32to16x4:
+            fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;
 
          case Iop_QSub8Sx8:
             fn = (HWord)h_generic_calc_QSub8Sx8; break;
@@ -3665,6 +3669,12 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
       case Iop_QNarrowBin32Sto16Ux8:
          fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
          goto do_SseAssistedBinary;
+      case Iop_NarrowBin16to8x16:
+         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
+         goto do_SseAssistedBinary;
+      case Iop_NarrowBin32to16x8:
+         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
+         goto do_SseAssistedBinary;
       do_SseAssistedBinary: {
          /* RRRufff! RRRufff code is what we're generating here.  Oh
             well. */
diff --git a/VEX/priv/host_generic_simd128.c b/VEX/priv/host_generic_simd128.c
index e29a2eb554..e9d1437485 100644
--- a/VEX/priv/host_generic_simd128.c
+++ b/VEX/priv/host_generic_simd128.c
@@ -118,7 +118,19 @@ static inline UShort qnarrow32Sto16U ( UInt xx0 )
    return (UShort)xx;
 }
 
-void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
+static inline UShort narrow32to16 ( UInt xx )
+{
+   return (UShort)xx;
+}
+
+static inline UChar narrow16to8 ( UShort xx )
+{
+   return (UChar)xx;
+}
+
+
+void VEX_REGPARM(3)
+     h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
 {
    res->w32[0] = mul32(argL->w32[0], argR->w32[0]);
@@ -127,7 +139,8 @@ void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
    res->w32[3] = mul32(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = max32S(argL->w32[0], argR->w32[0]);
@@ -136,7 +149,8 @@ void h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
    res->w32[3] = max32S(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = min32S(argL->w32[0], argR->w32[0]);
@@ -145,7 +159,8 @@ void h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
    res->w32[3] = min32S(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = max32U(argL->w32[0], argR->w32[0]);
@@ -154,7 +169,8 @@ void h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
    res->w32[3] = max32U(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w32[0] = min32U(argL->w32[0], argR->w32[0]);
@@ -163,7 +179,8 @@ void h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
    res->w32[3] = min32U(argL->w32[3], argR->w32[3]);
 }
 
-void h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w16[0] = max16U(argL->w16[0], argR->w16[0]);
@@ -176,7 +193,8 @@ void h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
    res->w16[7] = max16U(argL->w16[7], argR->w16[7]);
 }
 
-void h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w16[0] = min16U(argL->w16[0], argR->w16[0]);
@@ -189,7 +207,8 @@ void h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
    res->w16[7] = min16U(argL->w16[7], argR->w16[7]);
 }
 
-void h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]);
@@ -210,7 +229,8 @@ void h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
    res->w8[15] = max8S(argL->w8[15], argR->w8[15]);
 }
 
-void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
                                V128* argL, V128* argR )
 {
    res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]);
@@ -231,14 +251,16 @@ void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
    res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
 }
 
-void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
                                 V128* argL, V128* argR )
 {
    res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]);
    res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]);
 }
 
-void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
                                  V128* argL, V128* argR )
 {
    res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]);
@@ -252,7 +274,8 @@ void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
    semantics of these primops (Sar64x2, etc) it is an error if in
    fact we are ever given an out-of-range shift amount.
 */
-void h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
+void /*not-regparm*/
+     h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
                                V128* argL, UInt nn)
 {
    /* vassert(nn < 64); */
@@ -261,7 +284,8 @@ void h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
    res->w64[1] = sar64(argL->w64[1], nn);
 }
 
-void h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
+void /*not-regparm*/
+     h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
                                V128* argL, UInt nn)
 {
    /* vassert(nn < 8); */
@@ -284,7 +308,8 @@ void h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
    res->w8[15] = sar8(argL->w8[15], nn);
 }
 
-void h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+     h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
                                            V128* argL, V128* argR )
 {
    res->w16[0] = qnarrow32Sto16U(argR->w32[0]);
@@ -297,6 +322,42 @@ void h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
    res->w16[7] = qnarrow32Sto16U(argL->w32[3]);
 }
 
+void VEX_REGPARM(3)
+     h_generic_calc_NarrowBin16to8x16 ( /*OUT*/V128* res,
+                                        V128* argL, V128* argR )
+{
+   res->w8[ 0] = narrow16to8(argR->w16[0]);
+   res->w8[ 1] = narrow16to8(argR->w16[1]);
+   res->w8[ 2] = narrow16to8(argR->w16[2]);
+   res->w8[ 3] = narrow16to8(argR->w16[3]);
+   res->w8[ 4] = narrow16to8(argR->w16[4]);
+   res->w8[ 5] = narrow16to8(argR->w16[5]);
+   res->w8[ 6] = narrow16to8(argR->w16[6]);
+   res->w8[ 7] = narrow16to8(argR->w16[7]);
+   res->w8[ 8] = narrow16to8(argL->w16[0]);
+   res->w8[ 9] = narrow16to8(argL->w16[1]);
+   res->w8[10] = narrow16to8(argL->w16[2]);
+   res->w8[11] = narrow16to8(argL->w16[3]);
+   res->w8[12] = narrow16to8(argL->w16[4]);
+   res->w8[13] = narrow16to8(argL->w16[5]);
+   res->w8[14] = narrow16to8(argL->w16[6]);
+   res->w8[15] = narrow16to8(argL->w16[7]);
+}
+
+void VEX_REGPARM(3)
+     h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res,
+                                        V128* argL, V128* argR )
+{
+   res->w16[0] = narrow32to16(argR->w32[0]);
+   res->w16[1] = narrow32to16(argR->w32[1]);
+   res->w16[2] = narrow32to16(argR->w32[2]);
+   res->w16[3] = narrow32to16(argR->w32[3]);
+   res->w16[4] = narrow32to16(argL->w32[0]);
+   res->w16[5] = narrow32to16(argL->w32[1]);
+   res->w16[6] = narrow32to16(argL->w32[2]);
+   res->w16[7] = narrow32to16(argL->w32[3]);
+}
+
 
 /*---------------------------------------------------------------*/
 /*--- end host_generic_simd128.c ---*/
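(The VEX_REGPARM(3) annotations above put these helpers on a
register-parameter calling convention, matching the 3-regparm calls the
instruction selectors emit.  As a rough sketch of the intended effect --
the real definition lives elsewhere in VEX, so treat this as an
assumption -- on 32-bit x86 gcc builds it expands along the lines of

   /* assumed definition, for illustration only */
   #define VEX_REGPARM(_n)  __attribute__((regparm(_n)))

so the first three arguments travel in registers rather than on the
stack, and to nothing on targets whose ABI already passes arguments in
registers.  The SarN64x2/SarN8x16 helpers are deliberately left on the
default convention, as flagged by the /*not-regparm*/ comments.)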
diff --git a/VEX/priv/host_generic_simd128.h b/VEX/priv/host_generic_simd128.h
index 797617823c..6e37a870b3 100644
--- a/VEX/priv/host_generic_simd128.h
+++ b/VEX/priv/host_generic_simd128.h
@@ -45,26 +45,43 @@
 
 #include "libvex_basictypes.h"
 
-/* DO NOT MAKE THESE INTO REGPARM FNS!  THIS WILL BREAK CALLING
-   SEQUENCES GENERATED BY host-x86/isel.c.
-*/
-
-extern void h_generic_calc_Mul32x4    ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max32Sx4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min32Sx4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max32Ux4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min32Ux4   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max16Ux8   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min16Ux8   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max8Sx16   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min8Sx16   ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_CmpEQ64x2  ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_SarN64x2   ( /*OUT*/V128*, V128*, UInt );
-extern void h_generic_calc_SarN8x16   ( /*OUT*/V128*, V128*, UInt );
-
-extern void h_generic_calc_QNarrowBin32Sto16Ux8
+extern VEX_REGPARM(3)
+       void h_generic_calc_Mul32x4    ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max32Sx4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min32Sx4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max32Ux4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min32Ux4   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max16Ux8   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min16Ux8   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Max8Sx16   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_Min8Sx16   ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_CmpEQ64x2  ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
+
+extern /*not-regparm*/
+       void h_generic_calc_SarN64x2   ( /*OUT*/V128*, V128*, UInt );
+extern /*not-regparm*/
+       void h_generic_calc_SarN8x16   ( /*OUT*/V128*, V128*, UInt );
+
+extern VEX_REGPARM(3)
+       void h_generic_calc_QNarrowBin32Sto16Ux8
+              ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_NarrowBin16to8x16
+              ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+       void h_generic_calc_NarrowBin32to16x8
               ( /*OUT*/V128*, V128*, V128* );
-
 
 #endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
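The 64-bit helpers added below in host_generic_simd64.c follow the same
lane ordering as the 128-bit versions above: the first operand (aa/argL)
supplies the high half of the result and the second (bb/argR) the low
half.  A self-contained model of h_generic_calc_NarrowBin32to16x4 with
one worked value -- a sketch only, using plain stdint types in place of
VEX's ULong/UInt and assuming mk16x4 packs its first argument into the
highest lane:

   #include <stdio.h>
   #include <stdint.h>

   static uint64_t narrowBin32to16x4 ( uint64_t aa, uint64_t bb )
   {
      uint16_t d = (uint16_t)(aa >> 32);  /* sel32x2_1(aa), truncated */
      uint16_t c = (uint16_t)aa;          /* sel32x2_0(aa), truncated */
      uint16_t b = (uint16_t)(bb >> 32);  /* sel32x2_1(bb), truncated */
      uint16_t a = (uint16_t)bb;          /* sel32x2_0(bb), truncated */
      return ((uint64_t)d << 48) | ((uint64_t)c << 32)
             | ((uint64_t)b << 16) | (uint64_t)a;
   }

   int main ( void )
   {
      /* prints 1111222233334444: each 32-bit lane loses its high 16
         bits, and aa lands in the high half of the result */
      printf("%016llx\n",
             (unsigned long long)
                narrowBin32to16x4(0x1111111122222222ULL,
                                  0x3333333344444444ULL));
      return 0;
   }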
diff --git a/VEX/priv/host_generic_simd64.c b/VEX/priv/host_generic_simd64.c
index 61bdbd3e68..693d796994 100644
--- a/VEX/priv/host_generic_simd64.c
+++ b/VEX/priv/host_generic_simd64.c
@@ -296,6 +296,16 @@ static inline UChar qnarrow16Sto8U ( UShort xx0 )
    return (UChar)xx;
 }
 
+static inline UShort narrow32to16 ( UInt xx )
+{
+   return (UShort)xx;
+}
+
+static inline UChar narrow16to8 ( UShort xx )
+{
+   return (UChar)xx;
+}
+
 /* shifts: we don't care about out-of-range ones, since
    that is dealt with at a higher level.
 */
@@ -817,6 +827,44 @@ ULong h_generic_calc_QNarrowBin16Sto8Ux8 ( ULong aa, ULong bb )
           );
 }
 
+/* ------------ Truncating narrowing ------------ */
+
+ULong h_generic_calc_NarrowBin32to16x4 ( ULong aa, ULong bb )
+{
+   UInt d = sel32x2_1(aa);
+   UInt c = sel32x2_0(aa);
+   UInt b = sel32x2_1(bb);
+   UInt a = sel32x2_0(bb);
+   return mk16x4(
+             narrow32to16(d),
+             narrow32to16(c),
+             narrow32to16(b),
+             narrow32to16(a)
+          );
+}
+
+ULong h_generic_calc_NarrowBin16to8x8 ( ULong aa, ULong bb )
+{
+   UShort h = sel16x4_3(aa);
+   UShort g = sel16x4_2(aa);
+   UShort f = sel16x4_1(aa);
+   UShort e = sel16x4_0(aa);
+   UShort d = sel16x4_3(bb);
+   UShort c = sel16x4_2(bb);
+   UShort b = sel16x4_1(bb);
+   UShort a = sel16x4_0(bb);
+   return mk8x8(
+             narrow16to8(h),
+             narrow16to8(g),
+             narrow16to8(f),
+             narrow16to8(e),
+             narrow16to8(d),
+             narrow16to8(c),
+             narrow16to8(b),
+             narrow16to8(a)
+          );
+}
+
 /* ------------ Interleaving ------------ */
 
 ULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
diff --git a/VEX/priv/host_generic_simd64.h b/VEX/priv/host_generic_simd64.h
index 1807ed7f68..6275480c72 100644
--- a/VEX/priv/host_generic_simd64.h
+++ b/VEX/priv/host_generic_simd64.h
@@ -90,6 +90,8 @@ extern ULong h_generic_calc_CmpNEZ8x8 ( ULong );
 extern ULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong, ULong );
 extern ULong h_generic_calc_QNarrowBin16Sto8Sx8  ( ULong, ULong );
 extern ULong h_generic_calc_QNarrowBin16Sto8Ux8  ( ULong, ULong );
+extern ULong h_generic_calc_NarrowBin32to16x4 ( ULong, ULong );
+extern ULong h_generic_calc_NarrowBin16to8x8 ( ULong, ULong );
 
 extern ULong h_generic_calc_InterleaveHI8x8 ( ULong, ULong );
 extern ULong h_generic_calc_InterleaveLO8x8 ( ULong, ULong );
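The do_SseAssistedBinary case added to host_x86_isel.c below (copied
from the amd64 version, as its comment notes) amounts to the following
C-level pattern -- a sketch only: the real code emits instructions and
works on virtual registers, and the V128 typedef here merely stands in
for VEX's actual type:

   #include <string.h>
   #include <stdint.h>

   typedef struct { uint8_t b[16]; } V128;  /* stand-in for VEX's V128 */

   static void sseAssistedBinary ( void (*fn)(V128*, V128*, V128*),
                                   V128* dst, V128* argL, V128* argR )
   {
      /* subl $112, %esp: carve out scratch space on the stack */
      uint8_t space[112];
      /* leal 48(%esp),%r_argp ; andl $-16,%r_argp: derive a 16-aligned
         pointer into it, with room for three V128s */
      V128* argp = (V128*)((uintptr_t)(space + 48) & ~(uintptr_t)15);
      /* the two movupd stores: spill the args at 16(%r_argp)
         and 32(%r_argp) */
      memcpy(&argp[1], argL, sizeof(V128));
      memcpy(&argp[2], argR, sizeof(V128));
      /* the leal/call sequence: pass the three pointers in registers
         (3-regparm) and let the helper write the result at 0(%r_argp) */
      fn(&argp[0], &argp[1], &argp[2]);
      /* the final movupd load: fetch the result */
      memcpy(dst, &argp[0], sizeof(V128));
   }

The reload goes through argp rather than %esp because, as the comment in
the hunk says, the register allocator keeps argp alive across the call.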
diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c
index 7ec6305705..3b0d995495 100644
--- a/VEX/priv/host_x86_isel.c
+++ b/VEX/priv/host_x86_isel.c
@@ -42,6 +42,7 @@
 #include "main_globals.h"
 #include "host_generic_regs.h"
 #include "host_generic_simd64.h"
+#include "host_generic_simd128.h"
 #include "host_x86_defs.h"
 
 /* TODO 21 Apr 2005:
@@ -2392,6 +2393,10 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
          case Iop_QNarrowBin16Sto8Ux8:
             fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
+         case Iop_NarrowBin16to8x8:
+            fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
+         case Iop_NarrowBin32to16x4:
+            fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
 
          case Iop_QSub8Sx8:
            fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
@@ -3135,6 +3140,7 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
 #  define SSE2_OR_ABOVE \
       (env->hwcaps & VEX_HWCAPS_X86_SSE2)
 
+   HWord     fn = 0; /* address of helper fn, if required */
    MatchInfo mi;
    Bool      arg1isEReg = False;
    X86SseOp  op = Xsse_INVALID;
@@ -3601,6 +3607,59 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
          return dst;
       }
 
+      case Iop_NarrowBin32to16x8:
+         fn = (HWord)h_generic_calc_NarrowBin32to16x8;
+         goto do_SseAssistedBinary;
+      case Iop_NarrowBin16to8x16:
+         fn = (HWord)h_generic_calc_NarrowBin16to8x16;
+         goto do_SseAssistedBinary;
+      do_SseAssistedBinary: {
+         /* As with the amd64 case (where this is copied from) we
+            generate pretty bad code.
+         */
+         vassert(fn != 0);
+         HReg dst = newVRegV(env);
+         HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+         HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+         HReg argp = newVRegI(env);
+         /* subl $112, %esp         -- make a space */
+         sub_from_esp(env, 112);
+         /* leal 48(%esp), %r_argp  -- point into it */
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
+                                      argp));
+         /* andl $-16, %r_argp      -- 16-align the pointer */
+         addInstr(env, X86Instr_Alu32R(Xalu_AND,
+                                       X86RMI_Imm( ~(UInt)15 ),
+                                       argp));
+         /* Prepare 3 arg regs:
+            leal  0(%r_argp), %eax
+            leal 16(%r_argp), %edx
+            leal 32(%r_argp), %ecx
+         */
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
+                                      hregX86_EAX()));
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
+                                      hregX86_EDX()));
+         addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
+                                      hregX86_ECX()));
+         /* Store the two args, at (%edx) and (%ecx):
+            movupd  %argL, 0(%edx)
+            movupd  %argR, 0(%ecx)
+         */
+         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
+                                        X86AMode_IR(0, hregX86_EDX())));
+         addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
+                                        X86AMode_IR(0, hregX86_ECX())));
+         /* call the helper */
+         addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3 ));
+         /* fetch the result from memory, using %r_argp, which the
+            register allocator will keep alive across the call. */
+         addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
+                                        X86AMode_IR(0, argp)));
+         /* and finally, clear the space */
+         add_to_esp(env, 112);
+         return dst;
+      }
+
       default:
          break;
    } /* switch (e->Iex.Binop.op) */
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 13667d7c79..21506497ff 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -514,6 +514,8 @@ void ppIROp ( IROp op )
       case Iop_QNarrowBin16Sto8Ux8: vex_printf("QNarrowBin16Sto8Ux8"); return;
       case Iop_QNarrowBin16Sto8Sx8: vex_printf("QNarrowBin16Sto8Sx8"); return;
       case Iop_QNarrowBin32Sto16Sx4: vex_printf("QNarrowBin32Sto16Sx4"); return;
+      case Iop_NarrowBin16to8x8: vex_printf("NarrowBin16to8x8"); return;
+      case Iop_NarrowBin32to16x4: vex_printf("NarrowBin32to16x4"); return;
       case Iop_InterleaveHI8x8: vex_printf("InterleaveHI8x8"); return;
       case Iop_InterleaveHI16x4: vex_printf("InterleaveHI16x4"); return;
       case Iop_InterleaveHI32x2: vex_printf("InterleaveHI32x2"); return;
@@ -2066,6 +2068,7 @@ void typeOfPrimop ( IROp op,
       case Iop_PwAdd32Fx2:
       case Iop_QNarrowBin32Sto16Sx4:
       case Iop_QNarrowBin16Sto8Sx8: case Iop_QNarrowBin16Sto8Ux8:
+      case Iop_NarrowBin16to8x8: case Iop_NarrowBin32to16x4:
       case Iop_Sub8x8: case Iop_Sub16x4: case Iop_Sub32x2:
       case Iop_QSub8Sx8: case Iop_QSub16Sx4:
       case Iop_QSub32Sx2: case Iop_QSub64Sx1:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index f8324697f0..0432af3953 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -923,6 +923,7 @@ typedef
       */
       Iop_QNarrowBin16Sto8Ux8,
       Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
+      Iop_NarrowBin16to8x8, Iop_NarrowBin32to16x4,
 
       /* INTERLEAVING */
       /* Interleave lanes from low or high halves of