fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; break;
case Iop_QNarrowBin16Sto8Ux8:
fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; break;
+ case Iop_NarrowBin16to8x8:
+ fn = (HWord)h_generic_calc_NarrowBin16to8x8; break;
+ case Iop_NarrowBin32to16x4:
+ fn = (HWord)h_generic_calc_NarrowBin32to16x4; break;
case Iop_QSub8Sx8:
fn = (HWord)h_generic_calc_QSub8Sx8; break;
case Iop_QNarrowBin32Sto16Ux8:
fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
goto do_SseAssistedBinary;
+ case Iop_NarrowBin16to8x16:
+ fn = (HWord)h_generic_calc_NarrowBin16to8x16;
+ goto do_SseAssistedBinary;
+ case Iop_NarrowBin32to16x8:
+ fn = (HWord)h_generic_calc_NarrowBin32to16x8;
+ goto do_SseAssistedBinary;
do_SseAssistedBinary: {
/* RRRufff! RRRufff code is what we're generating here. Oh
well. */
return (UShort)xx;
}
-void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
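+/* Helpers for the non-saturating (truncating) NarrowBin cases below:
+   simply drop the top half of each lane. */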
+static inline UShort narrow32to16 ( UInt xx )
+{
+ return (UShort)xx;
+}
+
+static inline UChar narrow16to8 ( UShort xx )
+{
+ return (UChar)xx;
+}
+
+
+void VEX_REGPARM(3)
+ h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w32[0] = mul32(argL->w32[0], argR->w32[0]);
res->w32[3] = mul32(argL->w32[3], argR->w32[3]);
}
-void h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w32[0] = max32S(argL->w32[0], argR->w32[0]);
res->w32[3] = max32S(argL->w32[3], argR->w32[3]);
}
-void h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w32[0] = min32S(argL->w32[0], argR->w32[0]);
res->w32[3] = min32S(argL->w32[3], argR->w32[3]);
}
-void h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w32[0] = max32U(argL->w32[0], argR->w32[0]);
res->w32[3] = max32U(argL->w32[3], argR->w32[3]);
}
-void h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w32[0] = min32U(argL->w32[0], argR->w32[0]);
res->w32[3] = min32U(argL->w32[3], argR->w32[3]);
}
-void h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w16[0] = max16U(argL->w16[0], argR->w16[0]);
res->w16[7] = max16U(argL->w16[7], argR->w16[7]);
}
-void h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w16[0] = min16U(argL->w16[0], argR->w16[0]);
res->w16[7] = min16U(argL->w16[7], argR->w16[7]);
}
-void h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]);
res->w8[15] = max8S(argL->w8[15], argR->w8[15]);
}
-void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]);
res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
}
-void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]);
res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]);
}
-void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]);
semantics of these primops (Sar64x2, etc) it is an error if in
fact we are ever given an out-of-range shift amount.
*/
-void h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
+void /*not-regparm*/
+ h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
V128* argL, UInt nn)
{
/* vassert(nn < 64); */
res->w64[1] = sar64(argL->w64[1], nn);
}
-void h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
+void /*not-regparm*/
+ h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
V128* argL, UInt nn)
{
/* vassert(nn < 8); */
res->w8[15] = sar8(argL->w8[15], nn);
}
-void h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
+void VEX_REGPARM(3)
+ h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w16[0] = qnarrow32Sto16U(argR->w32[0]);
res->w16[7] = qnarrow32Sto16U(argL->w32[3]);
}
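+
+/* Truncating (non-saturating) narrows.  As with the QNarrowBin
+   helpers above, argR supplies the low half of the result and argL
+   the high half. */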
+void VEX_REGPARM(3)
+ h_generic_calc_NarrowBin16to8x16 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w8[ 0] = narrow16to8(argR->w16[0]);
+ res->w8[ 1] = narrow16to8(argR->w16[1]);
+ res->w8[ 2] = narrow16to8(argR->w16[2]);
+ res->w8[ 3] = narrow16to8(argR->w16[3]);
+ res->w8[ 4] = narrow16to8(argR->w16[4]);
+ res->w8[ 5] = narrow16to8(argR->w16[5]);
+ res->w8[ 6] = narrow16to8(argR->w16[6]);
+ res->w8[ 7] = narrow16to8(argR->w16[7]);
+ res->w8[ 8] = narrow16to8(argL->w16[0]);
+ res->w8[ 9] = narrow16to8(argL->w16[1]);
+ res->w8[10] = narrow16to8(argL->w16[2]);
+ res->w8[11] = narrow16to8(argL->w16[3]);
+ res->w8[12] = narrow16to8(argL->w16[4]);
+ res->w8[13] = narrow16to8(argL->w16[5]);
+ res->w8[14] = narrow16to8(argL->w16[6]);
+ res->w8[15] = narrow16to8(argL->w16[7]);
+}
+
+void VEX_REGPARM(3)
+ h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w16[0] = narrow32to16(argR->w32[0]);
+ res->w16[1] = narrow32to16(argR->w32[1]);
+ res->w16[2] = narrow32to16(argR->w32[2]);
+ res->w16[3] = narrow32to16(argR->w32[3]);
+ res->w16[4] = narrow32to16(argL->w32[0]);
+ res->w16[5] = narrow32to16(argL->w32[1]);
+ res->w16[6] = narrow32to16(argL->w32[2]);
+ res->w16[7] = narrow32to16(argL->w32[3]);
+}
+
/*---------------------------------------------------------------*/
/*--- end host_generic_simd128.c ---*/
#include "libvex_basictypes.h"
-/* DO NOT MAKE THESE INTO REGPARM FNS! THIS WILL BREAK CALLING
- SEQUENCES GENERATED BY host-x86/isel.c. */
-
-extern void h_generic_calc_Mul32x4 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max32Sx4 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min32Sx4 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max32Ux4 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min32Ux4 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max16Ux8 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min16Ux8 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Max8Sx16 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_Min8Sx16 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
-extern void h_generic_calc_SarN64x2 ( /*OUT*/V128*, V128*, UInt );
-extern void h_generic_calc_SarN8x16 ( /*OUT*/V128*, V128*, UInt );
-
-extern void h_generic_calc_QNarrowBin32Sto16Ux8
+extern VEX_REGPARM(3)
+ void h_generic_calc_Mul32x4 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_Max32Sx4 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_Min32Sx4 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_Max32Ux4 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_Min32Ux4 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_Max16Ux8 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_Min16Ux8 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_Max8Sx16 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_Min8Sx16 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_CmpEQ64x2 ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
+
+extern /*not-regparm*/
+ void h_generic_calc_SarN64x2 ( /*OUT*/V128*, V128*, UInt );
+extern /*not-regparm*/
+ void h_generic_calc_SarN8x16 ( /*OUT*/V128*, V128*, UInt );
+
+extern VEX_REGPARM(3)
+ void h_generic_calc_QNarrowBin32Sto16Ux8
+ ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_NarrowBin16to8x16
+ ( /*OUT*/V128*, V128*, V128* );
+extern VEX_REGPARM(3)
+ void h_generic_calc_NarrowBin32to16x8
( /*OUT*/V128*, V128*, V128* );
-
#endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
return (UChar)xx;
}
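+
+/* Truncation helpers for the non-saturating narrow cases below. */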
+static inline UShort narrow32to16 ( UInt xx )
+{
+ return (UShort)xx;
+}
+
+static inline UChar narrow16to8 ( UShort xx )
+{
+ return (UChar)xx;
+}
+
/* shifts: we don't care about out-of-range ones, since
that is dealt with at a higher level. */
);
}
+/* ------------ Truncating narrowing ------------ */
+
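+/* 64-bit counterparts of the V128 NarrowBin helpers in
+   host_generic_simd128.c: bb supplies the low half of the result and
+   aa the high half, matching the existing QNarrowBin helpers in this
+   file. */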
+ULong h_generic_calc_NarrowBin32to16x4 ( ULong aa, ULong bb )
+{
+ UInt d = sel32x2_1(aa);
+ UInt c = sel32x2_0(aa);
+ UInt b = sel32x2_1(bb);
+ UInt a = sel32x2_0(bb);
+ return mk16x4(
+ narrow32to16(d),
+ narrow32to16(c),
+ narrow32to16(b),
+ narrow32to16(a)
+ );
+}
+
+ULong h_generic_calc_NarrowBin16to8x8 ( ULong aa, ULong bb )
+{
+ UShort h = sel16x4_3(aa);
+ UShort g = sel16x4_2(aa);
+ UShort f = sel16x4_1(aa);
+ UShort e = sel16x4_0(aa);
+ UShort d = sel16x4_3(bb);
+ UShort c = sel16x4_2(bb);
+ UShort b = sel16x4_1(bb);
+ UShort a = sel16x4_0(bb);
+ return mk8x8(
+ narrow16to8(h),
+ narrow16to8(g),
+ narrow16to8(f),
+ narrow16to8(e),
+ narrow16to8(d),
+ narrow16to8(c),
+ narrow16to8(b),
+ narrow16to8(a)
+ );
+}
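+
+/* Worked example, assuming the usual mk16x4/sel32x2 lane conventions
+   used in this file: NarrowBin32to16x4(0xAAAABBBBCCCCDDDDULL,
+   0x1111222233334444ULL) == 0xBBBBDDDD22224444ULL, i.e. the low 16
+   bits of each 32-bit lane, with aa in the high half of the result
+   and bb in the low half. */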
+
/* ------------ Interleaving ------------ */
ULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
extern ULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong, ULong );
extern ULong h_generic_calc_QNarrowBin16Sto8Sx8 ( ULong, ULong );
extern ULong h_generic_calc_QNarrowBin16Sto8Ux8 ( ULong, ULong );
+extern ULong h_generic_calc_NarrowBin32to16x4 ( ULong, ULong );
+extern ULong h_generic_calc_NarrowBin16to8x8 ( ULong, ULong );
extern ULong h_generic_calc_InterleaveHI8x8 ( ULong, ULong );
extern ULong h_generic_calc_InterleaveLO8x8 ( ULong, ULong );
#include "main_globals.h"
#include "host_generic_regs.h"
#include "host_generic_simd64.h"
+#include "host_generic_simd128.h"
#include "host_x86_defs.h"
/* TODO 21 Apr 2005:
fn = (HWord)h_generic_calc_QNarrowBin16Sto8Sx8; goto binnish;
case Iop_QNarrowBin16Sto8Ux8:
fn = (HWord)h_generic_calc_QNarrowBin16Sto8Ux8; goto binnish;
+ case Iop_NarrowBin16to8x8:
+ fn = (HWord)h_generic_calc_NarrowBin16to8x8; goto binnish;
+ case Iop_NarrowBin32to16x4:
+ fn = (HWord)h_generic_calc_NarrowBin32to16x4; goto binnish;
case Iop_QSub8Sx8:
fn = (HWord)h_generic_calc_QSub8Sx8; goto binnish;
# define SSE2_OR_ABOVE \
(env->hwcaps & VEX_HWCAPS_X86_SSE2)
+ HWord fn = 0; /* address of helper fn, if required */
MatchInfo mi;
Bool arg1isEReg = False;
X86SseOp op = Xsse_INVALID;
return dst;
}
+ case Iop_NarrowBin32to16x8:
+ fn = (HWord)h_generic_calc_NarrowBin32to16x8;
+ goto do_SseAssistedBinary;
+ case Iop_NarrowBin16to8x16:
+ fn = (HWord)h_generic_calc_NarrowBin16to8x16;
+ goto do_SseAssistedBinary;
+ do_SseAssistedBinary: {
+ /* As with the amd64 case (from which this is copied), we
+ generate pretty bad code. */
+ vassert(fn != 0);
+ HReg dst = newVRegV(env);
+ HReg argL = iselVecExpr(env, e->Iex.Binop.arg1);
+ HReg argR = iselVecExpr(env, e->Iex.Binop.arg2);
+ HReg argp = newVRegI(env);
+ /* subl $112, %esp -- make a space */
+ sub_from_esp(env, 112);
+ /* leal 48(%esp), %r_argp -- point into it */
+ addInstr(env, X86Instr_Lea32(X86AMode_IR(48, hregX86_ESP()),
+ argp));
+ /* andl $-16, %r_argp -- 16-align the pointer */
+ addInstr(env, X86Instr_Alu32R(Xalu_AND,
+ X86RMI_Imm( ~(UInt)15 ),
+ argp));
+ /* Prepare 3 arg regs:
+ leal 0(%r_argp), %eax
+ leal 16(%r_argp), %edx
+ leal 32(%r_argp), %ecx
+ */
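+ /* (The helpers are declared VEX_REGPARM(3), which on x86 passes
+ the three args in EAX, EDX and ECX, hence the choice of
+ registers here.) */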
+ addInstr(env, X86Instr_Lea32(X86AMode_IR(0, argp),
+ hregX86_EAX()));
+ addInstr(env, X86Instr_Lea32(X86AMode_IR(16, argp),
+ hregX86_EDX()));
+ addInstr(env, X86Instr_Lea32(X86AMode_IR(32, argp),
+ hregX86_ECX()));
+ /* Store the two args, at (%edx) and (%ecx):
+ movupd %argL, 0(%edx)
+ movupd %argR, 0(%ecx)
+ */
+ addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argL,
+ X86AMode_IR(0, hregX86_EDX())));
+ addInstr(env, X86Instr_SseLdSt(False/*!isLoad*/, argR,
+ X86AMode_IR(0, hregX86_ECX())));
+ /* call the helper */
+ addInstr(env, X86Instr_Call( Xcc_ALWAYS, (Addr32)fn, 3 ));
+ /* fetch the result from memory, using %r_argp, which the
+ register allocator will keep alive across the call. */
+ addInstr(env, X86Instr_SseLdSt(True/*isLoad*/, dst,
+ X86AMode_IR(0, argp)));
+ /* and finally, clear the space */
+ add_to_esp(env, 112);
+ return dst;
+ }
+
default:
break;
} /* switch (e->Iex.Binop.op) */
case Iop_QNarrowBin16Sto8Ux8: vex_printf("QNarrowBin16Sto8Ux8"); return;
case Iop_QNarrowBin16Sto8Sx8: vex_printf("QNarrowBin16Sto8Sx8"); return;
case Iop_QNarrowBin32Sto16Sx4: vex_printf("QNarrowBin32Sto16Sx4"); return;
+ case Iop_NarrowBin16to8x8: vex_printf("NarrowBin16to8x8"); return;
+ case Iop_NarrowBin32to16x4: vex_printf("NarrowBin32to16x4"); return;
case Iop_InterleaveHI8x8: vex_printf("InterleaveHI8x8"); return;
case Iop_InterleaveHI16x4: vex_printf("InterleaveHI16x4"); return;
case Iop_InterleaveHI32x2: vex_printf("InterleaveHI32x2"); return;
case Iop_PwAdd32Fx2:
case Iop_QNarrowBin32Sto16Sx4:
case Iop_QNarrowBin16Sto8Sx8: case Iop_QNarrowBin16Sto8Ux8:
+ case Iop_NarrowBin16to8x8: case Iop_NarrowBin32to16x4:
case Iop_Sub8x8: case Iop_Sub16x4: case Iop_Sub32x2:
case Iop_QSub8Sx8: case Iop_QSub16Sx4:
case Iop_QSub32Sx2: case Iop_QSub64Sx1:
*/
Iop_QNarrowBin16Sto8Ux8,
Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
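+   /* As above, but plain truncation of each lane: no saturation. */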
+ Iop_NarrowBin16to8x8, Iop_NarrowBin32to16x4,
/* INTERLEAVING */
/* Interleave lanes from low or high halves of