x86 front end: Implement MASKMOVQ (MMX class insn, introduced in SSE1)

author Julian Seward <jseward@acm.org>

Wed, 27 Dec 2006 01:15:29 +0000 (01:15 +0000)

committer Julian Seward <jseward@acm.org>

Wed, 27 Dec 2006 01:15:29 +0000 (01:15 +0000)
author Julian Seward <jseward@acm.org>
Wed, 27 Dec 2006 01:15:29 +0000 (01:15 +0000)
committer Julian Seward <jseward@acm.org>
Wed, 27 Dec 2006 01:15:29 +0000 (01:15 +0000)
diff --git a/VEX/priv/guest-x86/toIR.c b/VEX/priv/guest-x86/toIR.c

index 84f783cf488963e5a66d3b42588911a67c69dfea..749b4819b469bf0ab688d42bfbb32ef7915f46bd 100644 (file)
--- a/VEX/priv/guest-x86/toIR.c
+++ b/VEX/priv/guest-x86/toIR.c
@@ -5679,6 +5679,38 @@ UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta )
           break;
        }
  
+      case 0xF7: { /* masked 64-bit store to [EDI]: bytes of regD (G reg) chosen by regM (E reg) */
+         IRTemp addr    = newTemp(Ity_I32);
+         IRTemp regD    = newTemp(Ity_I64);
+         IRTemp regM    = newTemp(Ity_I64);
+         IRTemp mask    = newTemp(Ity_I64);
+         IRTemp olddata = newTemp(Ity_I64);
+         IRTemp newdata = newTemp(Ity_I64);
+
+         modrm = getIByte(delta);
+         if (sz != 4 || (!epartIsReg(modrm))) /* only sz == 4 with a register E operand is accepted */
+            goto mmx_decode_failure;
+         delta++;
+
+         assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); /* target address: EDI, after segment-override handling */
+         assign( regM, getMMXReg( eregOfRM(modrm) ));
+         assign( regD, getMMXReg( gregOfRM(modrm) ));
+         assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); /* each 8-bit lane of regM shifted right (Sar) by 7 */
+         assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); /* reads the full 64 bits currently at addr */
+         assign( newdata, 
+                 binop(Iop_Or64, 
+                       binop(Iop_And64, 
+                             mkexpr(regD), 
+                             mkexpr(mask) ),
+                       binop(Iop_And64, 
+                             mkexpr(olddata),
+                             unop(Iop_Not64, mkexpr(mask)))) ); /* (regD & mask) | (olddata & ~mask) */
+         storeLE( mkexpr(addr), mkexpr(newdata) ); /* NOTE(review): writes all 8 bytes back, including lanes the mask did not select -- confirm acceptable */
+         DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ),
+                                 nameMMXReg( gregOfRM(modrm) ) );
+         break;
+      }
+
        /* --- MMX decode failure --- */
        default:
        mmx_decode_failure:
@@ -7670,6 +7702,16 @@ DisResult disInstr_X86_WRK (
        goto decode_success;
     }
  
+   /* ***--- this is an MMX class insn introduced in SSE1 ---*** */
+   /* 0F F7 = MASKMOVQ -- 8x8 masked store */
+   if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) {
+      Bool ok = False; /* passed to dis_MMX as its decode_ok out-parameter */
+      delta = dis_MMX( &ok, sorb, sz, delta+1 ); /* NOTE(review): delta+1 presumably steps past the 0F byte so dis_MMX starts at F7 -- confirm */
+      if (!ok)
+         goto decode_failure;
+      goto decode_success;
+   }
+
     /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */
     if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) {
        delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 );
@@ -9314,6 +9356,50 @@ DisResult disInstr_X86_WRK (
        /* else fall through */
     }
  
+   /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */
+   if (insn[0] == 0x0F && insn[1] == 0xF7) {
+      modrm = getIByte(delta+2);
+      if (sz == 2 && epartIsReg(modrm)) { /* only sz == 2 with a register E operand is handled; otherwise falls through */
+         IRTemp regD    = newTemp(Ity_V128);
+         IRTemp mask    = newTemp(Ity_V128);
+         IRTemp olddata = newTemp(Ity_V128);
+         IRTemp newdata = newTemp(Ity_V128);
+                addr    = newTemp(Ity_I32); /* addr is assigned here but not declared in this block */
+
+         assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); /* target address: EDI, after segment-override handling */
+         assign( regD, getXMMReg( gregOfRM(modrm) ));
+
+         /* Unfortunately can't do the obvious thing with SarN8x16
+            here since that can't be re-emitted as SSE2 code - no such
+            insn. */
+        assign( 
+            mask, 
+            binop(Iop_64HLtoV128,
+                  binop(Iop_SarN8x8, 
+                        getXMMRegLane64( eregOfRM(modrm), 1 ), 
+                        mkU8(7) ),
+                  binop(Iop_SarN8x8, 
+                        getXMMRegLane64( eregOfRM(modrm), 0 ), 
+                        mkU8(7) ) )); /* mask = 64HLtoV128( SarN8x8(E lane 1, 7), SarN8x8(E lane 0, 7) ) */
+         assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); /* reads the full 128 bits currently at addr */
+         assign( newdata, 
+                 binop(Iop_OrV128, 
+                       binop(Iop_AndV128, 
+                             mkexpr(regD), 
+                             mkexpr(mask) ),
+                       binop(Iop_AndV128, 
+                             mkexpr(olddata),
+                             unop(Iop_NotV128, mkexpr(mask)))) ); /* (regD & mask) | (olddata & ~mask) */
+         storeLE( mkexpr(addr), mkexpr(newdata) ); /* NOTE(review): writes all 16 bytes back, including lanes the mask did not select -- confirm acceptable */
+
+         delta += 2+1; /* the two opcode bytes (0F F7) plus the modrm byte read at delta+2 */
+         DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ),
+                                   nameXMMReg( gregOfRM(modrm) ) );
+         goto decode_success;
+      }
+      /* else fall through */
+   }
+
     /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */
     if (insn[0] == 0x0F && insn[1] == 0xE7) {
        modrm = getIByte(delta+2);
diff --git a/VEX/priv/host-generic/h_generic_simd64.c b/VEX/priv/host-generic/h_generic_simd64.c

index cd06da9e0bd1a28e986dab88879d6fb2ce913859..4f9bfe81b8ab75db83c89573dc42facc63f94c70 100644 (file)
--- a/VEX/priv/host-generic/h_generic_simd64.c
+++ b/VEX/priv/host-generic/h_generic_simd64.c
@@ -299,6 +299,11 @@ static inline UChar qnarrow16Uto8 ( UShort xx0 )
  /* shifts: we don't care about out-of-range ones, since
     that is dealt with at a higher level. */
  
+static inline UChar sar8 ( UChar v, UInt n ) /* right-shifts one 8-bit value v by n via a Char cast */
+{
+   return toUChar(((Char)v) >> n); /* presumably Char is the signed 8-bit type, making this an arithmetic shift -- TODO confirm */
+}
+
  static inline UShort shl16 ( UShort v, UInt n )
  {
     return toUShort(v << n);
@@ -868,6 +873,22 @@ ULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn )
            );
  }
  
+ULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn ) /* applies sar8 with count nn to sel8x8_7 .. sel8x8_0 of xx, repacked by mk8x8 */
+{
+   /* vassert(nn < 8); -- not asserted: the shift count is instead reduced to 0..7 by the mask below. */
+   nn &= 7;
+   return mk8x8(
+             sar8( sel8x8_7(xx), nn ),
+             sar8( sel8x8_6(xx), nn ),
+             sar8( sel8x8_5(xx), nn ),
+             sar8( sel8x8_4(xx), nn ),
+             sar8( sel8x8_3(xx), nn ),
+             sar8( sel8x8_2(xx), nn ),
+             sar8( sel8x8_1(xx), nn ),
+             sar8( sel8x8_0(xx), nn )
+          );
+}
+
  /* ------------ Averaging ------------ */
  
  ULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy )
diff --git a/VEX/priv/host-generic/h_generic_simd64.h b/VEX/priv/host-generic/h_generic_simd64.h

index 4b9532f91e1333ff3e18b82b8c87b53cee4b46a0..93f9e904bfb71ace6fd65d2c169ac10d90038136 100644 (file)
--- a/VEX/priv/host-generic/h_generic_simd64.h
+++ b/VEX/priv/host-generic/h_generic_simd64.h
@@ -114,6 +114,7 @@ extern ULong h_generic_calc_ShlN32x2 ( ULong, UInt );
  extern ULong h_generic_calc_ShrN16x4 ( ULong, UInt );
  extern ULong h_generic_calc_ShrN32x2 ( ULong, UInt );
  
+extern ULong h_generic_calc_SarN8x8  ( ULong, UInt );
  extern ULong h_generic_calc_SarN16x4 ( ULong, UInt );
  extern ULong h_generic_calc_SarN32x2 ( ULong, UInt );
  
diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c

index 4e3c9e41de704735f244155947c310414f1f31ec..b9aeddd0fc3e166520572b825217a0cc47d00d3b 100644 (file)
--- a/VEX/priv/host-x86/isel.c
+++ b/VEX/priv/host-x86/isel.c
@@ -2223,6 +2223,8 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e )
              fn = (HWord)h_generic_calc_SarN32x2; goto shifty;
           case Iop_SarN16x4:
              fn = (HWord)h_generic_calc_SarN16x4; goto shifty;
+         case Iop_SarN8x8:
+            fn = (HWord)h_generic_calc_SarN8x8;  goto shifty;
           shifty: {
              /* Note: the following assumes all helpers are of
                 signature 
diff --git a/VEX/priv/ir/irdefs.c b/VEX/priv/ir/irdefs.c

index 64d83f24e70f88e3967d0498c78efd9731f666be..88b7d636a5f687c52634fd22bcd758f3200d4d9b 100644 (file)
--- a/VEX/priv/ir/irdefs.c
+++ b/VEX/priv/ir/irdefs.c
@@ -340,6 +340,7 @@ void ppIROp ( IROp op )
        case Iop_ShlN32x2: vex_printf("ShlN32x2"); return;
        case Iop_ShrN16x4: vex_printf("ShrN16x4"); return;
        case Iop_ShrN32x2: vex_printf("ShrN32x2"); return;
+      case Iop_SarN8x8: vex_printf("SarN8x8"); return;
        case Iop_SarN16x4: vex_printf("SarN16x4"); return;
        case Iop_SarN32x2: vex_printf("SarN32x2"); return;
        case Iop_QNarrow16Ux4: vex_printf("QNarrow16Ux4"); return;
@@ -1477,7 +1478,7 @@ void typeOfPrimop ( IROp op,
  
        case Iop_ShlN32x2: case Iop_ShlN16x4:
        case Iop_ShrN32x2: case Iop_ShrN16x4:
-      case Iop_SarN32x2: case Iop_SarN16x4:
+      case Iop_SarN32x2: case Iop_SarN16x4: case Iop_SarN8x8:
           BINARY(Ity_I64,Ity_I8, Ity_I64);
  
        case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h

index 2fc41076380f9828534ff7b94b170cbdcdcbe8a0..3a48f4ff020a8a1785f3257b2858222b98d2ad8a 100644 (file)
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -681,9 +681,9 @@ typedef
        Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
  
        /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
-      Iop_ShlN16x4, Iop_ShlN32x2,
-      Iop_ShrN16x4, Iop_ShrN32x2,
-      Iop_SarN16x4, Iop_SarN32x2,
+                   Iop_ShlN16x4, Iop_ShlN32x2,
+                   Iop_ShrN16x4, Iop_ShrN32x2,
+      Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
  
        /* NARROWING -- narrow 2xI64 into 1xI64, hi half from left arg */
        Iop_QNarrow16Ux4,
author	Julian Seward <jseward@acm.org>
	Wed, 27 Dec 2006 01:15:29 +0000 (01:15 +0000)
committer	Julian Seward <jseward@acm.org>
	Wed, 27 Dec 2006 01:15:29 +0000 (01:15 +0000)
VEX/priv/guest-x86/toIR.c		patch \| blob \| blame \| history
VEX/priv/host-generic/h_generic_simd64.c		patch \| blob \| blame \| history
VEX/priv/host-generic/h_generic_simd64.h		patch \| blob \| blame \| history
VEX/priv/host-x86/isel.c		patch \| blob \| blame \| history
VEX/priv/ir/irdefs.c		patch \| blob \| blame \| history
VEX/pub/libvex_ir.h		patch \| blob \| blame \| history