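Implement the variable-control (vector-selector) forms of VPERMILPS and VPERMILPD, 128- and 256-bit, in the amd64 front end, adding a new Iop_Perm32x4 primitive to support them.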

author Julian Seward <jseward@acm.org>

Wed, 20 Jun 2012 11:46:19 +0000 (11:46 +0000)

committer Julian Seward <jseward@acm.org>

Wed, 20 Jun 2012 11:46:19 +0000 (11:46 +0000)
author Julian Seward <jseward@acm.org>
Wed, 20 Jun 2012 11:46:19 +0000 (11:46 +0000)
committer Julian Seward <jseward@acm.org>
Wed, 20 Jun 2012 11:46:19 +0000 (11:46 +0000)
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c

index 0288ed09da08783b548dd186439d1a8252839a40..364bb794876dc00eb010436b094917a1e43f6770 100644 (file)
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -8986,6 +8986,20 @@ static void breakupV256toV128s ( IRTemp t256,
     assign(*t0, unop(Iop_V256toV128_0, mkexpr(t256)));
  }
  
+/* Split the 128-bit vector temp |t128| into two 64-bit integer
+   temps.  On return *t0 names a new Ity_I64 temp bound to
+   Iop_V128to64 of the value, and *t1 a new Ity_I64 temp bound to
+   Iop_V128HIto64 of it; callers in this file treat *t1 as the high
+   half and *t0 as the low half.  Both OUT temps must hold
+   IRTemp_INVALID on entry, which is asserted. */
+
+static void breakupV128to64s ( IRTemp t128,
+                               /*OUTs*/
+                               IRTemp* t1, IRTemp* t0 )
+{
+   /* Refuse to overwrite an OUT temp that is already in use. */
+   vassert(t0 && *t0 == IRTemp_INVALID);
+   vassert(t1 && *t1 == IRTemp_INVALID);
+
+   /* Allocate both temps first (t0, then t1) ... */
+   *t0 = newTemp(Ity_I64);
+   *t1 = newTemp(Ity_I64);
+
+   /* ... then bind them, again t0 before t1. */
+   IRExpr* half0 = unop(Iop_V128to64, mkexpr(t128));
+   assign(*t0, half0);
+   IRExpr* half1 = unop(Iop_V128HIto64, mkexpr(t128));
+   assign(*t1, half1);
+}
+
  /* Helper for the SSSE3 (not SSE3) PMULHRSW insns.  Given two 64-bit
     values (aa,bb), computes, for each of the 4 16-bit lanes:
  
@@ -23015,6 +23029,66 @@ Long dis_ESC_0F__VEX (
  /*---                                                      ---*/
  /*------------------------------------------------------------*/
  
+/* Variable-control VPERMILPS on a single 128-bit vector.  The bottom
+   two bits of each 32-bit lane of |ctrlV| steer an Iop_Perm32x4 over
+   |dataV|.  Returns a new V128 temp holding the permuted value. */
+static IRTemp math_PERMILPS_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
+{
+   /* Shift every 32-bit control lane left by 30 and back right by 30,
+      which zeroes all but its bottom two bits. */
+   IRExpr* shiftedUp = binop(Iop_ShlN32x4, mkexpr(ctrlV), mkU8(30));
+   IRExpr* selectors = binop(Iop_ShrN32x4, shiftedUp, mkU8(30));
+
+   /* The cleaned-up control vector is the steering operand of the
+      Perm operation. */
+   IRTemp  res      = newTemp(Ity_V128);
+   IRExpr* permuted = binop(Iop_Perm32x4, mkexpr(dataV), selectors);
+   assign(res, permuted);
+   return res;
+}
+
+/* 256-bit variable-control VPERMILPS.  |dataV| and |ctrlV| are each
+   split into 128-bit halves, each data half is permuted by
+   math_PERMILPS_VAR_128 under the matching control half, and the two
+   results are rejoined.  Returns a new V256 temp. */
+static IRTemp math_PERMILPS_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
+{
+   IRTemp dataHi = IRTemp_INVALID, dataLo = IRTemp_INVALID;
+   IRTemp ctrlHi = IRTemp_INVALID, ctrlLo = IRTemp_INVALID;
+   breakupV256toV128s( dataV, &dataHi, &dataLo );
+   breakupV256toV128s( ctrlV, &ctrlHi, &ctrlLo );
+
+   /* High half is processed before the low half. */
+   IRTemp permHi = math_PERMILPS_VAR_128( dataHi, ctrlHi );
+   IRTemp permLo = math_PERMILPS_VAR_128( dataLo, ctrlLo );
+
+   IRTemp  res    = newTemp(Ity_V256);
+   IRExpr* joined = binop(Iop_V128HLtoV256, mkexpr(permHi), mkexpr(permLo));
+   assign(res, joined);
+   return res;
+}
+
+/* Variable-control VPERMILPD on a single 128-bit vector.  Bit 1 of
+   each 64-bit lane of |ctrlV| picks the source for the matching
+   result lane: clear selects the low 64 bits of |dataV|, set selects
+   the high 64 bits.  Returns a new V128 temp holding the result. */
+static IRTemp math_PERMILPD_VAR_128 ( IRTemp dataV, IRTemp ctrlV )
+{
+   IRTemp dataHi = IRTemp_INVALID, dataLo = IRTemp_INVALID;
+   IRTemp ctrlHi = IRTemp_INVALID, ctrlLo = IRTemp_INVALID;
+   breakupV128to64s( dataV, &dataHi, &dataLo );
+   breakupV128to64s( ctrlV, &ctrlHi, &ctrlLo );
+
+   /* Mask out bit 1 of each control lane and narrow it to the 8-bit
+      condition given to Mux0X: zero picks dataLo, nonzero dataHi. */
+   IRExpr* pickHi = unop(Iop_64to8,
+                         binop(Iop_And64, mkexpr(ctrlHi), mkU64(2)));
+   IRExpr* pickLo = unop(Iop_64to8,
+                         binop(Iop_And64, mkexpr(ctrlLo), mkU64(2)));
+   IRExpr* laneHi = IRExpr_Mux0X( pickHi, mkexpr(dataLo), mkexpr(dataHi) );
+   IRExpr* laneLo = IRExpr_Mux0X( pickLo, mkexpr(dataLo), mkexpr(dataHi) );
+
+   IRTemp res = newTemp(Ity_V128);
+   assign(res, binop(Iop_64HLtoV128, laneHi, laneLo));
+   return res;
+}
+
+/* 256-bit variable-control VPERMILPD.  |dataV| and |ctrlV| are each
+   split into 128-bit halves, each data half is permuted by
+   math_PERMILPD_VAR_128 under the matching control half, and the two
+   results are rejoined.  Returns a new V256 temp. */
+static IRTemp math_PERMILPD_VAR_256 ( IRTemp dataV, IRTemp ctrlV )
+{
+   IRTemp dataHi = IRTemp_INVALID, dataLo = IRTemp_INVALID;
+   IRTemp ctrlHi = IRTemp_INVALID, ctrlLo = IRTemp_INVALID;
+   breakupV256toV128s( dataV, &dataHi, &dataLo );
+   breakupV256toV128s( ctrlV, &ctrlHi, &ctrlLo );
+
+   /* High half is processed before the low half. */
+   IRTemp permHi = math_PERMILPD_VAR_128( dataHi, ctrlHi );
+   IRTemp permLo = math_PERMILPD_VAR_128( dataLo, ctrlLo );
+
+   IRTemp  res    = newTemp(Ity_V256);
+   IRExpr* joined = binop(Iop_V128HLtoV256, mkexpr(permHi), mkexpr(permLo));
+   assign(res, joined);
+   return res;
+}
+
  __attribute__((noinline))
  static
  Long dis_ESC_0F38__VEX (
@@ -23048,6 +23122,120 @@ Long dis_ESC_0F38__VEX (
        }
        break;
  
+   case 0x0C:
+      /* VPERMILPS with a variable (vector) control operand.  The E
+         operand (register or memory) supplies the control vector, the
+         register named by VEX.vvvv supplies the data, and the permuted
+         result is written to the G register.  These encodings carry
+         no immediate, so no instruction bytes follow the amode;
+         disAMode is therefore told that 0 bytes trail it, which is
+         what makes a %rip-relative memory operand resolve against the
+         true end of the instruction. */
+      /* VPERMILPS xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0C /r */
+      if (have66noF2noF3(pfx)
+          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
+         UChar  modrm = getUChar(delta);
+         UInt   rG    = gregOfRexRM(pfx, modrm);
+         UInt   rV    = getVexNvvvv(pfx);
+         IRTemp ctrlV = newTemp(Ity_V128);
+         if (epartIsReg(modrm)) {
+            /* Control vector comes from a register. */
+            UInt rE = eregOfRexRM(pfx, modrm);
+            delta += 1;
+            DIP("vpermilps %s,%s,%s\n",
+                nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
+            assign(ctrlV, getXMMReg(rE));
+         } else {
+            /* Control vector is loaded from memory. */
+            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
+            delta += alen;
+            DIP("vpermilps %s,%s,%s\n",
+                dis_buf, nameXMMReg(rV), nameXMMReg(rG));
+            assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
+         }
+         IRTemp dataV = newTemp(Ity_V128);
+         assign(dataV, getXMMReg(rV));
+         IRTemp resV = math_PERMILPS_VAR_128(dataV, ctrlV);
+         /* NOTE(review): going by the helper's name, this writes the
+            low 128 bits of the destination and zeroes the upper
+            half -- confirm against its definition. */
+         putYMMRegLoAndZU(rG, mkexpr(resV));
+         *uses_vvvv = True;
+         goto decode_success;
+      }
+      /* VPERMILPS ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0C /r */
+      if (have66noF2noF3(pfx)
+          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
+         UChar  modrm = getUChar(delta);
+         UInt   rG    = gregOfRexRM(pfx, modrm);
+         UInt   rV    = getVexNvvvv(pfx);
+         IRTemp ctrlV = newTemp(Ity_V256);
+         if (epartIsReg(modrm)) {
+            /* Control vector comes from a register. */
+            UInt rE = eregOfRexRM(pfx, modrm);
+            delta += 1;
+            DIP("vpermilps %s,%s,%s\n",
+                nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
+            assign(ctrlV, getYMMReg(rE));
+         } else {
+            /* Control vector is loaded from memory; as above, no
+               bytes follow the amode. */
+            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
+            delta += alen;
+            DIP("vpermilps %s,%s,%s\n",
+                dis_buf, nameYMMReg(rV), nameYMMReg(rG));
+            assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
+         }
+         IRTemp dataV = newTemp(Ity_V256);
+         assign(dataV, getYMMReg(rV));
+         IRTemp resV = math_PERMILPS_VAR_256(dataV, ctrlV);
+         putYMMReg(rG, mkexpr(resV));
+         *uses_vvvv = True;
+         goto decode_success;
+      }
+      break;
+
+   case 0x0D:
+      /* VPERMILPD with a variable (vector) control operand.  The E
+         operand (register or memory) supplies the control vector, the
+         register named by VEX.vvvv supplies the data, and the permuted
+         result is written to the G register.  These encodings carry
+         no immediate, so no instruction bytes follow the amode;
+         disAMode is therefore told that 0 bytes trail it, which is
+         what makes a %rip-relative memory operand resolve against the
+         true end of the instruction. */
+      /* VPERMILPD xmm3/m128, xmm2, xmm1 = VEX.NDS.128.66.0F38.W0 0D /r */
+      if (have66noF2noF3(pfx)
+          && 0==getVexL(pfx)/*128*/ && 0==getRexW(pfx)/*W0*/) {
+         UChar  modrm = getUChar(delta);
+         UInt   rG    = gregOfRexRM(pfx, modrm);
+         UInt   rV    = getVexNvvvv(pfx);
+         IRTemp ctrlV = newTemp(Ity_V128);
+         if (epartIsReg(modrm)) {
+            /* Control vector comes from a register. */
+            UInt rE = eregOfRexRM(pfx, modrm);
+            delta += 1;
+            DIP("vpermilpd %s,%s,%s\n",
+                nameXMMReg(rE), nameXMMReg(rV), nameXMMReg(rG));
+            assign(ctrlV, getXMMReg(rE));
+         } else {
+            /* Control vector is loaded from memory. */
+            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
+            delta += alen;
+            DIP("vpermilpd %s,%s,%s\n",
+                dis_buf, nameXMMReg(rV), nameXMMReg(rG));
+            assign(ctrlV, loadLE(Ity_V128, mkexpr(addr)));
+         }
+         IRTemp dataV = newTemp(Ity_V128);
+         assign(dataV, getXMMReg(rV));
+         IRTemp resV = math_PERMILPD_VAR_128(dataV, ctrlV);
+         /* NOTE(review): going by the helper's name, this writes the
+            low 128 bits of the destination and zeroes the upper
+            half -- confirm against its definition. */
+         putYMMRegLoAndZU(rG, mkexpr(resV));
+         *uses_vvvv = True;
+         goto decode_success;
+      }
+      /* VPERMILPD ymm3/m256, ymm2, ymm1 = VEX.NDS.256.66.0F38.W0 0D /r */
+      if (have66noF2noF3(pfx)
+          && 1==getVexL(pfx)/*256*/ && 0==getRexW(pfx)/*W0*/) {
+         UChar  modrm = getUChar(delta);
+         UInt   rG    = gregOfRexRM(pfx, modrm);
+         UInt   rV    = getVexNvvvv(pfx);
+         IRTemp ctrlV = newTemp(Ity_V256);
+         if (epartIsReg(modrm)) {
+            /* Control vector comes from a register. */
+            UInt rE = eregOfRexRM(pfx, modrm);
+            delta += 1;
+            DIP("vpermilpd %s,%s,%s\n",
+                nameYMMReg(rE), nameYMMReg(rV), nameYMMReg(rG));
+            assign(ctrlV, getYMMReg(rE));
+         } else {
+            /* Control vector is loaded from memory; as above, no
+               bytes follow the amode. */
+            addr = disAMode( &alen, vbi, pfx, delta, dis_buf, 0 );
+            delta += alen;
+            DIP("vpermilpd %s,%s,%s\n",
+                dis_buf, nameYMMReg(rV), nameYMMReg(rG));
+            assign(ctrlV, loadLE(Ity_V256, mkexpr(addr)));
+         }
+         IRTemp dataV = newTemp(Ity_V256);
+         assign(dataV, getYMMReg(rV));
+         IRTemp resV = math_PERMILPD_VAR_256(dataV, ctrlV);
+         putYMMReg(rG, mkexpr(resV));
+         *uses_vvvv = True;
+         goto decode_success;
+      }
+      break;
+
     case 0x18:
        /* VBROADCASTSS m32, xmm1 = VEX.128.66.0F38.WIG 18 /r */
        if (have66noF2noF3(pfx)
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c

index 11d7d9b92ee0dcca0acd5f7256945a9489dc6bc8..c8625f55885979c74d2d12efaa97fec1da0883c7 100644 (file)
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -3243,6 +3243,8 @@ static HReg iselVecExpr_wrk ( ISelEnv* env, IRExpr* e )
                             goto do_SseAssistedBinary;
        case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
                             goto do_SseAssistedBinary;
+      case Iop_Perm32x4:   fn = (HWord)h_generic_calc_Perm32x4;
+                           goto do_SseAssistedBinary;
        case Iop_QNarrowBin32Sto16Ux8:
                             fn = (HWord)h_generic_calc_QNarrowBin32Sto16Ux8;
                             goto do_SseAssistedBinary;
diff --git a/VEX/priv/host_generic_simd128.c b/VEX/priv/host_generic_simd128.c

index 6e1100c303986af0908ada1c9c753292094cdd55..14d454666c4405c279590f40201e319d1c34cb19 100644 (file)
--- a/VEX/priv/host_generic_simd128.c
+++ b/VEX/priv/host_generic_simd128.c
@@ -358,6 +358,16 @@ void VEX_REGPARM(3)
     res->w16[7] = narrow32to16(argL->w32[3]);
  }
  
+/* Generic helper for Iop_Perm32x4: 32-bit lane i of |res| is set to
+   the 32-bit lane of |argL| selected by the low two bits of lane i
+   of |argR|. */
+void VEX_REGPARM(3)
+     h_generic_calc_Perm32x4 ( /*OUT*/V128* res,
+                               V128* argL, V128* argR )
+{
+   int lane;
+   /* Lanes are produced in ascending order; the "& 3" keeps every
+      selector inside the four-lane range. */
+   for (lane = 0; lane < 4; lane++) {
+      res->w32[lane] = argL->w32[ argR->w32[lane] & 3 ];
+   }
+}
+
  
  /*---------------------------------------------------------------*/
  /*--- end                              host_generic_simd128.c ---*/
diff --git a/VEX/priv/host_generic_simd128.h b/VEX/priv/host_generic_simd128.h

index 6f9cc97bf57d4a23b6422a7b1cd8965aaff209b6..c5a76357847449784fdad75ec2306b70a04af6cf 100644 (file)
--- a/VEX/priv/host_generic_simd128.h
+++ b/VEX/priv/host_generic_simd128.h
@@ -83,6 +83,9 @@ extern VEX_REGPARM(3)
         void h_generic_calc_NarrowBin32to16x8
                                        ( /*OUT*/V128*, V128*, V128* );
  
+extern VEX_REGPARM(3)
+       void h_generic_calc_Perm32x4   ( /*OUT*/V128*, V128*, V128* );
+
  #endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
  
  /*---------------------------------------------------------------*/
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c

index efbe3c210a0adea48ca3e01953e45fb5bf5771c7..445b7bf879a1b613017f8662f0c06ed0cf1670d4 100644 (file)
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -925,6 +925,7 @@ void ppIROp ( IROp op )
        case Iop_ExtractV128: vex_printf("ExtractV128"); return;
  
        case Iop_Perm8x16: vex_printf("Perm8x16"); return;
+      case Iop_Perm32x4: vex_printf("Perm32x4"); return;
        case Iop_Reverse16_8x16: vex_printf("Reverse16_8x16"); return;
        case Iop_Reverse32_8x16: vex_printf("Reverse32_8x16"); return;
        case Iop_Reverse32_16x8: vex_printf("Reverse32_16x8"); return;
@@ -2579,7 +2580,7 @@ void typeOfPrimop ( IROp op,
        case Iop_InterleaveOddLanes8x16: case Iop_InterleaveEvenLanes8x16:
        case Iop_InterleaveOddLanes16x8: case Iop_InterleaveEvenLanes16x8:
        case Iop_InterleaveOddLanes32x4: case Iop_InterleaveEvenLanes32x4:
-      case Iop_Perm8x16:
+      case Iop_Perm8x16: case Iop_Perm32x4:
        case Iop_Recps32Fx4:
        case Iop_Rsqrts32Fx4:
           BINARY(Ity_V128,Ity_V128, Ity_V128);
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h

index cda42181b7a583888a0c1c2a86cc206c51b31d38..06dc82e69a9a1afa2b0f9d4fdfb915139ce1004d 100644 (file)
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -1415,6 +1415,7 @@ typedef
           argR[i] values may only be in the range 0 .. 15, else behaviour
           is undefined. */
        Iop_Perm8x16,
+      Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */
  
        /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
           See floating-point equivalents for details. */
author	Julian Seward <jseward@acm.org>
	Wed, 20 Jun 2012 11:46:19 +0000 (11:46 +0000)
committer	Julian Seward <jseward@acm.org>
	Wed, 20 Jun 2012 11:46:19 +0000 (11:46 +0000)
VEX/priv/guest_amd64_toIR.c		patch \| blob \| blame \| history
VEX/priv/host_amd64_isel.c		patch \| blob \| blame \| history
VEX/priv/host_generic_simd128.c		patch \| blob \| blame \| history
VEX/priv/host_generic_simd128.h		patch \| blob \| blame \| history
VEX/priv/ir_defs.c		patch \| blob \| blame \| history
VEX/pub/libvex_ir.h		patch \| blob \| blame \| history