goto decode_success;
}
+   /* 66 0F 38 2B /r = PACKUSDW xmm1, xmm2/m128
+      2x 32x4 S->U saturating narrow from xmm1 and xmm2/m128 to xmm1 */
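+   /* Each signed 32-bit source lane is clamped to 0..0xFFFF:
+      e.g. -1 -> 0x0000, 0x12345 -> 0xFFFF, 0x1234 -> 0x1234. */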
+ if ( have66noF2noF3( pfx )
+ && sz == 2
+ && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x2B ) {
+
+ modrm = insn[3];
+
+ IRTemp argL = newTemp(Ity_V128);
+ IRTemp argR = newTemp(Ity_V128);
+
+ if ( epartIsReg(modrm) ) {
+ assign( argL, getXMMReg( eregOfRexRM(pfx, modrm) ) );
+ delta += 3+1;
+ DIP( "packusdw %s,%s\n",
+ nameXMMReg( eregOfRexRM(pfx, modrm) ),
+ nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ } else {
+ addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+ gen_SEGV_if_not_16_aligned( addr );
+ assign( argL, loadLE( Ity_V128, mkexpr(addr) ));
+ delta += 3+alen;
+ DIP( "packusdw %s,%s\n",
+ dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+ }
+
+ assign(argR, getXMMReg( gregOfRexRM(pfx, modrm) ));
+
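+      /* The IR narrowing binop takes the hi half of its result from
+         the left arg: argL (xmm2/m128) supplies the hi 4 lanes and
+         argR (the dest reg) the lo 4 lanes, as PACKUSDW requires. */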
+ putXMMReg( gregOfRexRM(pfx, modrm),
+ binop( Iop_QNarrow32Sto16Ux8, mkexpr(argL), mkexpr(argR)) );
+
+ goto decode_success;
+ }
+
/* ---------------------------------------------------- */
/* --- end of the SSE4 decoder --- */
/* ---------------------------------------------------- */
goto do_SseAssistedBinary;
case Iop_CmpGT64Sx2: fn = (HWord)h_generic_calc_CmpGT64Sx2;
goto do_SseAssistedBinary;
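+      /* There is no SSE2 insn for this op (PACKUSDW is SSE4.1), so
+         implement it with a call to the generic C helper. */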
+ case Iop_QNarrow32Sto16Ux8:
+ fn = (HWord)h_generic_calc_QNarrow32Sto16Ux8;
+ goto do_SseAssistedBinary;
do_SseAssistedBinary: {
/* RRRufff! RRRufff code is what we're generating here. Oh
well. */
return toUChar(((Char)v) >> n);
}
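+/* Clamp a signed 32-bit value into the unsigned 16-bit range
+   0 .. 65535. */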
+static inline UShort qnarrow32Sto16U ( UInt xx0 )
+{
+ Int xx = (Int)xx0;
+ if (xx < 0) xx = 0;
+ if (xx > 65535) xx = 65535;
+ return (UShort)xx;
+}
+
void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
V128* argL, V128* argR )
{
res->w8[15] = sar8(argL->w8[15], nn);
}
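+/* Saturating narrow of 2x4 signed 32-bit lanes to 8 unsigned 16-bit
+   lanes.  The lo half of the result comes from argR, the hi half
+   from argL ("hi half from left arg", as for the other narrowing
+   ops).  E.g. the dword lanes
+      argL->w32[] = { 0xFFFFFFFF, 0x00012345, 0x00001234, 0x00000000 }
+      argR->w32[] = { 0x7FFFFFFF, 0x80000000, 0x0000FFFF, 0x00000001 }
+   narrow to
+      res->w16[]  = { 0xFFFF, 0x0000, 0xFFFF, 0x0001,   (from argR)
+                      0x0000, 0xFFFF, 0x1234, 0x0000 }  (from argL)  */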
+void h_generic_calc_QNarrow32Sto16Ux8 ( /*OUT*/V128* res,
+ V128* argL, V128* argR )
+{
+ res->w16[0] = qnarrow32Sto16U(argR->w32[0]);
+ res->w16[1] = qnarrow32Sto16U(argR->w32[1]);
+ res->w16[2] = qnarrow32Sto16U(argR->w32[2]);
+ res->w16[3] = qnarrow32Sto16U(argR->w32[3]);
+ res->w16[4] = qnarrow32Sto16U(argL->w32[0]);
+ res->w16[5] = qnarrow32Sto16U(argL->w32[1]);
+ res->w16[6] = qnarrow32Sto16U(argL->w32[2]);
+ res->w16[7] = qnarrow32Sto16U(argL->w32[3]);
+}
+
+
/*---------------------------------------------------------------*/
/*--- end host_generic_simd128.c ---*/
/*---------------------------------------------------------------*/
extern void h_generic_calc_SarN64x2 ( /*OUT*/V128*, V128*, UInt );
extern void h_generic_calc_SarN8x16 ( /*OUT*/V128*, V128*, UInt );
+extern void h_generic_calc_QNarrow32Sto16Ux8
+ ( /*OUT*/V128*, V128*, V128* );
+
#endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
case Iop_Narrow16x8: vex_printf("Narrow16x8"); return;
case Iop_Narrow32x4: vex_printf("Narrow32x4"); return;
+ case Iop_QNarrow32Sto16Ux8: vex_printf("QNarrow32Sto16Ux8"); return;
case Iop_QNarrow16Sto8Ux16: vex_printf("QNarrow16Sto8Ux16"); return;
case Iop_QNarrow32Uto16Ux8: vex_printf("QNarrow32Uto16Ux8"); return;
case Iop_QNarrow16Sto8Sx16: vex_printf("QNarrow16Sto8Sx16"); return;
case Iop_Sar8x16: case Iop_Sar16x8: case Iop_Sar32x4: case Iop_Sar64x2:
case Iop_Sal8x16: case Iop_Sal16x8: case Iop_Sal32x4: case Iop_Sal64x2:
case Iop_Rol8x16: case Iop_Rol16x8: case Iop_Rol32x4:
- case Iop_QNarrow16Sto8Ux16:
+ case Iop_QNarrow16Sto8Ux16: case Iop_QNarrow32Sto16Ux8:
case Iop_QNarrow16Sto8Sx16: case Iop_QNarrow32Sto16Sx8:
case Iop_QNarrow32Uto16Ux8:
case Iop_Narrow16x8: case Iop_Narrow32x4:
/* NARROWING -- narrow 2xV128 into 1xV128, hi half from left arg */
/* See comments above w.r.t. U vs S issues in saturated narrowing. */
- Iop_QNarrow16Sto8Ux16,
+ Iop_QNarrow16Sto8Ux16, Iop_QNarrow32Sto16Ux8,
Iop_QNarrow16Sto8Sx16, Iop_QNarrow32Sto16Sx8,
Iop_QNarrow16Uto8Ux16, Iop_QNarrow32Uto16Ux8,
Iop_Narrow16x8, Iop_Narrow32x4,