From: Julian Seward Date: Wed, 27 Dec 2006 04:21:05 +0000 (+0000) Subject: Merge r1702 (x86 front end: Implement MASKMOVQ and MASKMOVDQU) X-Git-Tag: svn/VALGRIND_3_2_3^2~24 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2badeee1533c0440cb9afa6c5cbd017141cd2dfe;p=thirdparty%2Fvalgrind.git Merge r1702 (x86 front end: Implement MASKMOVQ and MASKMOVDQU) git-svn-id: svn://svn.valgrind.org/vex/branches/VEX_3_2_BRANCH@1703 --- diff --git a/VEX/priv/guest-x86/toIR.c b/VEX/priv/guest-x86/toIR.c index 7bed907463..3239cf8aa3 100644 --- a/VEX/priv/guest-x86/toIR.c +++ b/VEX/priv/guest-x86/toIR.c @@ -5679,6 +5679,38 @@ UInt dis_MMX ( Bool* decode_ok, UChar sorb, Int sz, Int delta ) break; } + case 0xF7: { + IRTemp addr = newTemp(Ity_I32); + IRTemp regD = newTemp(Ity_I64); + IRTemp regM = newTemp(Ity_I64); + IRTemp mask = newTemp(Ity_I64); + IRTemp olddata = newTemp(Ity_I64); + IRTemp newdata = newTemp(Ity_I64); + + modrm = getIByte(delta); + if (sz != 4 || (!epartIsReg(modrm))) + goto mmx_decode_failure; + delta++; + + assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); + assign( regM, getMMXReg( eregOfRM(modrm) )); + assign( regD, getMMXReg( gregOfRM(modrm) )); + assign( mask, binop(Iop_SarN8x8, mkexpr(regM), mkU8(7)) ); + assign( olddata, loadLE( Ity_I64, mkexpr(addr) )); + assign( newdata, + binop(Iop_Or64, + binop(Iop_And64, + mkexpr(regD), + mkexpr(mask) ), + binop(Iop_And64, + mkexpr(olddata), + unop(Iop_Not64, mkexpr(mask)))) ); + storeLE( mkexpr(addr), mkexpr(newdata) ); + DIP("maskmovq %s,%s\n", nameMMXReg( eregOfRM(modrm) ), + nameMMXReg( gregOfRM(modrm) ) ); + break; + } + /* --- MMX decode failure --- */ default: mmx_decode_failure: @@ -7670,6 +7702,16 @@ DisResult disInstr_X86_WRK ( goto decode_success; } + /* ***--- this is an MMX class insn introduced in SSE1 ---*** */ + /* 0F F7 = MASKMOVQ -- 8x8 masked store */ + if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xF7) { + Bool ok = False; + delta = dis_MMX( &ok, sorb, sz, delta+1 ); + if (!ok) + goto decode_failure; + goto decode_success; + } + /* 0F 5F = MAXPS -- max 32Fx4 from R/M to R */ if (sz == 4 && insn[0] == 0x0F && insn[1] == 0x5F) { delta = dis_SSE_E_to_G_all( sorb, delta+2, "maxps", Iop_Max32Fx4 ); @@ -9314,6 +9356,50 @@ DisResult disInstr_X86_WRK ( /* else fall through */ } + /* 66 0F F7 = MASKMOVDQU -- store selected bytes of double quadword */ + if (insn[0] == 0x0F && insn[1] == 0xF7) { + modrm = getIByte(delta+2); + if (sz == 2 && epartIsReg(modrm)) { + IRTemp regD = newTemp(Ity_V128); + IRTemp mask = newTemp(Ity_V128); + IRTemp olddata = newTemp(Ity_V128); + IRTemp newdata = newTemp(Ity_V128); + addr = newTemp(Ity_I32); + + assign( addr, handleSegOverride( sorb, getIReg(4, R_EDI) )); + assign( regD, getXMMReg( gregOfRM(modrm) )); + + /* Unfortunately can't do the obvious thing with SarN8x16 + here since that can't be re-emitted as SSE2 code - no such + insn. 
*/ + assign( + mask, + binop(Iop_64HLtoV128, + binop(Iop_SarN8x8, + getXMMRegLane64( eregOfRM(modrm), 1 ), + mkU8(7) ), + binop(Iop_SarN8x8, + getXMMRegLane64( eregOfRM(modrm), 0 ), + mkU8(7) ) )); + assign( olddata, loadLE( Ity_V128, mkexpr(addr) )); + assign( newdata, + binop(Iop_OrV128, + binop(Iop_AndV128, + mkexpr(regD), + mkexpr(mask) ), + binop(Iop_AndV128, + mkexpr(olddata), + unop(Iop_NotV128, mkexpr(mask)))) ); + storeLE( mkexpr(addr), mkexpr(newdata) ); + + delta += 2+1; + DIP("maskmovdqu %s,%s\n", nameXMMReg( eregOfRM(modrm) ), + nameXMMReg( gregOfRM(modrm) ) ); + goto decode_success; + } + /* else fall through */ + } + /* 66 0F E7 = MOVNTDQ -- for us, just a plain SSE store. */ if (insn[0] == 0x0F && insn[1] == 0xE7) { modrm = getIByte(delta+2); diff --git a/VEX/priv/host-generic/h_generic_simd64.c b/VEX/priv/host-generic/h_generic_simd64.c index cd06da9e0b..4f9bfe81b8 100644 --- a/VEX/priv/host-generic/h_generic_simd64.c +++ b/VEX/priv/host-generic/h_generic_simd64.c @@ -299,6 +299,11 @@ static inline UChar qnarrow16Uto8 ( UShort xx0 ) /* shifts: we don't care about out-of-range ones, since that is dealt with at a higher level. */ +static inline UChar sar8 ( UChar v, UInt n ) +{ + return toUChar(((Char)v) >> n); +} + static inline UShort shl16 ( UShort v, UInt n ) { return toUShort(v << n); @@ -868,6 +873,22 @@ ULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn ) ); } +ULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn ) +{ + /* vassert(nn < 8); */ + nn &= 7; + return mk8x8( + sar8( sel8x8_7(xx), nn ), + sar8( sel8x8_6(xx), nn ), + sar8( sel8x8_5(xx), nn ), + sar8( sel8x8_4(xx), nn ), + sar8( sel8x8_3(xx), nn ), + sar8( sel8x8_2(xx), nn ), + sar8( sel8x8_1(xx), nn ), + sar8( sel8x8_0(xx), nn ) + ); +} + /* ------------ Averaging ------------ */ ULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy ) diff --git a/VEX/priv/host-generic/h_generic_simd64.h b/VEX/priv/host-generic/h_generic_simd64.h index 4b9532f91e..93f9e904bf 100644 --- a/VEX/priv/host-generic/h_generic_simd64.h +++ b/VEX/priv/host-generic/h_generic_simd64.h @@ -114,6 +114,7 @@ extern ULong h_generic_calc_ShlN32x2 ( ULong, UInt ); extern ULong h_generic_calc_ShrN16x4 ( ULong, UInt ); extern ULong h_generic_calc_ShrN32x2 ( ULong, UInt ); +extern ULong h_generic_calc_SarN8x8 ( ULong, UInt ); extern ULong h_generic_calc_SarN16x4 ( ULong, UInt ); extern ULong h_generic_calc_SarN32x2 ( ULong, UInt ); diff --git a/VEX/priv/host-x86/isel.c b/VEX/priv/host-x86/isel.c index 22e3ef6524..f10cbca244 100644 --- a/VEX/priv/host-x86/isel.c +++ b/VEX/priv/host-x86/isel.c @@ -2223,6 +2223,8 @@ static void iselInt64Expr_wrk ( HReg* rHi, HReg* rLo, ISelEnv* env, IRExpr* e ) fn = (HWord)h_generic_calc_SarN32x2; goto shifty; case Iop_SarN16x4: fn = (HWord)h_generic_calc_SarN16x4; goto shifty; + case Iop_SarN8x8: + fn = (HWord)h_generic_calc_SarN8x8; goto shifty; shifty: { /* Note: the following assumes all helpers are of signature diff --git a/VEX/priv/ir/irdefs.c b/VEX/priv/ir/irdefs.c index 5598801e32..9eddf44a8b 100644 --- a/VEX/priv/ir/irdefs.c +++ b/VEX/priv/ir/irdefs.c @@ -340,6 +340,7 @@ void ppIROp ( IROp op ) case Iop_ShlN32x2: vex_printf("ShlN32x2"); return; case Iop_ShrN16x4: vex_printf("ShrN16x4"); return; case Iop_ShrN32x2: vex_printf("ShrN32x2"); return; + case Iop_SarN8x8: vex_printf("SarN8x8"); return; case Iop_SarN16x4: vex_printf("SarN16x4"); return; case Iop_SarN32x2: vex_printf("SarN32x2"); return; case Iop_QNarrow16Ux4: vex_printf("QNarrow16Ux4"); return; @@ -1471,7 +1472,7 @@ void typeOfPrimop ( IROp op, case 
Iop_ShlN32x2: case Iop_ShlN16x4: case Iop_ShrN32x2: case Iop_ShrN16x4: - case Iop_SarN32x2: case Iop_SarN16x4: + case Iop_SarN32x2: case Iop_SarN16x4: case Iop_SarN8x8: BINARY(Ity_I64,Ity_I8, Ity_I64); case Iop_Shl8: case Iop_Shr8: case Iop_Sar8: diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h index 94dfdd8d93..92f582d8b8 100644 --- a/VEX/pub/libvex_ir.h +++ b/VEX/pub/libvex_ir.h @@ -494,9 +494,9 @@ typedef Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2, /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */ - Iop_ShlN16x4, Iop_ShlN32x2, - Iop_ShrN16x4, Iop_ShrN32x2, - Iop_SarN16x4, Iop_SarN32x2, + Iop_ShlN16x4, Iop_ShlN32x2, + Iop_ShrN16x4, Iop_ShrN32x2, + Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2, /* NARROWING -- narrow 2xI64 into 1xI64, hi half from left arg */ Iop_QNarrow16Ux4,
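
The masked-store IR that the toIR.c hunks above construct — mask = SarN8x8(regM, 7), then store (regD & mask) | (olddata & ~mask) back at [EDI] — amounts to a per-byte read-modify-write. A minimal C reference sketch of that behaviour (illustrative only; ref_maskmovq is a hypothetical helper, not code from this patch), assuming the [EDI] address has already been resolved through any segment override:

#include <stdint.h>

/* Byte i of 'data' is written to mem[i] only if the top bit of byte i of
   'mask' is set; all other bytes at mem[] are left untouched.  This is the
   (data & m) | (old & ~m) combination the generated IR performs, with m
   being each mask byte arithmetically shifted right by 7 (so 0xFF or 0x00). */
static void ref_maskmovq(uint8_t *mem,            /* points at [EDI]  */
                         const uint8_t data[8],   /* MMX reg G (regD) */
                         const uint8_t mask[8])   /* MMX reg E (regM) */
{
    for (int i = 0; i < 8; i++) {
        uint8_t m = (mask[i] & 0x80) ? 0xFF : 0x00;
        mem[i] = (uint8_t)((data[i] & m) | (mem[i] & ~m));
    }
}

MASKMOVDQU is the same idea widened to 16 bytes; the patch builds its mask from two SarN8x8 halves because, as the comment in that hunk notes, there is no SSE2 instruction the back end could re-emit for an 8x16 arithmetic shift.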
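
The new Iop_SarN8x8 primop, its generic fallback h_generic_calc_SarN8x8, and the isel.c/irdefs.c/libvex_ir.h plumbing above exist to produce that 0x00/0xFF per-byte mask. A standalone sketch of the same lane-wise arithmetic shift (ref_sar_8x8 is a hypothetical name, not part of the patch; like the original sar8 it assumes >> on a negative signed value shifts arithmetically, which holds on the compilers VEX targets):

#include <stdint.h>

/* Treat xx as eight independent byte lanes (lane 0 = bits 7:0, matching
   sel8x8_0) and arithmetically shift each lane right by nn, with nn
   masked to 0..7 exactly as h_generic_calc_SarN8x8 does. */
static uint64_t ref_sar_8x8(uint64_t xx, unsigned nn)
{
    nn &= 7;
    uint64_t out = 0;
    for (int lane = 0; lane < 8; lane++) {
        int8_t b = (int8_t)(xx >> (8 * lane));                 /* pick the lane  */
        out |= ((uint64_t)(uint8_t)(b >> nn)) << (8 * lane);   /* shift, put back */
    }
    return out;
}

With nn == 7 every lane collapses to 0xFF or 0x00 depending on its sign bit, which is exactly the mask the maskmovq/maskmovdqu IR consumes.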