From: Julian Seward Date: Wed, 19 Oct 2011 14:50:27 +0000 (+0000) Subject: Implement SSE4.1 PMULDQ. Fixes #280290. ** MERGE TO AVX ** X-Git-Tag: svn/VALGRIND_3_7_0^2~11 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2d56b3bc54a0d066b110217106b4b7fec9630143;p=thirdparty%2Fvalgrind.git Implement SSE4.1 PMULDQ. Fixes #280290. ** MERGE TO AVX ** git-svn-id: svn://svn.valgrind.org/vex/trunk@2217 --- diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c index 7b57f305ff..c6ad6a1ed4 100644 --- a/VEX/priv/guest_amd64_toIR.c +++ b/VEX/priv/guest_amd64_toIR.c @@ -16095,6 +16095,46 @@ DisResult disInstr_AMD64_WRK ( goto decode_success; } + /* 66 0F 38 28 = PMULDQ -- signed widening multiply of 32-bit lanes 0 x + 0 to form lower 64-bit half and lanes 2 x 2 to form upper 64-bit + half */ + /* This is a really poor translation -- could be improved if + performance critical. It's a copy-paste of PMULUDQ, too. */ + if (have66noF2noF3(pfx) && sz == 2 + && insn[0] == 0x0F && insn[1] == 0x38 && insn[2] == 0x28) { + IRTemp sV, dV; + IRTemp s3, s2, s1, s0, d3, d2, d1, d0; + sV = newTemp(Ity_V128); + dV = newTemp(Ity_V128); + s3 = s2 = s1 = s0 = d3 = d2 = d1 = d0 = IRTemp_INVALID; + t1 = newTemp(Ity_I64); + t0 = newTemp(Ity_I64); + modrm = insn[3]; + assign( dV, getXMMReg(gregOfRexRM(pfx,modrm)) ); + + if (epartIsReg(modrm)) { + assign( sV, getXMMReg(eregOfRexRM(pfx,modrm)) ); + delta += 3+1; + DIP("pmuldq %s,%s\n", nameXMMReg(eregOfRexRM(pfx,modrm)), + nameXMMReg(gregOfRexRM(pfx,modrm))); + } else { + addr = disAMode ( &alen, vbi, pfx, delta+3, dis_buf, 0 ); + assign( sV, loadLE(Ity_V128, mkexpr(addr)) ); + delta += 3+alen; + DIP("pmuldq %s,%s\n", dis_buf, + nameXMMReg(gregOfRexRM(pfx,modrm))); + } + + breakup128to32s( dV, &d3, &d2, &d1, &d0 ); + breakup128to32s( sV, &s3, &s2, &s1, &s0 ); + + assign( t0, binop( Iop_MullS32, mkexpr(d0), mkexpr(s0)) ); + putXMMRegLane64( gregOfRexRM(pfx,modrm), 0, mkexpr(t0) ); + assign( t1, binop( Iop_MullS32, 
mkexpr(d2), mkexpr(s2)) ); + putXMMRegLane64( gregOfRexRM(pfx,modrm), 1, mkexpr(t1) ); + goto decode_success; + } + /* ---------------------------------------------------- */ /* --- end of the SSE4 decoder --- */ /* ---------------------------------------------------- */