From: Julian Seward
Date: Mon, 14 Feb 2011 13:19:51 +0000 (+0000)
Subject: Merge from trunk, r2072 (Implement ROUNDPD and ROUNDPS (imm rounding
 mode only).)
X-Git-Tag: svn/VALGRIND_3_6_1^2~13
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3545b370c19c2c637f74390bddcd039e461b44b3;p=thirdparty%2Fvalgrind.git

Merge from trunk, r2072 (Implement ROUNDPD and ROUNDPS (imm rounding
mode only).)

git-svn-id: svn://svn.valgrind.org/vex/branches/VEX_3_6_BRANCH@2090
---

diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index 79b1269211..8b6d4fc278 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -15230,7 +15230,8 @@ DisResult disInstr_AMD64_WRK (
          imm = insn[3+alen];
          if (imm & ~3) goto decode_failure;
          delta += 3+alen+1;
-         DIP( "roundsd $%d,%s,%s\n",
+         DIP( "rounds%c $%d,%s,%s\n",
+              isD ? 'd' : 's',
               imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
       }
 
@@ -15249,6 +15250,133 @@ DisResult disInstr_AMD64_WRK (
       goto decode_success;
    }
 
+
+   /* 66 0F 3A 09 /r ib = ROUNDPD imm8, xmm2/m128, xmm1
+      (Partial implementation only -- only deal with cases where
+      the rounding mode is specified directly by the immediate byte.)
+   */
+   if (have66noF2noF3(pfx)
+       && sz == 2
+       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x09) {
+
+      IRTemp src0 = newTemp(Ity_F64);
+      IRTemp src1 = newTemp(Ity_F64);
+      IRTemp res0 = newTemp(Ity_F64);
+      IRTemp res1 = newTemp(Ity_F64);
+      Int imm = 0;
+
+      modrm = insn[3];
+
+      if (epartIsReg(modrm)) {
+         assign( src0,
+                 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
+         assign( src1,
+                 getXMMRegLane64F( eregOfRexRM(pfx, modrm), 1 ) );
+         imm = insn[3+1];
+         if (imm & ~3) goto decode_failure;
+         delta += 3+1+1;
+         DIP( "roundpd $%d,%s,%s\n",
+              imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
+                   nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+      } else {
+         addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+         gen_SEGV_if_not_16_aligned(addr);
+         assign( src0, loadLE(Ity_F64,
+                              binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
+         assign( src1, loadLE(Ity_F64,
+                              binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
+         imm = insn[3+alen];
+         if (imm & ~3) goto decode_failure;
+         delta += 3+alen+1;
+         DIP( "roundpd $%d,%s,%s\n",
+              imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+      }
+
+      /* (imm & 3) contains an Intel-encoded rounding mode.  Because
+         that encoding is the same as the encoding for IRRoundingMode,
+         we can use that value directly in the IR as a rounding
+         mode. */
+      assign(res0, binop(Iop_RoundF64toInt, mkU32(imm & 3), mkexpr(src0)) );
+      assign(res1, binop(Iop_RoundF64toInt, mkU32(imm & 3), mkexpr(src1)) );
+
+      putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
+      putXMMRegLane64F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
+
+      goto decode_success;
+   }
+
+
+   /* 66 0F 3A 08 /r ib = ROUNDPS imm8, xmm2/m128, xmm1
+      (Partial implementation only -- only deal with cases where
+      the rounding mode is specified directly by the immediate byte.)
+   */
+   if (have66noF2noF3(pfx)
+       && sz == 2
+       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x08) {
+
+      IRTemp src0 = newTemp(Ity_F32);
+      IRTemp src1 = newTemp(Ity_F32);
+      IRTemp src2 = newTemp(Ity_F32);
+      IRTemp src3 = newTemp(Ity_F32);
+      IRTemp res0 = newTemp(Ity_F32);
+      IRTemp res1 = newTemp(Ity_F32);
+      IRTemp res2 = newTemp(Ity_F32);
+      IRTemp res3 = newTemp(Ity_F32);
+      Int imm = 0;
+
+      modrm = insn[3];
+
+      if (epartIsReg(modrm)) {
+         assign( src0,
+                 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
+         assign( src1,
+                 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 1 ) );
+         assign( src2,
+                 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 2 ) );
+         assign( src3,
+                 getXMMRegLane32F( eregOfRexRM(pfx, modrm), 3 ) );
+         imm = insn[3+1];
+         if (imm & ~3) goto decode_failure;
+         delta += 3+1+1;
+         DIP( "roundps $%d,%s,%s\n",
+              imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
+                   nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+      } else {
+         addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
+         gen_SEGV_if_not_16_aligned(addr);
+         assign( src0, loadLE(Ity_F32,
+                              binop(Iop_Add64, mkexpr(addr), mkU64(0) )));
+         assign( src1, loadLE(Ity_F32,
+                              binop(Iop_Add64, mkexpr(addr), mkU64(4) )));
+         assign( src2, loadLE(Ity_F32,
+                              binop(Iop_Add64, mkexpr(addr), mkU64(8) )));
+         assign( src3, loadLE(Ity_F32,
+                              binop(Iop_Add64, mkexpr(addr), mkU64(12) )));
+         imm = insn[3+alen];
+         if (imm & ~3) goto decode_failure;
+         delta += 3+alen+1;
+         DIP( "roundps $%d,%s,%s\n",
+              imm, dis_buf, nameXMMReg( gregOfRexRM(pfx, modrm) ) );
+      }
+
+      /* (imm & 3) contains an Intel-encoded rounding mode.  Because
+         that encoding is the same as the encoding for IRRoundingMode,
+         we can use that value directly in the IR as a rounding
+         mode. */
+      assign(res0, binop(Iop_RoundF32toInt, mkU32(imm & 3), mkexpr(src0)) );
+      assign(res1, binop(Iop_RoundF32toInt, mkU32(imm & 3), mkexpr(src1)) );
+      assign(res2, binop(Iop_RoundF32toInt, mkU32(imm & 3), mkexpr(src2)) );
+      assign(res3, binop(Iop_RoundF32toInt, mkU32(imm & 3), mkexpr(src3)) );
+
+      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res0) );
+      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 1, mkexpr(res1) );
+      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 2, mkexpr(res2) );
+      putXMMRegLane32F( gregOfRexRM(pfx, modrm), 3, mkexpr(res3) );
+
+      goto decode_success;
+   }
+
 
    /* F3 0F BD -- LZCNT (count leading zeroes.  An AMD extension,
       which we can only decode if we're sure this is an AMD cpu that
       supports LZCNT, since otherwise it's BSR, which behaves
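
Background on the encoding the in-diff comments rely on: the low two bits of
the ROUNDPD/ROUNDPS imm8 select the rounding mode in Intel's encoding
(0 = nearest, ties to even; 1 = toward -infinity; 2 = toward +infinity;
3 = toward zero), and this happens to match VEX's IRRoundingMode values, so
mkU32(imm & 3) can be fed straight to Iop_RoundF64toInt / Iop_RoundF32toInt.
The check `if (imm & ~3) goto decode_failure;` rejects any immediate with
imm8[2] set (take the mode from MXCSR.RC) or imm8[3] set (suppress precision
exceptions), which is why the commit message says "imm rounding mode only".
The standalone C sketch below is not part of the patch; the helper name
round_like_roundpd_lane is hypothetical. It mimics one F64 lane with C99
fesetround()/rint() so the four modes can be checked on ties such as 2.5
(compile with -lm):

    /* Standalone sketch, not part of the patch: mimic one ROUNDPD lane
       for each immediate-specified rounding mode using C99 <fenv.h>.
       fe_modes maps the Intel imm encoding (0..3) onto fenv modes. */
    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    static const int fe_modes[4] = {
       FE_TONEAREST,   /* imm 0: round to nearest, ties to even */
       FE_DOWNWARD,    /* imm 1: round toward -infinity         */
       FE_UPWARD,      /* imm 2: round toward +infinity         */
       FE_TOWARDZERO   /* imm 3: round toward zero (truncate)   */
    };

    /* Hypothetical helper: what ROUNDPD does to one F64 lane when
       imm8[2] is clear (mode from the immediate, not MXCSR.RC). */
    static double round_like_roundpd_lane ( int imm, double x )
    {
       int old = fegetround();
       fesetround( fe_modes[imm & 3] );
       double r = rint(x);        /* round to integral, current mode */
       fesetround(old);
       return r;
    }

    int main ( void )
    {
       for (int imm = 0; imm < 4; imm++)
          printf("imm=%d:  2.5 -> %4.1f   -2.5 -> %4.1f\n", imm,
                 round_like_roundpd_lane(imm,  2.5),
                 round_like_roundpd_lane(imm, -2.5));
       return 0;
    }

On a conforming implementation this prints 2.0/-2.0 for imm=0 (ties go to
even), 2.0/-3.0 for imm=1, 3.0/-2.0 for imm=2, and 2.0/-2.0 for imm=3,
matching what the per-lane Iop_RoundF64toInt calls above produce.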