From 51b91b19a390d390a5eaf25457c9bd82f699831a Mon Sep 17 00:00:00 2001 From: Julian Seward Date: Fri, 23 Jul 2010 21:23:25 +0000 Subject: [PATCH] Support the SSE4 insn 'roundss' in 32-bit mode. Lack of this was causing problems for people running 32-bit apps on MacOSX 10.6 on newer hardware. Fixes #241377. git-svn-id: svn://svn.valgrind.org/vex/trunk@1987 --- VEX/priv/guest_x86_toIR.c | 61 +++++++++++++++++++++++++++++++++++++++ VEX/priv/host_x86_isel.c | 19 ++++++++++++ 2 files changed, 80 insertions(+) diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index 1dcdf627b3..2bf234a772 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -12522,6 +12522,67 @@ DisResult disInstr_X86_WRK ( /* --- end of the SSSE3 decoder. --- */ /* ---------------------------------------------------- */ + /* ---------------------------------------------------- */ + /* --- start of the SSE4 decoder --- */ + /* ---------------------------------------------------- */ + + /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 + (Partial implementation only -- only deal with cases where + the rounding mode is specified directly by the immediate byte.) + 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1 + (Limitations ditto) + */ + if (sz == 2 + && insn[0] == 0x0F && insn[1] == 0x3A + && (/*insn[2] == 0x0B || */insn[2] == 0x0A)) { + + Bool isD = insn[2] == 0x0B; + IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32); + IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32); + Int imm = 0; + + modrm = insn[3]; + + if (epartIsReg(modrm)) { + assign( src, + isD ? getXMMRegLane64F( eregOfRM(modrm), 0 ) + : getXMMRegLane32F( eregOfRM(modrm), 0 ) ); + imm = insn[3+1]; + if (imm & ~3) goto decode_failure; + delta += 3+1+1; + DIP( "rounds%c $%d,%s,%s\n", + isD ? 'd' : 's', + imm, nameXMMReg( eregOfRM(modrm) ), + nameXMMReg( gregOfRM(modrm) ) ); + } else { + addr = disAMode( &alen, sorb, delta+3, dis_buf ); + assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) )); + imm = insn[3+alen]; + if (imm & ~3) goto decode_failure; + delta += 3+alen+1; + DIP( "roundsd $%d,%s,%s\n", + imm, dis_buf, nameXMMReg( gregOfRM(modrm) ) ); + } + + /* (imm & 3) contains an Intel-encoded rounding mode. Because + that encoding is the same as the encoding for IRRoundingMode, + we can use that value directly in the IR as a rounding + mode. */ + assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt, + mkU32(imm & 3), mkexpr(src)) ); + + if (isD) + putXMMRegLane64F( gregOfRM(modrm), 0, mkexpr(res) ); + else + putXMMRegLane32F( gregOfRM(modrm), 0, mkexpr(res) ); + + goto decode_success; + } + + /* ---------------------------------------------------- */ + /* --- end of the SSE4 decoder --- */ + /* ---------------------------------------------------- */ + after_sse_decoders: /* ---------------------------------------------------- */ diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c index 02d83d8af8..e1242d69b6 100644 --- a/VEX/priv/host_x86_isel.c +++ b/VEX/priv/host_x86_isel.c @@ -2776,6 +2776,25 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) return dst; } + if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) { + HReg rf = iselFltExpr(env, e->Iex.Binop.arg2); + HReg dst = newVRegF(env); + + /* rf now holds the value to be rounded. The first thing to do + is set the FPU's rounding mode accordingly. */ + + /* Set host rounding mode */ + set_FPU_rounding_mode( env, e->Iex.Binop.arg1 ); + + /* grndint %rf, %dst */ + addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst)); + + /* Restore default FPU rounding. */ + set_FPU_rounding_default( env ); + + return dst; + } + ppIRExpr(e); vpanic("iselFltExpr_wrk"); } -- 2.47.3