From: Julian Seward
Date: Sun, 27 Jun 2010 09:06:34 +0000 (+0000)
Subject: Implement ROUNDSS (partial implementation, in the case where
X-Git-Tag: svn/VALGRIND_3_6_1^2~84
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7af731cb2ffc86a0055d8fb94cf43c8ff3894808;p=thirdparty%2Fvalgrind.git

Implement ROUNDSS (partial implementation, in the case where
the rounding mode is specified within the instruction itself).

git-svn-id: svn://svn.valgrind.org/vex/trunk@1986
---

diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index 4868478eb6..8fc2ff4029 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -15005,30 +15005,38 @@ DisResult disInstr_AMD64_WRK (
    }
 
 
-   /* 66 0f 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 (Partial
-      implementation only -- only deal with cases where the rounding
-      mode is specified directly by the immediate byte. */
-   if (have66noF2noF3( pfx )
+   /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
+      (Partial implementation only -- only deal with cases where
+      the rounding mode is specified directly by the immediate byte.)
+      66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
+      (Limitations ditto)
+   */
+   if (have66noF2noF3(pfx)
        && sz == 2
-       && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0B) {
-
-      modrm = insn[3];
+       && insn[0] == 0x0F && insn[1] == 0x3A
+       && (insn[2] == 0x0B || insn[2] == 0x0A)) {
 
-      IRTemp src = newTemp(Ity_F64);
-      IRTemp res = newTemp(Ity_F64);
+      Bool isD = insn[2] == 0x0B;
+      IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
+      IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
       Int imm = 0;
 
+      modrm = insn[3];
+
       if (epartIsReg(modrm)) {
-         assign( src, getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
+         assign( src,
+                 isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
+                     : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
          imm = insn[3+1];
          if (imm & ~3) goto decode_failure;
          delta += 3+1+1;
-         DIP( "roundsd $%d,%s,%s\n",
+         DIP( "rounds%c $%d,%s,%s\n",
+              isD ? 'd' : 's',
               imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
                    nameXMMReg( gregOfRexRM(pfx, modrm) ) );
       } else {
          addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
-         assign( src, loadLE( Ity_F64, mkexpr(addr) ));
+         assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
          imm = insn[3+alen];
          if (imm & ~3) goto decode_failure;
          delta += 3+alen+1;
@@ -15040,9 +15048,13 @@ DisResult disInstr_AMD64_WRK (
          that encoding is the same as the encoding for IRRoundingMode,
          we can use that value directly in the IR as a rounding
          mode. */
-      assign(res, binop(Iop_RoundF64toInt, mkU32(imm & 3), mkexpr(src)) );
+      assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
+                        mkU32(imm & 3), mkexpr(src)) );
 
-      putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+      if (isD)
+         putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+      else
+         putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
 
       goto decode_success;
    }
diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c
index eccf940853..1c3d012f7d 100644
--- a/VEX/priv/host_amd64_defs.c
+++ b/VEX/priv/host_amd64_defs.c
@@ -809,12 +809,14 @@ AMD64Instr* AMD64Instr_A87Free ( Int nregs )
    vassert(nregs >= 1 && nregs <= 7);
    return i;
 }
-AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush )
+AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
 {
    AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
    i->tag = Ain_A87PushPop;
    i->Ain.A87PushPop.addr = addr;
    i->Ain.A87PushPop.isPush = isPush;
+   i->Ain.A87PushPop.szB = szB;
+   vassert(szB == 8 || szB == 4);
    return i;
 }
 AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
@@ -1195,7 +1197,8 @@ void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
          vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
          break;
       case Ain_A87PushPop:
-         vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl ");
+         vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
+                    i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
          ppAMD64AMode(i->Ain.A87PushPop.addr);
          break;
       case Ain_A87FpOp:
@@ -2948,17 +2951,18 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
       goto done;
 
    case Ain_A87PushPop:
+      vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
       if (i->Ain.A87PushPop.isPush) {
-         /* Load from memory into %st(0): fldl amode */
+         /* Load from memory into %st(0): flds/fldl amode */
          *p++ = clearWBit(
                    rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
-         *p++ = 0xDD;
+         *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
          p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
       } else {
-         /* Dump %st(0) to memory: fstpl amode */
+         /* Dump %st(0) to memory: fstps/fstpl amode */
          *p++ = clearWBit(
                    rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
-         *p++ = 0xDD;
+         *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
          p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
          goto done;
       }
diff --git a/VEX/priv/host_amd64_defs.h b/VEX/priv/host_amd64_defs.h
index 21965cd31a..cf49ae148c 100644
--- a/VEX/priv/host_amd64_defs.h
+++ b/VEX/priv/host_amd64_defs.h
@@ -546,12 +546,13 @@ typedef
             Int nregs; /* 1 <= nregs <= 7 */
          } A87Free;
 
-         /* Push a 64-bit FP value from memory onto the stack, or move
-            a value from the stack to memory and remove it from the
-            stack. */
+         /* Push a 32- or 64-bit FP value from memory onto the stack,
+            or move a value from the stack to memory and remove it
+            from the stack. */
          struct {
             AMD64AMode* addr;
             Bool isPush;
+            UChar szB; /* 4 or 8 */
          } A87PushPop;
 
          /* Do an operation on the top-of-stack. This can be unary, in
@@ -694,7 +695,7 @@ extern AMD64Instr* AMD64Instr_ACAS ( AMD64AMode* addr, UChar sz );
 extern AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz );
 
 extern AMD64Instr* AMD64Instr_A87Free ( Int nregs );
-extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush );
+extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB );
 extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
 extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr );
 extern AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr );
diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c
index ddf7503e96..ff38e17bb1 100644
--- a/VEX/priv/host_amd64_isel.c
+++ b/VEX/priv/host_amd64_isel.c
@@ -1776,11 +1776,11 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
 
       /* one arg -> top of x87 stack */
       addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
 
       /* other arg -> top of x87 stack */
       addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
 
       switch (e->Iex.Triop.op) {
          case Iop_PRemC3210F64:
@@ -2794,6 +2794,30 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e )
       return dst;
    }
 
+   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
+      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+      HReg arg = iselFltExpr(env, e->Iex.Binop.arg2);
+      HReg dst = newVRegV(env);
+
+      /* rf now holds the value to be rounded. The first thing to do
+         is set the FPU's rounding mode accordingly. */
+
+      /* Set host x87 rounding mode */
+      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+      addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
+      addInstr(env, AMD64Instr_A87Free(1));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
+      addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
+      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
+
+      /* Restore default x87 rounding. */
+      set_FPU_rounding_default( env );
+
+      return dst;
+   }
+
    ppIRExpr(e);
    vpanic("iselFltExpr_wrk");
 }
@@ -2937,9 +2961,9 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
 
       addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
       addInstr(env, AMD64Instr_A87Free(1));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
       addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
       addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
 
       /* Restore default x87 rounding. */
@@ -2968,12 +2992,12 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
       /* one arg -> top of x87 stack */
       addInstr(env, AMD64Instr_SseLdSt(
                        False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
 
       /* other arg -> top of x87 stack */
       addInstr(env, AMD64Instr_SseLdSt(
                        False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
 
       /* do it */
       /* XXXROUNDINGFIXME */
@@ -3002,7 +3026,7 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
       }
 
       /* save result */
-      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
       addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
       return dst;
    }
@@ -3067,15 +3091,15 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
          Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
          addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
          addInstr(env, AMD64Instr_A87Free(nNeeded));
-         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
          /* XXXROUNDINGFIXME */
          /* set roundingmode here */
          addInstr(env, AMD64Instr_A87FpOp(fpop));
          if (e->Iex.Binop.op==Iop_TanF64) {
             /* get rid of the extra 1.0 that fptan pushes */
-            addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+            addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
          }
-         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
          addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
          return dst;
       }
diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
index 0e6f2edadf..4d35401d5e 100644
--- a/VEX/priv/ir_defs.c
+++ b/VEX/priv/ir_defs.c
@@ -302,6 +302,7 @@ void ppIROp ( IROp op )
       case Iop_F64toF32: vex_printf("F64toF32"); return;
 
       case Iop_RoundF64toInt: vex_printf("RoundF64toInt"); return;
+      case Iop_RoundF32toInt: vex_printf("RoundF32toInt"); return;
       case Iop_RoundF64toF32: vex_printf("RoundF64toF32"); return;
 
       case Iop_ReinterpF64asI64: vex_printf("ReinterpF64asI64"); return;
@@ -1796,6 +1797,7 @@ void typeOfPrimop ( IROp op,
          BINARY(ity_RMode,Ity_F64, Ity_F64);
 
       case Iop_SqrtF32:
+      case Iop_RoundF32toInt:
          BINARY(ity_RMode,Ity_F32, Ity_F32);
 
       case Iop_CmpF64:
diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
index 4b7d628fb4..0e291ebb3c 100644
--- a/VEX/pub/libvex_ir.h
+++ b/VEX/pub/libvex_ir.h
@@ -639,6 +639,8 @@ typedef
       Iop_2xm1F64, /* (2^arg - 1.0) */
       Iop_RoundF64toInt, /* F64 value to nearest integral value (still
                             as F64) */
+      Iop_RoundF32toInt, /* F32 value to nearest integral value (still
+                            as F32) */
 
       /* --- guest ppc32/64 specifics, not mandated by 754. --- */
 