From: Julian Seward Date: Wed, 30 Apr 2014 22:50:34 +0000 (+0000) Subject: x87 instructions FSIN, FCOS, FSINCOS and FPTAN: handle out-of-range X-Git-Tag: svn/VALGRIND_3_10_1^2~120 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1fa696e0ba183aab851e127d8bceb7af71e0cd18;p=thirdparty%2Fvalgrind.git x87 instructions FSIN, FCOS, FSINCOS and FPTAN: handle out-of-range arguments correctly. Mozilla bug 995564. git-svn-id: svn://svn.valgrind.org/vex/trunk@2850 --- diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c index 4b45823ce1..7462c3b1ae 100644 --- a/VEX/priv/guest_amd64_toIR.c +++ b/VEX/priv/guest_amd64_toIR.c @@ -51,10 +51,6 @@ float-to-float rounding. For all other operations, round-to-nearest is used, regardless. - * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the - simulation claims the argument is in-range (-2^63 <= arg <= 2^63) - even when it isn't. - * some of the FCOM cases could do with testing -- not convinced that the args are the right way round. @@ -5066,6 +5062,42 @@ static IRExpr* get_ST ( Int i ) } +/* Given i, and some expression e, and a condition cond, generate IR + which has the same effect as put_ST(i,e) when cond is true and has + no effect when cond is false. Given the lack of proper + if-then-else in the IR, this is pretty tricky. 
+*/ + + static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value ) +{ + // new_tag = if cond then FULL else old_tag + // new_val = if cond then (if old_tag==FULL then NaN else val) + // else old_val + + IRTemp old_tag = newTemp(Ity_I8); + assign(old_tag, get_ST_TAG(i)); + IRTemp new_tag = newTemp(Ity_I8); + assign(new_tag, + IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag))); + + IRTemp old_val = newTemp(Ity_F64); + assign(old_val, get_ST_UNCHECKED(i)); + IRTemp new_val = newTemp(Ity_F64); + assign(new_val, + IRExpr_ITE(mkexpr(cond), + IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)), + /* non-0 means full */ + mkQNaN64(), + /* 0 means empty */ + value), + mkexpr(old_val))); + + put_ST_UNCHECKED(i, mkexpr(new_val)); + // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So + // now set it to new_tag instead. + put_ST_TAG(i, mkexpr(new_tag)); +} + /* Adjust FTOP downwards by one register. */ static void fp_push ( void ) @@ -5073,6 +5105,14 @@ static void fp_push ( void ) put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); } +/* Adjust FTOP downwards by one register when COND is 1:I1. Else + don't change it. */ + +static void maybe_fp_push ( IRTemp cond ) +{ + put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) ); +} + /* Adjust FTOP upwards by one register, and mark the vacated register as empty. */ @@ -5082,12 +5122,49 @@ static void fp_pop ( void ) put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); } -/* Clear the C2 bit of the FPU status register, for - sin/cos/tan/sincos. */ - -static void clear_C2 ( void ) +/* Set the C2 bit of the FPU status register to e[0]. Assumes that + e[63:1] == 0. +*/ +static void set_C2 ( IRExpr* e ) +{ + IRExpr* cleared = binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)); + put_C3210( binop(Iop_Or64, + cleared, + binop(Iop_Shl64, e, mkU8(AMD64G_FC_SHIFT_C2))) ); +} + +/* Generate code to check that abs(d64) < 2^63 and is finite.
This is + used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The + test is simple, but the derivation of it is not so simple. + + The exponent field for an IEEE754 double is 11 bits. That means it + can take values 0 through 0x7FF. If the exponent has value 0x7FF, + the number is either a NaN or an Infinity and so is not finite. + Furthermore, a finite value of exactly 2^63 is the smallest value + that has exponent value 0x43E. Hence, what we need to do is + extract the exponent, ignoring the sign bit and mantissa, and check + it is < 0x43E, or <= 0x43D. + + To make this easily applicable to 32- and 64-bit targets, a + roundabout approach is used. First the number is converted to I64, + then the top 32 bits are taken. Shifting them right by 20 bits + places the sign bit and exponent in the bottom 12 bits. Anding + with 0x7FF gets rid of the sign bit, leaving just the exponent + available for comparison. +*/ +static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) { - put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) ); + IRTemp i64 = newTemp(Ity_I64); + assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); + IRTemp exponent = newTemp(Ity_I32); + assign(exponent, + binop(Iop_And32, + binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), + mkU32(0x7FF))); + IRTemp in_range_and_finite = newTemp(Ity_I1); + assign(in_range_and_finite, + binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); + return in_range_and_finite; } /* Invent a plausible-looking FPU status word value: @@ -5717,16 +5794,31 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, fp_pop(); break; - case 0xF2: /* FPTAN */ - DIP("ftan\n"); - put_ST_UNCHECKED(0, - binop(Iop_TanF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - fp_push(); - put_ST(0, IRExpr_Const(IRConst_F64(1.0))); - clear_C2(); /* HACK */ + case 0xF2: { /* FPTAN */ + DIP("fptan\n"); + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = 
math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(Iop_TanF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + /* Conditionally push 1.0 on the stack, if the arg is + in range */ + maybe_fp_push(argOK); + maybe_put_ST(argOK, 0, + IRExpr_Const(IRConst_F64(1.0))); + set_C2( binop(Iop_Xor64, + unop(Iop_1Uto64, mkexpr(argOK)), + mkU64(1)) ); break; + } case 0xF3: /* FPATAN */ DIP("fpatan\n"); @@ -5842,19 +5934,30 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, break; case 0xFB: { /* FSINCOS */ - IRTemp a1 = newTemp(Ity_F64); - assign( a1, get_ST(0) ); DIP("fsincos\n"); - put_ST_UNCHECKED(0, - binop(Iop_SinF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - mkexpr(a1))); - fp_push(); - put_ST(0, - binop(Iop_CosF64, + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(Iop_SinF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + /* Conditionally push the cos value on the stack, if + the arg is in range */ + maybe_fp_push(argOK); + maybe_put_ST(argOK, 0, + binop(Iop_CosF64, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - mkexpr(a1))); - clear_C2(); /* HACK */ + mkexpr(argD))); + set_C2( binop(Iop_Xor64, + unop(Iop_1Uto64, mkexpr(argOK)), + mkU64(1)) ); break; } @@ -5873,23 +5976,28 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, get_ST(1))); break; - case 0xFE: /* FSIN */ - DIP("fsin\n"); - put_ST_UNCHECKED(0, - binop(Iop_SinF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - clear_C2(); /* HACK */ - break; - - case 0xFF: /* FCOS */ - DIP("fcos\n"); - put_ST_UNCHECKED(0, - binop(Iop_CosF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - clear_C2(); 
/* HACK */ + case 0xFE: /* FSIN */ + case 0xFF: { /* FCOS */ + Bool isSIN = modrm == 0xFE; + DIP("%s\n", isSIN ? "fsin" : "fcos"); + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(isSIN ? Iop_SinF64 : Iop_CosF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + set_C2( binop(Iop_Xor64, + unop(Iop_1Uto64, mkexpr(argOK)), + mkU64(1)) ); break; + } default: goto decode_fail; diff --git a/VEX/priv/guest_x86_toIR.c b/VEX/priv/guest_x86_toIR.c index e24105b1f7..bcb821c99b 100644 --- a/VEX/priv/guest_x86_toIR.c +++ b/VEX/priv/guest_x86_toIR.c @@ -54,10 +54,6 @@ for float-to-float rounding. For all other operations, round-to-nearest is used, regardless. - * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the - simulation claims the argument is in-range (-2^63 <= arg <= 2^63) - even when it isn't. - * some of the FCOM cases could do with testing -- not convinced that the args are the right way round. @@ -3603,6 +3599,42 @@ static IRExpr* get_ST ( Int i ) } +/* Given i, and some expression e, and a condition cond, generate IR + which has the same effect as put_ST(i,e) when cond is true and has + no effect when cond is false. Given the lack of proper + if-then-else in the IR, this is pretty tricky. 
+*/ + +static void maybe_put_ST ( IRTemp cond, Int i, IRExpr* value ) +{ + // new_tag = if cond then FULL else old_tag + // new_val = if cond then (if old_tag==FULL then NaN else val) + // else old_val + + IRTemp old_tag = newTemp(Ity_I8); + assign(old_tag, get_ST_TAG(i)); + IRTemp new_tag = newTemp(Ity_I8); + assign(new_tag, + IRExpr_ITE(mkexpr(cond), mkU8(1)/*FULL*/, mkexpr(old_tag))); + + IRTemp old_val = newTemp(Ity_F64); + assign(old_val, get_ST_UNCHECKED(i)); + IRTemp new_val = newTemp(Ity_F64); + assign(new_val, + IRExpr_ITE(mkexpr(cond), + IRExpr_ITE(binop(Iop_CmpNE8, mkexpr(old_tag), mkU8(0)), + /* non-0 means full */ + mkQNaN64(), + /* 0 means empty */ + value), + mkexpr(old_val))); + + put_ST_UNCHECKED(i, mkexpr(new_val)); + // put_ST_UNCHECKED incorrectly sets tag(i) to always be FULL. So + // now set it to new_tag instead. + put_ST_TAG(i, mkexpr(new_tag)); +} + /* Adjust FTOP downwards by one register. */ static void fp_push ( void ) @@ -3610,6 +3642,14 @@ static void fp_push ( void ) put_ftop( binop(Iop_Sub32, get_ftop(), mkU32(1)) ); } +/* Adjust FTOP downwards by one register when COND is 1:I1. Else + don't change it. */ + +static void maybe_fp_push ( IRTemp cond ) +{ + put_ftop( binop(Iop_Sub32, get_ftop(), unop(Iop_1Uto32,mkexpr(cond))) ); +} + /* Adjust FTOP upwards by one register, and mark the vacated register as empty. */ @@ -3619,12 +3659,49 @@ static void fp_pop ( void ) put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); } -/* Clear the C2 bit of the FPU status register, for - sin/cos/tan/sincos. */ - -static void clear_C2 ( void ) +/* Set the C2 bit of the FPU status register to e[0]. Assumes that + e[31:1] == 0. +*/ +static void set_C2 ( IRExpr* e ) +{ + IRExpr* cleared = binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)); + put_C3210( binop(Iop_Or32, + cleared, + binop(Iop_Shl32, e, mkU8(X86G_FC_SHIFT_C2))) ); +} + +/* Generate code to check that abs(d64) < 2^63 and is finite. 
This is + used to do the range checks for FSIN, FCOS, FSINCOS and FPTAN. The + test is simple, but the derivation of it is not so simple. + + The exponent field for an IEEE754 double is 11 bits. That means it + can take values 0 through 0x7FF. If the exponent has value 0x7FF, + the number is either a NaN or an Infinity and so is not finite. + Furthermore, a finite value of exactly 2^63 is the smallest value + that has exponent value 0x43E. Hence, what we need to do is + extract the exponent, ignoring the sign bit and mantissa, and check + it is < 0x43E, or <= 0x43D. + + To make this easily applicable to 32- and 64-bit targets, a + roundabout approach is used. First the number is converted to I64, + then the top 32 bits are taken. Shifting them right by 20 bits + places the sign bit and exponent in the bottom 12 bits. Anding + with 0x7FF gets rid of the sign bit, leaving just the exponent + available for comparison. +*/ +static IRTemp math_IS_TRIG_ARG_FINITE_AND_IN_RANGE ( IRTemp d64 ) { - put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) ); + IRTemp i64 = newTemp(Ity_I64); + assign(i64, unop(Iop_ReinterpF64asI64, mkexpr(d64)) ); + IRTemp exponent = newTemp(Ity_I32); + assign(exponent, + binop(Iop_And32, + binop(Iop_Shr32, unop(Iop_64HIto32, mkexpr(i64)), mkU8(20)), + mkU32(0x7FF))); + IRTemp in_range_and_finite = newTemp(Ity_I1); + assign(in_range_and_finite, + binop(Iop_CmpLE32U, mkexpr(exponent), mkU32(0x43D))); + return in_range_and_finite; } /* Invent a plausible-looking FPU status word value: @@ -4245,16 +4322,31 @@ UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) fp_pop(); break; - case 0xF2: /* FPTAN */ - DIP("ftan\n"); - put_ST_UNCHECKED(0, - binop(Iop_TanF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - fp_push(); - put_ST(0, IRExpr_Const(IRConst_F64(1.0))); - clear_C2(); /* HACK */ + case 0xF2: { /* FPTAN */ + DIP("fptan\n"); + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = 
math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(Iop_TanF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + /* Conditionally push 1.0 on the stack, if the arg is + in range */ + maybe_fp_push(argOK); + maybe_put_ST(argOK, 0, + IRExpr_Const(IRConst_F64(1.0))); + set_C2( binop(Iop_Xor32, + unop(Iop_1Uto32, mkexpr(argOK)), + mkU32(1)) ); break; + } case 0xF3: /* FPATAN */ DIP("fpatan\n"); @@ -4368,19 +4460,30 @@ UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) break; case 0xFB: { /* FSINCOS */ - IRTemp a1 = newTemp(Ity_F64); - assign( a1, get_ST(0) ); DIP("fsincos\n"); - put_ST_UNCHECKED(0, - binop(Iop_SinF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - mkexpr(a1))); - fp_push(); - put_ST(0, + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(Iop_SinF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + /* Conditionally push the cos value on the stack, if + the arg is in range */ + maybe_fp_push(argOK); + maybe_put_ST(argOK, 0, binop(Iop_CosF64, get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - mkexpr(a1))); - clear_C2(); /* HACK */ + mkexpr(argD))); + set_C2( binop(Iop_Xor32, + unop(Iop_1Uto32, mkexpr(argOK)), + mkU32(1)) ); break; } @@ -4399,23 +4502,28 @@ UInt dis_FPU ( Bool* decode_ok, UChar sorb, Int delta ) get_ST(1))); break; - case 0xFE: /* FSIN */ - DIP("fsin\n"); - put_ST_UNCHECKED(0, - binop(Iop_SinF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - clear_C2(); /* HACK */ - break; - - case 0xFF: /* FCOS */ - DIP("fcos\n"); - put_ST_UNCHECKED(0, - binop(Iop_CosF64, - get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ - get_ST(0))); - 
clear_C2(); /* HACK */ + case 0xFE: /* FSIN */ + case 0xFF: { /* FCOS */ + Bool isSIN = modrm == 0xFE; + DIP("%s\n", isSIN ? "fsin" : "fcos"); + IRTemp argD = newTemp(Ity_F64); + assign(argD, get_ST(0)); + IRTemp argOK = math_IS_TRIG_ARG_FINITE_AND_IN_RANGE(argD); + IRTemp resD = newTemp(Ity_F64); + assign(resD, + IRExpr_ITE( + mkexpr(argOK), + binop(isSIN ? Iop_SinF64 : Iop_CosF64, + get_FAKE_roundingmode(), /* XXXROUNDINGFIXME */ + mkexpr(argD)), + mkexpr(argD)) + ); + put_ST_UNCHECKED(0, mkexpr(resD)); + set_C2( binop(Iop_Xor32, + unop(Iop_1Uto32, mkexpr(argOK)), + mkU32(1)) ); break; + } default: goto decode_fail; diff --git a/VEX/priv/host_amd64_defs.c b/VEX/priv/host_amd64_defs.c index b4a31f97a5..0f3acc8235 100644 --- a/VEX/priv/host_amd64_defs.c +++ b/VEX/priv/host_amd64_defs.c @@ -3069,7 +3069,6 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc, case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break; case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break; case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break; - case Afp_TAN: *p++ = 0xD9; *p++ = 0xF2; break; case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break; case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break; @@ -3078,7 +3077,24 @@ Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc, case Afp_YL2XP1: *p++ = 0xD9; *p++ = 0xF9; break; case Afp_PREM: *p++ = 0xD9; *p++ = 0xF8; break; case Afp_PREM1: *p++ = 0xD9; *p++ = 0xF5; break; - default: goto bad; + case Afp_TAN: + /* fptan pushes 1.0 on the FP stack, except when the + argument is out of range. Hence we have to do the + instruction, then inspect C2 to see if there is an out + of range condition. If there is, we skip the fincstp + that is used by the in-range case to get rid of this + extra 1.0 value. 
*/ + *p++ = 0xD9; *p++ = 0xF2; // fptan + *p++ = 0x50; // pushq %rax + *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax + *p++ = 0x66; *p++ = 0xA9; + *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax + *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp + *p++ = 0xD9; *p++ = 0xF7; // fincstp + *p++ = 0x58; // after_fincstp: popq %rax + break; + default: + goto bad; } goto done; diff --git a/VEX/priv/host_amd64_isel.c b/VEX/priv/host_amd64_isel.c index a7eedb453b..9bae167107 100644 --- a/VEX/priv/host_amd64_isel.c +++ b/VEX/priv/host_amd64_isel.c @@ -3031,11 +3031,12 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8)); /* XXXROUNDINGFIXME */ /* set roundingmode here */ + /* Note that AMD64Instr_A87FpOp(Afp_TAN) sets the condition + codes. I don't think that matters, since this insn + selector never generates such an instruction intervening + between a flag-setting instruction and a flag-using + instruction. */ addInstr(env, AMD64Instr_A87FpOp(fpop)); - if (e->Iex.Binop.op==Iop_TanF64) { - /* get rid of the extra 1.0 that fptan pushes */ - addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); - } addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8)); addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp)); return dst; diff --git a/VEX/priv/host_x86_defs.c b/VEX/priv/host_x86_defs.c index d6f1edd907..91aa03a2de 100644 --- a/VEX/priv/host_x86_defs.c +++ b/VEX/priv/host_x86_defs.c @@ -2023,11 +2023,25 @@ static UChar* do_fop1_st ( UChar* p, X86FpOp op ) case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break; case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break; case Xfp_MOV: break; - case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */ - *p++ = 0xD9; *p++ = 0xF2; /* fptan */ - *p++ = 0xD9; *p++ = 0xF7; /* fincstp */ - break; - default: vpanic("do_fop1_st: unknown op"); + case Xfp_TAN: + /* fptan pushes 1.0 on the FP stack, except when the argument + is out of range.
Hence we have to do the instruction, + then inspect C2 to see if there is an out of range + condition. If there is, we skip the fincstp that is used + by the in-range case to get rid of this extra 1.0 + value. */ + p = do_ffree_st7(p); /* since fptan sometimes pushes 1.0 */ + *p++ = 0xD9; *p++ = 0xF2; // fptan + *p++ = 0x50; // pushl %eax + *p++ = 0xDF; *p++ = 0xE0; // fnstsw %ax + *p++ = 0x66; *p++ = 0xA9; + *p++ = 0x00; *p++ = 0x04; // testw $0x400,%ax + *p++ = 0x75; *p++ = 0x02; // jnz after_fincstp + *p++ = 0xD9; *p++ = 0xF7; // fincstp + *p++ = 0x58; // after_fincstp: popl %eax + break; + default: + vpanic("do_fop1_st: unknown op"); } return p; } diff --git a/VEX/priv/host_x86_isel.c b/VEX/priv/host_x86_isel.c index 090e9aafdd..4147176a0a 100644 --- a/VEX/priv/host_x86_isel.c +++ b/VEX/priv/host_x86_isel.c @@ -3147,6 +3147,11 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) HReg src = iselDblExpr(env, e->Iex.Binop.arg2); /* XXXROUNDINGFIXME */ /* set roundingmode here */ + /* Note that X86Instr_FpUnary(Xfp_TAN,..) sets the condition + codes. I don't think that matters, since this insn + selector never generates such an instruction intervening + between a flag-setting instruction and a flag-using + instruction. */ addInstr(env, X86Instr_FpUnary(fpop,src,res)); if (fpop != Xfp_SQRT && fpop != Xfp_NEG && fpop != Xfp_ABS)