From: Julian Seward Date: Sun, 8 May 2005 23:03:48 +0000 (+0000) Subject: Make a whole bunch more x87 instructions work on amd64. X-Git-Tag: svn/VALGRIND_3_0_1^2~164 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=3012eb33540c5ab75e37688696c7a3fba1d4bd80;p=thirdparty%2Fvalgrind.git Make a whole bunch more x87 instructions work on amd64. git-svn-id: svn://svn.valgrind.org/vex/trunk@1170 --- diff --git a/VEX/priv/guest-amd64/gdefs.h b/VEX/priv/guest-amd64/gdefs.h index 8e0006e2a9..80cc455fc8 100644 --- a/VEX/priv/guest-amd64/gdefs.h +++ b/VEX/priv/guest-amd64/gdefs.h @@ -168,10 +168,10 @@ extern void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st ); #define AMD64G_CC_MASK_P (1 << AMD64G_CC_SHIFT_P) /* FPU flag masks */ -//#define AMD64G_FC_MASK_C3 (1 << 14) -//#define AMD64G_FC_MASK_C2 (1 << 10) -//#define AMD64G_FC_MASK_C1 (1 << 9) -//#define AMD64G_FC_MASK_C0 (1 << 8) +#define AMD64G_FC_MASK_C3 (1 << 14) +#define AMD64G_FC_MASK_C2 (1 << 10) +#define AMD64G_FC_MASK_C1 (1 << 9) +#define AMD64G_FC_MASK_C0 (1 << 8) /* %RFLAGS thunk descriptors. A four-word thunk is used to record details of the most recent flag-setting operation, so the flags can diff --git a/VEX/priv/guest-amd64/toIR.c b/VEX/priv/guest-amd64/toIR.c index cc06790a41..a87fb857b7 100644 --- a/VEX/priv/guest-amd64/toIR.c +++ b/VEX/priv/guest-amd64/toIR.c @@ -338,7 +338,7 @@ static void unimplemented ( HChar* str ) #define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG) #define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG) #define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP) -//.. #define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210) +#define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210) #define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND) //.. //.. #define OFFB_CS offsetof(VexGuestX86State,guest_CS) @@ -4124,17 +4124,18 @@ static void put_ftop ( IRExpr* e ) stmt( IRStmt_Put( OFFB_FTOP, e ) ); } -//.. /* --------- Get/put the C3210 bits. --------- */ -//.. -//.. static IRExpr* get_C3210 ( void ) -//.. { -//.. return IRExpr_Get( OFFB_FC3210, Ity_I32 ); -//.. } -//.. -//.. static void put_C3210 ( IRExpr* e ) -//.. { -//.. stmt( IRStmt_Put( OFFB_FC3210, e ) ); -//.. } +/* --------- Get/put the C3210 bits. --------- */ + +static IRExpr* /* :: Ity_I64 */ get_C3210 ( void ) +{ + return IRExpr_Get( OFFB_FC3210, Ity_I64 ); +} + +static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ ) +{ + vassert(typeOfIRExpr(irbb->tyenv, e) == Ity_I64); + stmt( IRStmt_Put( OFFB_FC3210, e ) ); +} /* --------- Get/put the FPU rounding mode. --------- */ static IRExpr* /* :: Ity_I32 */ get_fpround ( void ) @@ -4257,13 +4258,13 @@ static void fp_pop ( void ) put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) ); } -//.. /* Clear the C2 bit of the FPU status register, for -//.. sin/cos/tan/sincos. */ -//.. -//.. static void clear_C2 ( void ) -//.. { -//.. put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) ); -//.. } +/* Clear the C2 bit of the FPU status register, for + sin/cos/tan/sincos. */ + +static void clear_C2 ( void ) +{ + put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) ); +} /* ------------------------------------------------------- */ @@ -4824,18 +4825,18 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL))); break; -//.. case 0xF0: /* F2XM1 */ -//.. DIP("f2xm1\n"); -//.. put_ST_UNCHECKED(0, unop(Iop_2xm1F64, get_ST(0))); -//.. break; -//.. -//.. case 0xF1: /* FYL2X */ -//.. DIP("fyl2x\n"); -//.. 
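For reference: the F2XM1 and FYL2X cases being enabled here compute
2^ST(0) - 1 for ST(0) in [-1, +1], and ST(1) * log2(ST(0)) followed by
a stack pop, respectively.  A minimal C model of the two operations --
illustrative only, not part of the patch:

   #include <math.h>
   /* Hypothetical models of the two x87 ops, for illustration. */
   static double model_f2xm1 ( double st0 )
   { return exp2(st0) - 1.0; }
   static double model_fyl2x ( double st1, double st0 )
   { return st1 * log2(st0); }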
put_ST_UNCHECKED(1, binop(Iop_Yl2xF64, -//.. get_ST(1), get_ST(0))); -//.. fp_pop(); -//.. break; -//.. + case 0xF0: /* F2XM1 */ + DIP("f2xm1\n"); + put_ST_UNCHECKED(0, unop(Iop_2xm1F64, get_ST(0))); + break; + + case 0xF1: /* FYL2X */ + DIP("fyl2x\n"); + put_ST_UNCHECKED(1, binop(Iop_Yl2xF64, + get_ST(1), get_ST(0))); + fp_pop(); + break; + //.. case 0xF2: /* FPTAN */ //.. DIP("ftan\n"); //.. put_ST_UNCHECKED(0, unop(Iop_TanF64, get_ST(0))); @@ -4843,14 +4844,14 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, //.. put_ST(0, IRExpr_Const(IRConst_F64(1.0))); //.. clear_C2(); /* HACK */ //.. break; -//.. -//.. case 0xF3: /* FPATAN */ -//.. DIP("fpatan\n"); -//.. put_ST_UNCHECKED(1, binop(Iop_AtanF64, -//.. get_ST(1), get_ST(0))); -//.. fp_pop(); -//.. break; -//.. + + case 0xF3: /* FPATAN */ + DIP("fpatan\n"); + put_ST_UNCHECKED(1, binop(Iop_AtanF64, + get_ST(1), get_ST(0))); + fp_pop(); + break; + //.. case 0xF5: { /* FPREM1 -- IEEE compliant */ //.. IRTemp a1 = newTemp(Ity_F64); //.. IRTemp a2 = newTemp(Ity_F64); @@ -4896,40 +4897,40 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, put_ST_UNCHECKED(0, unop(Iop_SqrtF64, get_ST(0))); break; -//.. case 0xFB: { /* FSINCOS */ -//.. IRTemp a1 = newTemp(Ity_F64); -//.. assign( a1, get_ST(0) ); -//.. DIP("fsincos\n"); -//.. put_ST_UNCHECKED(0, unop(Iop_SinF64, mkexpr(a1))); -//.. fp_push(); -//.. put_ST(0, unop(Iop_CosF64, mkexpr(a1))); -//.. clear_C2(); /* HACK */ -//.. break; -//.. } -//.. -//.. case 0xFC: /* FRNDINT */ -//.. DIP("frndint\n"); -//.. put_ST_UNCHECKED(0, -//.. binop(Iop_RoundF64, get_roundingmode(), get_ST(0)) ); -//.. break; -//.. -//.. case 0xFD: /* FSCALE */ -//.. DIP("fscale\n"); -//.. put_ST_UNCHECKED(0, binop(Iop_ScaleF64, -//.. get_ST(0), get_ST(1))); -//.. break; -//.. -//.. case 0xFE: /* FSIN */ -//.. DIP("fsin\n"); -//.. put_ST_UNCHECKED(0, unop(Iop_SinF64, get_ST(0))); -//.. clear_C2(); /* HACK */ -//.. break; -//.. -//.. case 0xFF: /* FCOS */ -//.. DIP("fcos\n"); -//.. put_ST_UNCHECKED(0, unop(Iop_CosF64, get_ST(0))); -//.. clear_C2(); /* HACK */ -//.. break; + case 0xFB: { /* FSINCOS */ + IRTemp a1 = newTemp(Ity_F64); + assign( a1, get_ST(0) ); + DIP("fsincos\n"); + put_ST_UNCHECKED(0, unop(Iop_SinF64, mkexpr(a1))); + fp_push(); + put_ST(0, unop(Iop_CosF64, mkexpr(a1))); + clear_C2(); /* HACK */ + break; + } + + case 0xFC: /* FRNDINT */ + DIP("frndint\n"); + put_ST_UNCHECKED(0, + binop(Iop_RoundF64, get_roundingmode(), get_ST(0)) ); + break; + + case 0xFD: /* FSCALE */ + DIP("fscale\n"); + put_ST_UNCHECKED(0, binop(Iop_ScaleF64, + get_ST(0), get_ST(1))); + break; + + case 0xFE: /* FSIN */ + DIP("fsin\n"); + put_ST_UNCHECKED(0, unop(Iop_SinF64, get_ST(0))); + clear_C2(); /* HACK */ + break; + + case 0xFF: /* FCOS */ + DIP("fcos\n"); + put_ST_UNCHECKED(0, unop(Iop_CosF64, get_ST(0))); + clear_C2(); /* HACK */ + break; default: goto decode_fail; @@ -5037,6 +5038,16 @@ ULong dis_FPU ( /*OUT*/Bool* decode_ok, get_ST(0), get_ST(r_src)) ); break; + case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */ + r_src = (UInt)modrm - 0xD8; + DIP("fcmovu %%st(%u), %%st(0)\n", r_src); + put_ST_UNCHECKED(0, + IRExpr_Mux0X( + unop(Iop_1Uto8, + mk_amd64g_calculate_condition(AMD64CondP)), + get_ST(0), get_ST(r_src)) ); + break; + //.. case 0xE9: /* FUCOMPP %st(0),%st(1) */ //.. DIP("fucompp %%st(0),%%st(1)\n"); //.. /* This forces C1 to zero, which isn't right. 
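*/

For reference: FCMOVU moves %st(i) to %st(0) exactly when the
"unordered" condition holds, i.e. when PF is set, which is why the
Mux0X above selects on AMD64CondP (Mux0X yields its second operand
when the 8-bit condition is zero, and its third otherwise).  A C model
of the selection -- hypothetical, for illustration only:

   static double model_fcmovu ( int pf_set, double st0, double st_i )
   {
      /* copy %st(i) to %st(0) only if unordered (PF == 1) */
      return pf_set ? st_i : st0;
   }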
diff --git a/VEX/priv/host-amd64/hdefs.c b/VEX/priv/host-amd64/hdefs.c
index d4a7f9585c..90c65c61c2 100644
--- a/VEX/priv/host-amd64/hdefs.c
+++ b/VEX/priv/host-amd64/hdefs.c
@@ -558,30 +558,30 @@ HChar* showAMD64ShiftOp ( AMD64ShiftOp op ) {
    }
 }
 
-//.. HChar* showAMD64FpOp ( AMD64FpOp op ) {
-//..    switch (op) {
+HChar* showA87FpOp ( A87FpOp op ) {
+   switch (op) {
 //..       case Xfp_ADD:    return "add";
 //..       case Xfp_SUB:    return "sub";
 //..       case Xfp_MUL:    return "mul";
 //..       case Xfp_DIV:    return "div";
-//..       case Xfp_SCALE:  return "scale";
-//..       case Xfp_ATAN:   return "atan";
-//..       case Xfp_YL2X:   return "yl2x";
+      case Afp_SCALE:  return "scale";
+      case Afp_ATAN:   return "atan";
+      case Afp_YL2X:   return "yl2x";
 //..       case Xfp_YL2XP1: return "yl2xp1";
 //..       case Xfp_PREM:   return "prem";
 //..       case Xfp_PREM1:  return "prem1";
-//..       case Xfp_SQRT:   return "sqrt";
+      case Afp_SQRT:   return "sqrt";
 //..       case Xfp_ABS:    return "abs";
 //..       case Xfp_NEG:    return "chs";
 //..       case Xfp_MOV:    return "mov";
-//..       case Xfp_SIN:    return "sin";
-//..       case Xfp_COS:    return "cos";
+      case Afp_SIN:    return "sin";
+      case Afp_COS:    return "cos";
 //..       case Xfp_TAN:    return "tan";
-//..       case Xfp_ROUND:  return "round";
-//..       case Xfp_2XM1:   return "2xm1";
-//..       default: vpanic("showAMD64FpOp");
-//..    }
-//.. }
+      case Afp_ROUND:  return "round";
+      case Afp_2XM1:   return "2xm1";
+      default: vpanic("showA87FpOp");
+   }
+}
 
 HChar* showAMD64SseOp ( AMD64SseOp op ) {
    switch (op) {
@@ -807,8 +807,38 @@ AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ) {
 }
 
 AMD64Instr* AMD64Instr_MFence ( void ) {
-   AMD64Instr* i    = LibVEX_Alloc(sizeof(AMD64Instr));
-   i->tag           = Ain_MFence;
+   AMD64Instr* i           = LibVEX_Alloc(sizeof(AMD64Instr));
+   i->tag                  = Ain_MFence;
+   return i;
+}
+AMD64Instr* AMD64Instr_A87Free ( Int nregs )
+{
+   AMD64Instr* i           = LibVEX_Alloc(sizeof(AMD64Instr));
+   i->tag                  = Ain_A87Free;
+   i->Ain.A87Free.nregs    = nregs;
+   vassert(nregs >= 1 && nregs <= 7);
+   return i;
+}
+AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush )
+{
+   AMD64Instr* i            = LibVEX_Alloc(sizeof(AMD64Instr));
+   i->tag                   = Ain_A87PushPop;
+   i->Ain.A87PushPop.addr   = addr;
+   i->Ain.A87PushPop.isPush = isPush;
+   return i;
+}
+AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
+{
+   AMD64Instr* i           = LibVEX_Alloc(sizeof(AMD64Instr));
+   i->tag                  = Ain_A87FpOp;
+   i->Ain.A87FpOp.op       = op;
+   return i;
+}
+AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
+{
+   AMD64Instr* i           = LibVEX_Alloc(sizeof(AMD64Instr));
+   i->tag                  = Ain_A87LdCW;
+   i->Ain.A87LdCW.addr     = addr;
    return i;
 }
 
@@ -1147,6 +1177,20 @@ void ppAMD64Instr ( AMD64Instr* i )
       case Ain_MFence:
          vex_printf("mfence" );
         return;
+      case Ain_A87Free:
+         vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
+         break;
+      case Ain_A87PushPop:
+         vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl ");
+         ppAMD64AMode(i->Ain.A87PushPop.addr);
+         break;
+      case Ain_A87FpOp:
+         vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
+         break;
+      case Ain_A87LdCW:
+         vex_printf("fldcw ");
+         ppAMD64AMode(i->Ain.A87LdCW.addr);
+         break;
 //..       case Xin_FpUnary:
 //..          vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
 //..          ppHRegAMD64(i->Xin.FpUnary.src);
@@ -1457,6 +1501,16 @@ void getRegUsage_AMD64Instr ( HRegUsage* u, AMD64Instr* i )
       return;
    case Ain_MFence:
       return;
+   case Ain_A87Free:
+      return;
+   case Ain_A87PushPop:
+      addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
+      return;
+   case Ain_A87FpOp:
+      return;
+   case Ain_A87LdCW:
+      addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
+      return;
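A note on the new instructions' register usage: Ain_A87Free and
Ain_A87FpOp touch only the x87 stack, and Ain_A87PushPop/Ain_A87LdCW
reference virtual registers only through their amode, so the register
allocator never has to model x87 registers at all.  The emitter
(further down) expands Ain_A87Free into a descending run of ffree
instructions, releasing %st(7) down to %st(8-nregs), so that the fld
pushes which follow cannot overflow the x87 stack.  A sketch of that
expansion -- illustrative only, mirroring the emitter loop:

   /* Which registers Ain_A87Free(nregs) marks as empty. */
   static void model_a87free ( Int nregs /* 1 .. 7 */ )
   {
      Int j;
      for (j = 0; j < nregs; j++)
         vex_printf("ffree %%st(%d)\n", 7 - j);  /* %st(7) .. %st(8-nregs) */
   }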
 //.. case Xin_FpUnary:
 //..    addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
 //..    addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
@@ -1583,7 +1637,7 @@
 }
 
 /* local helper */
-static void mapReg(HRegRemap* m, HReg* r)
+static inline void mapReg(HRegRemap* m, HReg* r)
 {
    *r = lookupHRegRemap(m, *r);
 }
@@ -1655,6 +1709,16 @@ void mapRegs_AMD64Instr ( HRegRemap* m, AMD64Instr* i )
       return;
    case Ain_MFence:
       return;
+   case Ain_A87Free:
+      return;
+   case Ain_A87PushPop:
+      mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
+      return;
+   case Ain_A87FpOp:
+      return;
+   case Ain_A87LdCW:
+      mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
+      return;
 //.. case Xin_FpUnary:
 //..    mapReg(m, &i->Xin.FpUnary.src);
 //..    mapReg(m, &i->Xin.FpUnary.dst);
@@ -2080,14 +2144,15 @@ static UChar rexAMode_R ( HReg greg, HReg ereg )
 }
 
-//.. /* Emit ffree %st(7) */
-//.. static UChar* do_ffree_st7 ( UChar* p )
-//.. {
-//..    *p++ = 0xDD;
-//..    *p++ = 0xC7;
-//..    return p;
-//.. }
-//.. 
+/* Emit ffree %st(N) */
+static UChar* do_ffree_st ( UChar* p, Int n )
+{
+   vassert(n >= 0 && n <= 7);
+   *p++ = 0xDD;
+   *p++ = toUChar(0xC0 + n);
+   return p;
+}
+
 //.. /* Emit fstp %st(i), 1 <= i <= 7 */
 //.. static UChar* do_fstp_st ( UChar* p, Int i )
 //.. {
@@ -2187,6 +2252,7 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i )
    UChar rex;
    UChar* p = &buf[0];
    UChar* ptmp;
+   Int j;
    vassert(nbuf >= 32);
 
    /* Wrap an integer as a int register, for use assembling
@@ -2744,6 +2810,51 @@ Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i )
       *p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
       goto done;
 
+   case Ain_A87Free:
+      vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
+      for (j = 0; j < i->Ain.A87Free.nregs; j++) {
+         p = do_ffree_st(p, 7-j);
+      }
+      goto done;
+
+   case Ain_A87PushPop:
+      if (i->Ain.A87PushPop.isPush) {
+         /* Load from memory into %st(0): fldl amode */
+         *p++ = clearWBit(
+                   rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
+         *p++ = 0xDD;
+         p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
+      } else {
+         /* Dump %st(0) to memory: fstpl amode */
+         *p++ = clearWBit(
+                   rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
+         *p++ = 0xDD;
+         p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
+      }
+      goto done;
+
+   case Ain_A87FpOp:
+      switch (i->Ain.A87FpOp.op) {
+         case Afp_SQRT:  *p++ = 0xD9; *p++ = 0xFA; break;
+         case Afp_SIN:   *p++ = 0xD9; *p++ = 0xFE; break;
+         case Afp_COS:   *p++ = 0xD9; *p++ = 0xFF; break;
+         case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
+         case Afp_2XM1:  *p++ = 0xD9; *p++ = 0xF0; break;
+         case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
+         case Afp_ATAN:  *p++ = 0xD9; *p++ = 0xF3; break;
+         case Afp_YL2X:  *p++ = 0xD9; *p++ = 0xF1; break;
+         default: goto bad;
+      }
+      goto done;
+
+   case Ain_A87LdCW:
+      *p++ = clearWBit(
+                rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
+      *p++ = 0xD9;
+      p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
+      goto done;
+
    case Ain_Store:
       if (i->Ain.Store.sz == 2) {
          /* This just goes to show the craziness of the instruction
diff --git a/VEX/priv/host-amd64/hdefs.h b/VEX/priv/host-amd64/hdefs.h
index f66c937ddd..5507785b1c 100644
--- a/VEX/priv/host-amd64/hdefs.h
+++ b/VEX/priv/host-amd64/hdefs.h
@@ -291,20 +291,21 @@ typedef
 
 extern HChar* showAMD64ShiftOp ( AMD64ShiftOp );
 
-//.. /* --------- */
-//.. typedef
-//..    enum {
-//..       Xfp_INVALID,
-//..       /* Binary */
+/* --------- */
+typedef
+   enum {
+      Afp_INVALID,
+      /* Binary */
 //..       Xfp_ADD, Xfp_SUB, Xfp_MUL, Xfp_DIV,
-//..       Xfp_SCALE, Xfp_ATAN, Xfp_YL2X, Xfp_YL2XP1, Xfp_PREM, Xfp_PREM1,
-//.. 
/* Unary */ -//.. Xfp_SQRT, Xfp_ABS, Xfp_NEG, Xfp_MOV, Xfp_SIN, Xfp_COS, Xfp_TAN, -//.. Xfp_ROUND, Xfp_2XM1 -//.. } -//.. X86FpOp; -//.. -//.. extern HChar* showX86FpOp ( X86FpOp ); + Afp_SCALE, Afp_ATAN, Afp_YL2X, //Xfp_YL2XP1, Xfp_PREM, Xfp_PREM1, + /* Unary */ + Afp_SQRT, //Xfp_ABS, Xfp_NEG, Xfp_MOV, + Afp_SIN, Afp_COS, //Xfp_TAN, + Afp_ROUND, Afp_2XM1 + } + A87FpOp; + +extern HChar* showA87FpOp ( A87FpOp ); /* --------- */ @@ -357,25 +358,29 @@ extern HChar* showAMD64SseOp ( AMD64SseOp ); /* --------- */ typedef enum { - Ain_Imm64, /* Generate 64-bit literal to register */ - Ain_Alu64R, /* 64-bit mov/arith/logical, dst=REG */ - Ain_Alu64M, /* 64-bit mov/arith/logical, dst=MEM */ - Ain_Sh64, /* 64-bit shift/rotate, dst=REG or MEM */ - Ain_Test64, /* 64-bit test (AND, set flags, discard result) */ - Ain_Unary64, /* 64-bit not and neg */ - Ain_MulL, /* widening multiply */ - Ain_Div, /* div and mod */ + Ain_Imm64, /* Generate 64-bit literal to register */ + Ain_Alu64R, /* 64-bit mov/arith/logical, dst=REG */ + Ain_Alu64M, /* 64-bit mov/arith/logical, dst=MEM */ + Ain_Sh64, /* 64-bit shift/rotate, dst=REG or MEM */ + Ain_Test64, /* 64-bit test (AND, set flags, discard result) */ + Ain_Unary64, /* 64-bit not and neg */ + Ain_MulL, /* widening multiply */ + Ain_Div, /* div and mod */ //.. Xin_Sh3232, /* shldl or shrdl */ - Ain_Push, /* push 64-bit value on stack */ - Ain_Call, /* call to address in register */ - Ain_Goto, /* conditional/unconditional jmp to dst */ - Ain_CMov64, /* conditional move */ - Ain_MovZLQ, /* reg-reg move, zeroing out top half */ - Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */ - Ain_Store, /* store 32/16/8 bit value in memory */ - Ain_Set64, /* convert condition code to 64-bit value */ - Ain_Bsfr64, /* 64-bit bsf/bsr */ - Ain_MFence, /* mem fence */ + Ain_Push, /* push 64-bit value on stack */ + Ain_Call, /* call to address in register */ + Ain_Goto, /* conditional/unconditional jmp to dst */ + Ain_CMov64, /* conditional move */ + Ain_MovZLQ, /* reg-reg move, zeroing out top half */ + Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */ + Ain_Store, /* store 32/16/8 bit value in memory */ + Ain_Set64, /* convert condition code to 64-bit value */ + Ain_Bsfr64, /* 64-bit bsf/bsr */ + Ain_MFence, /* mem fence */ + Ain_A87Free, /* free up x87 registers */ + Ain_A87PushPop, /* x87 loads/stores */ + Ain_A87FpOp, /* x87 operations */ + Ain_A87LdCW, /* load x87 control word */ //.. //.. Xin_FpUnary, /* FP fake unary op */ //.. Xin_FpBinary, /* FP fake binary op */ @@ -383,25 +388,25 @@ typedef //.. Xin_FpLdStI, /* FP fake load/store, converting to/from Int */ //.. Xin_Fp64to32, /* FP round IEEE754 double to IEEE754 single */ //.. Xin_FpCMov, /* FP fake floating point conditional move */ - Ain_LdMXCSR, /* load %mxcsr */ + Ain_LdMXCSR, /* load %mxcsr */ //.. Xin_FpStSW_AX, /* fstsw %ax */ - Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int - register */ - Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */ - Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */ - Ain_SseSDSS, /* scalar float32 to/from float64 */ + Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int + register */ + Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */ + Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */ + Ain_SseSDSS, /* scalar float32 to/from float64 */ //.. //.. 
Xin_SseConst,   /* Generate restricted SSE literal */
-      Ain_SseLdSt,   /* SSE load/store 32/64/128 bits, no alignment
-                        constraints, upper 96/64/0 bits arbitrary */
-      Ain_SseLdzLO,  /* SSE load low 32/64 bits, zero remainder of reg */
-      Ain_Sse32Fx4,  /* SSE binary, 32Fx4 */
-      Ain_Sse32FLo,  /* SSE binary, 32F in lowest lane only */
-      Ain_Sse64Fx2,  /* SSE binary, 64Fx2 */
-      Ain_Sse64FLo,  /* SSE binary, 64F in lowest lane only */
-      Ain_SseReRg,   /* SSE binary general reg-reg, Re, Rg */
-      Ain_SseCMov,   /* SSE conditional move */
-      Ain_SseShuf    /* SSE2 shuffle (pshufd) */
+      Ain_SseLdSt,    /* SSE load/store 32/64/128 bits, no alignment
+                         constraints, upper 96/64/0 bits arbitrary */
+      Ain_SseLdzLO,   /* SSE load low 32/64 bits, zero remainder of reg */
+      Ain_Sse32Fx4,   /* SSE binary, 32Fx4 */
+      Ain_Sse32FLo,   /* SSE binary, 32F in lowest lane only */
+      Ain_Sse64Fx2,   /* SSE binary, 64Fx2 */
+      Ain_Sse64FLo,   /* SSE binary, 64F in lowest lane only */
+      Ain_SseReRg,    /* SSE binary general reg-reg, Re, Rg */
+      Ain_SseCMov,    /* SSE conditional move */
+      Ain_SseShuf     /* SSE2 shuffle (pshufd) */
    }
    AMD64InstrTag;
 
@@ -519,47 +524,38 @@ typedef
       struct {
       } MFence;
 
-//..       /* X86 Floating point (fake 3-operand, "flat reg file" insns) */
-//..       struct {
-//..          X86FpOp op;
-//..          HReg src;
-//..          HReg dst;
-//..       } FpUnary;
-//..       struct {
-//..          X86FpOp op;
-//..          HReg srcL;
-//..          HReg srcR;
-//..          HReg dst;
-//..       } FpBinary;
-//..       struct {
-//..          Bool isLoad;
-//..          UChar sz; /* only 4 (IEEE single) or 8 (IEEE double) */
-//..          HReg reg;
-//..          X86AMode* addr;
-//..       } FpLdSt;
-//..       /* Move 64-bit float to/from memory, converting to/from
-//..          signed int on the way.  Note the conversions will observe
-//..          the host FPU rounding mode currently in force. */
-//..       struct {
-//..          Bool isLoad;
-//..          UChar sz; /* only 2, 4 or 8 */
-//..          HReg reg;
-//..          X86AMode* addr;
-//..       } FpLdStI;
-//..       /* By observing the current FPU rounding mode, round (etc)
-//..          src into dst given that dst should be interpreted as an
-//..          IEEE754 32-bit (float) type. */
-//..       struct {
-//..          HReg src;
-//..          HReg dst;
-//..       } Fp64to32;
-//..       /* Mov src to dst on the given condition, which may not
-//..          be the bogus Xcc_ALWAYS. */
-//..       struct {
-//..          X86CondCode cond;
-//..          HReg src;
-//..          HReg dst;
-//..       } FpCMov;
+      /* --- X87 --- */
+
+      /* A very minimal set of x87 insns, which operate in a strictly
+         stack-like way, so there is no need to model individual x87
+         registers. */
+
+      /* Do 'ffree' on %st(7) .. %st(8-nregs) */
+      struct {
+         Int nregs; /* 1 <= nregs <= 7 */
+      } A87Free;
+
+      /* Push a 64-bit FP value from memory onto the stack, or move
+         a value from the stack to memory and remove it from the
+         stack. */
+      struct {
+         AMD64AMode* addr;
+         Bool        isPush;
+      } A87PushPop;
+
+      /* Do an operation on the top-of-stack.  This can be unary, in
+         which case it is %st0 = OP( %st0 ), or binary: %st0 = OP(
+         %st0, %st1 ). */
+      struct {
+         A87FpOp op;
+      } A87FpOp;
+
+      /* Load the FPU control word. */
+      struct {
+         AMD64AMode* addr;
+      } A87LdCW;
+
+      /* --- SSE --- */
+
       /* Load 32 bits into %mxcsr. 
*/ struct { AMD64AMode* addr; @@ -656,26 +652,30 @@ typedef } AMD64Instr; -extern AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ); -extern AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp, AMD64RMI*, HReg ); -extern AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp, AMD64RI*, AMD64AMode* ); -extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, AMD64RM* dst ); -extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, AMD64RM* ); -extern AMD64Instr* AMD64Instr_Test64 ( AMD64RI* src, AMD64RM* dst ); -extern AMD64Instr* AMD64Instr_MulL ( Bool syned, Int sz, AMD64RM* ); -extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* ); +extern AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst ); +extern AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp, AMD64RMI*, HReg ); +extern AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp, AMD64RI*, AMD64AMode* ); +extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, AMD64RM* dst ); +extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, AMD64RM* ); +extern AMD64Instr* AMD64Instr_Test64 ( AMD64RI* src, AMD64RM* dst ); +extern AMD64Instr* AMD64Instr_MulL ( Bool syned, Int sz, AMD64RM* ); +extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* ); //.. extern AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp, UInt amt, HReg src, HReg dst ); -extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* ); -extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int ); -extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode cond, AMD64RI* dst ); -extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst ); -extern AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst ); -extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned, - AMD64AMode* src, HReg dst ); -extern AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ); -extern AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ); -extern AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ); -extern AMD64Instr* AMD64Instr_MFence ( void ); +extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* ); +extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int ); +extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode cond, AMD64RI* dst ); +extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst ); +extern AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst ); +extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned, + AMD64AMode* src, HReg dst ); +extern AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst ); +extern AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst ); +extern AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst ); +extern AMD64Instr* AMD64Instr_MFence ( void ); +extern AMD64Instr* AMD64Instr_A87Free ( Int nregs ); +extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush ); +extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op ); +extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr ); //.. //.. extern AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ); //.. extern AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ); @@ -683,23 +683,23 @@ extern AMD64Instr* AMD64Instr_MFence ( void ); //.. extern AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* ); //.. extern AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ); //.. 
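Typical use of the four new constructors, as in host-amd64/isel.c
below: a double in an SSE register is round-tripped through the x87
unit via a 64-bit spill slot at -8(%rsp).  Sketch, with 'env', 'arg'
and 'dst' assumed to be in scope as they are in the isel functions:

   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
   addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
   addInstr(env, AMD64Instr_A87Free(1));
   addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
   addInstr(env, AMD64Instr_A87FpOp(Afp_SQRT));
   addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
   addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));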
extern AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode, HReg src, HReg dst ); -extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* ); +extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* ); //.. extern AMD64Instr* AMD64Instr_FpStSW_AX ( void ); -extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ); -extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ); -extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ); -extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst ); +extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ); +extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst ); +extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst ); +extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst ); //.. //.. extern AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ); -extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* ); -extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* ); -extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg ); -extern AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp, HReg, HReg ); -extern AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp, HReg, HReg ); -extern AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp, HReg, HReg ); -extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg ); -extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst ); -extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ); +extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* ); +extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* ); +extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg ); +extern AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp, HReg, HReg ); +extern AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp, HReg, HReg ); +extern AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp, HReg, HReg ); +extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg ); +extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst ); +extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst ); extern void ppAMD64Instr ( AMD64Instr* ); diff --git a/VEX/priv/host-amd64/isel.c b/VEX/priv/host-amd64/isel.c index 1da246ace3..3b15c4a700 100644 --- a/VEX/priv/host-amd64/isel.c +++ b/VEX/priv/host-amd64/isel.c @@ -618,20 +618,19 @@ void set_SSE_rounding_default ( ISelEnv* env ) add_to_rsp(env, 8); } -//.. /* Mess with the FPU's rounding mode: set to the default rounding mode -//.. (DEFAULT_FPUCW). */ -//.. static -//.. void set_FPU_rounding_default ( ISelEnv* env ) -//.. { -//.. /* pushl $DEFAULT_FPUCW -//.. fldcw 0(%esp) -//.. addl $4, %esp -//.. */ -//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP()); -//.. addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW))); -//.. addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp)); -//.. add_to_esp(env, 4); -//.. } +/* Mess with the FPU's rounding mode: set to the default rounding mode + (DEFAULT_FPUCW). 
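*/

For reference: bits 11:10 of the x87 control word hold the rounding-
control field, and IRRoundingMode uses the same 0..3 encoding, which
is what the shift-by-10 in set_FPU_rounding_mode below relies on.  A
sketch of the control-word arithmetic, assuming DEFAULT_FPUCW encodes
round-to-nearest (rounding-control bits zero):

   static UInt model_fpucw ( UInt irrm /* IRRoundingMode, 0 .. 3 */ )
   {
      return DEFAULT_FPUCW | ((irrm & 3) << 10);
   }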
+static
+void set_FPU_rounding_default ( ISelEnv* env )
+{
+   /* movq $DEFAULT_FPUCW, -8(%rsp)
+      fldcw -8(%rsp)
+   */
+   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+   addInstr(env, AMD64Instr_Alu64M(
+                    Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
+   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
+}
 
 
 /* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
@@ -669,34 +668,34 @@ void set_SSE_rounding_mode ( ISelEnv* env, IRExpr* mode )
 }
 
 
-//.. /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
-//..    expression denoting a value in the range 0 .. 3, indicating a round
-//..    mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
-//..    the same rounding.
-//.. */
-//.. static
-//.. void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
-//.. {
-//..    HReg rrm  = iselIntExpr_R(env, mode);
-//..    HReg rrm2 = newVRegI(env);
-//..    X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
-//.. 
-//..    /* movl  %rrm, %rrm2
-//..       andl  $3, %rrm2   -- shouldn't be needed; paranoia
-//..       shll  $10, %rrm2
-//..       orl   $DEFAULT_FPUCW, %rrm2
-//..       pushl %rrm2
-//..       fldcw 0(%esp)
-//..       addl  $4, %esp
-//..    */
-//..    addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
-//..    addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
-//..    addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, X86RM_Reg(rrm2)));
-//..    addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
-//..    addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
-//..    addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
-//..    add_to_esp(env, 4);
-//.. }
+/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
+   expression denoting a value in the range 0 .. 3, indicating a round
+   mode encoded as per type IRRoundingMode.  Set the x87 FPU to have
+   the same rounding.
+*/
+static
+void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
+{
+   HReg rrm  = iselIntExpr_R(env, mode);
+   HReg rrm2 = newVRegI(env);
+   AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+
+   /* movq  %rrm, %rrm2
+      andq  $3, %rrm2   -- shouldn't be needed; paranoia
+      shlq  $10, %rrm2
+      orq   $DEFAULT_FPUCW, %rrm2
+      movq  %rrm2, -8(%rsp)
+      fldcw -8(%rsp)
+   */
+   addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
+   addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
+   addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, AMD64RM_Reg(rrm2)));
+   addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
+                                   AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
+   addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
+                                   AMD64RI_Reg(rrm2), m8_rsp));
+   addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
+}
 
 
 /* Generate !src into a new vector register.  Amazing that there isn't
@@ -1315,7 +1314,7 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
       sub_from_rsp(env, 16);
       addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
       addInstr(env, AMD64Instr_Alu64R( Aalu_MOV, 
-                                       AMD64RMI_Mem(rspN), dst ));
+                                       AMD64RMI_Mem(rspN), dst ));
       add_to_rsp(env, 16);
       return dst;
    }
@@ -2811,25 +2810,73 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
 //..       return res;
 //..    }
 //.. }
-//.. 
-//.. if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64) {
-//..    HReg rf  = iselDblExpr(env, e->Iex.Binop.arg2);
-//..    HReg dst = newVRegF(env);
-//.. 
-//..    /* rf now holds the value to be rounded.  The first thing to do
-//..       is set the FPU's rounding mode accordingly. */
-//.. 
-//..    /* Set host rounding mode */
-//..    set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
-//.. 
-//..    /* grndint %rf, %dst */
-//..    addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
-//.. 
-//..    /* Restore default FPU rounding. */
-//..    set_FPU_rounding_default( env );
-//.. 
-//..    return dst;
-//.. }
+
+   if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64) {
+      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+      HReg        arg    = iselDblExpr(env, e->Iex.Binop.arg2);
+      HReg        dst    = newVRegV(env);
+
+      /* arg now holds the value to be rounded.  The first thing to do
+         is set the FPU's rounding mode accordingly. */
+
+      /* Set host x87 rounding mode */
+      set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+      addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
+      addInstr(env, AMD64Instr_A87Free(1));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+      addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+
+      /* Restore default x87 rounding. */
+      set_FPU_rounding_default( env );
+
+      return dst;
+   }
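In outline, the Iop_RoundF64 case above therefore generates (sketch;
register names illustrative):

   /* movsd %xmm_arg, -8(%rsp)  -- park the value in the spill slot
      ffree %st(7)              -- make sure there is a free x87 slot
      fldl  -8(%rsp)            -- x87 push
      frndint                   -- round in the current rounding mode
      fstpl -8(%rsp)            -- x87 pop back to the slot
      movsd -8(%rsp), %xmm_dst  -- reload into the SSE register */

The round trip through memory is needed because there is no direct
register-to-register move between the SSE and x87 register files.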
+
+   if (e->tag == Iex_Binop
+       && (e->Iex.Binop.op == Iop_ScaleF64
+           || e->Iex.Binop.op == Iop_AtanF64
+           || e->Iex.Binop.op == Iop_Yl2xF64)
+      ) {
+      AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+      HReg        arg1   = iselDblExpr(env, e->Iex.Binop.arg1);
+      HReg        arg2   = iselDblExpr(env, e->Iex.Binop.arg2);
+      HReg        dst    = newVRegV(env);
+      Bool     arg2first = toBool(e->Iex.Binop.op == Iop_ScaleF64);
+      addInstr(env, AMD64Instr_A87Free(2));
+
+      /* one arg -> top of x87 stack */
+      addInstr(env, AMD64Instr_SseLdSt(
+                       False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+
+      /* other arg -> top of x87 stack */
+      addInstr(env, AMD64Instr_SseLdSt(
+                       False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+
+      /* do it */
+      switch (e->Iex.Binop.op) {
+         case Iop_ScaleF64:
+            addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
+            break;
+         case Iop_AtanF64:
+            addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
+            break;
+         case Iop_Yl2xF64:
+            addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
+            break;
+         default:
+            vassert(0);
+      }
+
+      /* save result */
+      addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+      addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+      return dst;
+   }
 
    if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64toF64) {
       HReg dst = newVRegV(env);
@@ -2874,28 +2921,31 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e )
       return dst;
    }
 
-//.. if (e->tag == Iex_Unop) {
-//..    X86FpOp fpop = Xfp_INVALID;
-//..    switch (e->Iex.Unop.op) {
+   if (e->tag == Iex_Unop) {
+      A87FpOp fpop = Afp_INVALID;
+      switch (e->Iex.Unop.op) {
 //..       case Iop_NegF64:  fpop = Xfp_NEG; break;
 //..       case Iop_AbsF64:  fpop = Xfp_ABS; break;
-//..       case Iop_SqrtF64: fpop = Xfp_SQRT; break;
-//..       case Iop_SinF64:  fpop = Xfp_SIN; break;
-//..       case Iop_CosF64:  fpop = Xfp_COS; break;
+         case Iop_SqrtF64: fpop = Afp_SQRT; break;
+         case Iop_SinF64:  fpop = Afp_SIN; break;
+         case Iop_CosF64:  fpop = Afp_COS; break;
 //..       case Iop_TanF64:  fpop = Xfp_TAN; break;
-//..       case Iop_2xm1F64: fpop = Xfp_2XM1; break;
-//..       default: break;
-//..    }
-//..    if (fpop != Xfp_INVALID) {
-//..       HReg res = newVRegF(env);
-//..       HReg src = iselDblExpr(env, e->Iex.Unop.arg);
-//..       addInstr(env, X86Instr_FpUnary(fpop,src,res));
-//..       if (fpop != Xfp_SQRT
-//..           && fpop != Xfp_NEG && fpop != Xfp_ABS)
-//..          roundToF64(env, res);
-//..       return res;
-//.. }
+         case Iop_2xm1F64: fpop = Afp_2XM1; break;
+         default: break;
+      }
+      if (fpop != Afp_INVALID) {
+         AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+         HReg        arg   = iselDblExpr(env, e->Iex.Unop.arg);
+         HReg        dst   = newVRegV(env);
+         addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
+         addInstr(env, AMD64Instr_A87Free(1));
+         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+         addInstr(env, AMD64Instr_A87FpOp(fpop));
+         addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+         addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+         return dst;
+      }
+   }
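The operand ordering in the ScaleF64/AtanF64/Yl2xF64 cases above
follows the x87 semantics: fscale computes %st0 * 2^trunc(%st1), so
arg1 must end up on top with arg2 beneath it (hence arg2 is pushed
first), whereas fpatan and fyl2x consume %st1 as the first IR operand,
so arg1 is pushed first.  C models of the three operations --
illustrative only, not part of the patch:

   #include <math.h>
   static double model_scale ( double a, double b )
   { return a * exp2(trunc(b)); }
   static double model_atan ( double a, double b )
   { return atan2(a, b); }
   static double model_yl2x ( double a, double b )
   { return a * log2(b); }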
 
    if (e->tag == Iex_Unop) {
       switch (e->Iex.Unop.op) {