#define AMD64G_CC_MASK_P (1 << AMD64G_CC_SHIFT_P)
/* FPU flag masks */
-//#define AMD64G_FC_MASK_C3 (1 << 14)
-//#define AMD64G_FC_MASK_C2 (1 << 10)
-//#define AMD64G_FC_MASK_C1 (1 << 9)
-//#define AMD64G_FC_MASK_C0 (1 << 8)
+#define AMD64G_FC_MASK_C3 (1 << 14)
+#define AMD64G_FC_MASK_C2 (1 << 10)
+#define AMD64G_FC_MASK_C1 (1 << 9)
+#define AMD64G_FC_MASK_C0 (1 << 8)
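+/* For reference: these are bits 8, 9, 10 and 14 of the x87 status
+   word. After FCOM, C3:C2:C0 read 000 for >, 001 for <, 100 for
+   equal, and 111 for unordered. */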
/* %RFLAGS thunk descriptors. A four-word thunk is used to record
details of the most recent flag-setting operation, so the flags can
#define OFFB_DFLAG offsetof(VexGuestAMD64State,guest_DFLAG)
#define OFFB_IDFLAG offsetof(VexGuestAMD64State,guest_IDFLAG)
#define OFFB_FTOP offsetof(VexGuestAMD64State,guest_FTOP)
-//.. #define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)
+#define OFFB_FC3210 offsetof(VexGuestAMD64State,guest_FC3210)
#define OFFB_FPROUND offsetof(VexGuestAMD64State,guest_FPROUND)
//..
//.. #define OFFB_CS offsetof(VexGuestX86State,guest_CS)
stmt( IRStmt_Put( OFFB_FTOP, e ) );
}
-//.. /* --------- Get/put the C3210 bits. --------- */
-//..
-//.. static IRExpr* get_C3210 ( void )
-//.. {
-//.. return IRExpr_Get( OFFB_FC3210, Ity_I32 );
-//.. }
-//..
-//.. static void put_C3210 ( IRExpr* e )
-//.. {
-//.. stmt( IRStmt_Put( OFFB_FC3210, e ) );
-//.. }
+/* --------- Get/put the C3210 bits. --------- */
+
+static IRExpr* /* :: Ity_I64 */ get_C3210 ( void )
+{
+ return IRExpr_Get( OFFB_FC3210, Ity_I64 );
+}
+
+static void put_C3210 ( IRExpr* e /* :: Ity_I64 */ )
+{
+ vassert(typeOfIRExpr(irbb->tyenv, e) == Ity_I64);
+ stmt( IRStmt_Put( OFFB_FC3210, e ) );
+}
/* --------- Get/put the FPU rounding mode. --------- */
static IRExpr* /* :: Ity_I32 */ get_fpround ( void )
put_ftop( binop(Iop_Add32, get_ftop(), mkU32(1)) );
}
-//.. /* Clear the C2 bit of the FPU status register, for
-//.. sin/cos/tan/sincos. */
-//..
-//.. static void clear_C2 ( void )
-//.. {
-//.. put_C3210( binop(Iop_And32, get_C3210(), mkU32(~X86G_FC_MASK_C2)) );
-//.. }
+/* Clear the C2 bit of the FPU status register, for
+ sin/cos/tan/sincos. */
+
+static void clear_C2 ( void )
+{
+ put_C3210( binop(Iop_And64, get_C3210(), mkU64(~AMD64G_FC_MASK_C2)) );
+}
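+
+/* Note: ~AMD64G_FC_MASK_C2 is computed at int width and sign-extends
+   on conversion to ULong in mkU64, giving an all-ones 64-bit mask
+   with only bit 10 clear -- exactly what the And64 needs. */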
/* ------------------------------------------------------- */
put_ST(0, IRExpr_Const(IRConst_F64i(0x0000000000000000ULL)));
break;
-//.. case 0xF0: /* F2XM1 */
-//.. DIP("f2xm1\n");
-//.. put_ST_UNCHECKED(0, unop(Iop_2xm1F64, get_ST(0)));
-//.. break;
-//..
-//.. case 0xF1: /* FYL2X */
-//.. DIP("fyl2x\n");
-//.. put_ST_UNCHECKED(1, binop(Iop_Yl2xF64,
-//.. get_ST(1), get_ST(0)));
-//.. fp_pop();
-//.. break;
-//..
+ case 0xF0: /* F2XM1 */
+ DIP("f2xm1\n");
+ put_ST_UNCHECKED(0, unop(Iop_2xm1F64, get_ST(0)));
+ break;
+
+ case 0xF1: /* FYL2X */
+ DIP("fyl2x\n");
+ put_ST_UNCHECKED(1, binop(Iop_Yl2xF64,
+ get_ST(1), get_ST(0)));
+ fp_pop();
+ break;
+
//.. case 0xF2: /* FPTAN */
//.. DIP("ftan\n");
//.. put_ST_UNCHECKED(0, unop(Iop_TanF64, get_ST(0)));
//.. put_ST(0, IRExpr_Const(IRConst_F64(1.0)));
//.. clear_C2(); /* HACK */
//.. break;
-//..
-//.. case 0xF3: /* FPATAN */
-//.. DIP("fpatan\n");
-//.. put_ST_UNCHECKED(1, binop(Iop_AtanF64,
-//.. get_ST(1), get_ST(0)));
-//.. fp_pop();
-//.. break;
-//..
+
+ case 0xF3: /* FPATAN */
+ DIP("fpatan\n");
+ put_ST_UNCHECKED(1, binop(Iop_AtanF64,
+ get_ST(1), get_ST(0)));
+ fp_pop();
+ break;
+
//.. case 0xF5: { /* FPREM1 -- IEEE compliant */
//.. IRTemp a1 = newTemp(Ity_F64);
//.. IRTemp a2 = newTemp(Ity_F64);
put_ST_UNCHECKED(0, unop(Iop_SqrtF64, get_ST(0)));
break;
-//.. case 0xFB: { /* FSINCOS */
-//.. IRTemp a1 = newTemp(Ity_F64);
-//.. assign( a1, get_ST(0) );
-//.. DIP("fsincos\n");
-//.. put_ST_UNCHECKED(0, unop(Iop_SinF64, mkexpr(a1)));
-//.. fp_push();
-//.. put_ST(0, unop(Iop_CosF64, mkexpr(a1)));
-//.. clear_C2(); /* HACK */
-//.. break;
-//.. }
-//..
-//.. case 0xFC: /* FRNDINT */
-//.. DIP("frndint\n");
-//.. put_ST_UNCHECKED(0,
-//.. binop(Iop_RoundF64, get_roundingmode(), get_ST(0)) );
-//.. break;
-//..
-//.. case 0xFD: /* FSCALE */
-//.. DIP("fscale\n");
-//.. put_ST_UNCHECKED(0, binop(Iop_ScaleF64,
-//.. get_ST(0), get_ST(1)));
-//.. break;
-//..
-//.. case 0xFE: /* FSIN */
-//.. DIP("fsin\n");
-//.. put_ST_UNCHECKED(0, unop(Iop_SinF64, get_ST(0)));
-//.. clear_C2(); /* HACK */
-//.. break;
-//..
-//.. case 0xFF: /* FCOS */
-//.. DIP("fcos\n");
-//.. put_ST_UNCHECKED(0, unop(Iop_CosF64, get_ST(0)));
-//.. clear_C2(); /* HACK */
-//.. break;
+ case 0xFB: { /* FSINCOS */
+ IRTemp a1 = newTemp(Ity_F64);
+ assign( a1, get_ST(0) );
+ DIP("fsincos\n");
+ put_ST_UNCHECKED(0, unop(Iop_SinF64, mkexpr(a1)));
+ fp_push();
+ put_ST(0, unop(Iop_CosF64, mkexpr(a1)));
+ clear_C2(); /* HACK */
+ break;
+ }
+
+ case 0xFC: /* FRNDINT */
+ DIP("frndint\n");
+ put_ST_UNCHECKED(0,
+ binop(Iop_RoundF64, get_roundingmode(), get_ST(0)) );
+ break;
+
+ case 0xFD: /* FSCALE */
+ DIP("fscale\n");
+ put_ST_UNCHECKED(0, binop(Iop_ScaleF64,
+ get_ST(0), get_ST(1)));
+ break;
+
+ case 0xFE: /* FSIN */
+ DIP("fsin\n");
+ put_ST_UNCHECKED(0, unop(Iop_SinF64, get_ST(0)));
+ clear_C2(); /* HACK */
+ break;
+
+ case 0xFF: /* FCOS */
+ DIP("fcos\n");
+ put_ST_UNCHECKED(0, unop(Iop_CosF64, get_ST(0)));
+ clear_C2(); /* HACK */
+ break;
default:
goto decode_fail;
get_ST(0), get_ST(r_src)) );
break;
+ case 0xD8 ... 0xDF: /* FCMOVU ST(i), ST(0) */
+ r_src = (UInt)modrm - 0xD8;
+ DIP("fcmovu %%st(%u), %%st(0)\n", r_src);
+ put_ST_UNCHECKED(0,
+ IRExpr_Mux0X(
+ unop(Iop_1Uto8,
+ mk_amd64g_calculate_condition(AMD64CondP)),
+ get_ST(0), get_ST(r_src)) );
+ break;
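+ /* IRExpr_Mux0X(cond, ex0, exX) yields ex0 when the condition byte
+    is zero: the unordered (P) condition selects ST(r_src), and
+    otherwise ST(0) is left unchanged. */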
+
//.. case 0xE9: /* FUCOMPP %st(0),%st(1) */
//.. DIP("fucompp %%st(0),%%st(1)\n");
//.. /* This forces C1 to zero, which isn't right. */
}
}
-//.. HChar* showAMD64FpOp ( AMD64FpOp op ) {
-//.. switch (op) {
+HChar* showA87FpOp ( A87FpOp op ) {
+ switch (op) {
//.. case Xfp_ADD: return "add";
//.. case Xfp_SUB: return "sub";
//.. case Xfp_MUL: return "mul";
//.. case Xfp_DIV: return "div";
-//.. case Xfp_SCALE: return "scale";
-//.. case Xfp_ATAN: return "atan";
-//.. case Xfp_YL2X: return "yl2x";
+ case Afp_SCALE: return "scale";
+ case Afp_ATAN: return "atan";
+ case Afp_YL2X: return "yl2x";
//.. case Xfp_YL2XP1: return "yl2xp1";
//.. case Xfp_PREM: return "prem";
//.. case Xfp_PREM1: return "prem1";
-//.. case Xfp_SQRT: return "sqrt";
+ case Afp_SQRT: return "sqrt";
//.. case Xfp_ABS: return "abs";
//.. case Xfp_NEG: return "chs";
//.. case Xfp_MOV: return "mov";
-//.. case Xfp_SIN: return "sin";
-//.. case Xfp_COS: return "cos";
+ case Afp_SIN: return "sin";
+ case Afp_COS: return "cos";
//.. case Xfp_TAN: return "tan";
-//.. case Xfp_ROUND: return "round";
-//.. case Xfp_2XM1: return "2xm1";
-//.. default: vpanic("showAMD64FpOp");
-//.. }
-//.. }
+ case Afp_ROUND: return "round";
+ case Afp_2XM1: return "2xm1";
+ default: vpanic("showA87FpOp");
+ }
+}
HChar* showAMD64SseOp ( AMD64SseOp op ) {
switch (op) {
}
AMD64Instr* AMD64Instr_MFence ( void )
{
- AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
- i->tag = Ain_MFence;
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_MFence;
+ return i;
+}
+AMD64Instr* AMD64Instr_A87Free ( Int nregs )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87Free;
+ i->Ain.A87Free.nregs = nregs;
+ vassert(nregs >= 1 && nregs <= 7);
+ return i;
+}
+AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87PushPop;
+ i->Ain.A87PushPop.addr = addr;
+ i->Ain.A87PushPop.isPush = isPush;
+ return i;
+}
+AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87FpOp;
+ i->Ain.A87FpOp.op = op;
+ return i;
+}
+AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr )
+{
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_A87LdCW;
+ i->Ain.A87LdCW.addr = addr;
return i;
}
case Ain_MFence:
vex_printf("mfence" );
return;
+ case Ain_A87Free:
+ vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
+ break;
+ case Ain_A87PushPop:
+ vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl ");
+ ppAMD64AMode(i->Ain.A87PushPop.addr);
+ break;
+ case Ain_A87FpOp:
+ vex_printf("f%s", showA87FpOp(i->Ain.A87FpOp.op));
+ break;
+ case Ain_A87LdCW:
+ vex_printf("fldcw ");
+ ppAMD64AMode(i->Ain.A87LdCW.addr);
+ break;
//.. case Xin_FpUnary:
//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
//.. ppHRegAMD64(i->Xin.FpUnary.src);
return;
case Ain_MFence:
return;
+ case Ain_A87Free:
+ return;
+ case Ain_A87PushPop:
+ addRegUsage_AMD64AMode(u, i->Ain.A87PushPop.addr);
+ return;
+ case Ain_A87FpOp:
+ return;
+ case Ain_A87LdCW:
+ addRegUsage_AMD64AMode(u, i->Ain.A87LdCW.addr);
+ return;
//.. case Xin_FpUnary:
//.. addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
//.. addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
}
/* local helper */
-static void mapReg(HRegRemap* m, HReg* r)
+static inline void mapReg(HRegRemap* m, HReg* r)
{
*r = lookupHRegRemap(m, *r);
}
return;
case Ain_MFence:
return;
+ case Ain_A87Free:
+ return;
+ case Ain_A87PushPop:
+ mapRegs_AMD64AMode(m, i->Ain.A87PushPop.addr);
+ return;
+ case Ain_A87FpOp:
+ return;
+ case Ain_A87LdCW:
+ mapRegs_AMD64AMode(m, i->Ain.A87LdCW.addr);
+ return;
//.. case Xin_FpUnary:
//.. mapReg(m, &i->Xin.FpUnary.src);
//.. mapReg(m, &i->Xin.FpUnary.dst);
}
-//.. /* Emit ffree %st(7) */
-//.. static UChar* do_ffree_st7 ( UChar* p )
-//.. {
-//.. *p++ = 0xDD;
-//.. *p++ = 0xC7;
-//.. return p;
-//.. }
-//..
+/* Emit ffree %st(N) */
+static UChar* do_ffree_st ( UChar* p, Int n )
+{
+ vassert(n >= 0 && n <= 7);
+ *p++ = 0xDD;
+ *p++ = toUChar(0xC0 + n);
+ return p;
+}
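+/* E.g. do_ffree_st(p, 7) emits DD C7, ie. "ffree %st(7)"; an
+   A87Free with nregs == 2 therefore emits DD C7 DD C6. */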
+
//.. /* Emit fstp %st(i), 1 <= i <= 7 */
//.. static UChar* do_fstp_st ( UChar* p, Int i )
//.. {
UChar rex;
UChar* p = &buf[0];
UChar* ptmp;
+ Int j;
vassert(nbuf >= 32);
/* Wrap an integer as a int register, for use assembling
*p++ = 0x0F; *p++ = 0xAE; *p++ = 0xF0;
goto done;
+ case Ain_A87Free:
+ vassert(i->Ain.A87Free.nregs > 0 && i->Ain.A87Free.nregs <= 7);
+ for (j = 0; j < i->Ain.A87Free.nregs; j++) {
+ p = do_ffree_st(p, 7-j);
+ }
+ goto done;
+
+ case Ain_A87PushPop:
+ if (i->Ain.A87PushPop.isPush) {
+ /* Load from memory into %st(0): fldl amode */
+ *p++ = clearWBit(
+ rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
+ *p++ = 0xDD;
+ p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
+ } else {
+ /* Dump %st(0) to memory: fstpl amode */
+ *p++ = clearWBit(
+ rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
+ *p++ = 0xDD;
+ p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
+ }
+ goto done;
+
+ case Ain_A87FpOp:
+ switch (i->Ain.A87FpOp.op) {
+ case Afp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
+ case Afp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
+ case Afp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
+ case Afp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
+ case Afp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
+ case Afp_SCALE: *p++ = 0xD9; *p++ = 0xFD; break;
+ case Afp_ATAN: *p++ = 0xD9; *p++ = 0xF3; break;
+ case Afp_YL2X: *p++ = 0xD9; *p++ = 0xF1; break;
+ default: goto bad;
+ }
+ goto done;
+
+ case Ain_A87LdCW:
+ *p++ = clearWBit(
+ rexAMode_M(fake(5), i->Ain.A87LdCW.addr) );
+ *p++ = 0xD9;
+ p = doAMode_M(p, fake(5)/*subopcode*/, i->Ain.A87LdCW.addr);
+ goto done;
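+ /* (clearWBit, here and in the A87PushPop case above: rexAMode_M
+    yields a REX prefix with W set by default, which these x87
+    memory ops must not carry.) */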
+
case Ain_Store:
if (i->Ain.Store.sz == 2) {
/* This just goes to show the crazyness of the instruction
extern HChar* showAMD64ShiftOp ( AMD64ShiftOp );
-//.. /* --------- */
-//.. typedef
-//.. enum {
-//.. Xfp_INVALID,
-//.. /* Binary */
+/* --------- */
+typedef
+ enum {
+ Afp_INVALID,
+ /* Binary */
//.. Xfp_ADD, Xfp_SUB, Xfp_MUL, Xfp_DIV,
-//.. Xfp_SCALE, Xfp_ATAN, Xfp_YL2X, Xfp_YL2XP1, Xfp_PREM, Xfp_PREM1,
-//.. /* Unary */
-//.. Xfp_SQRT, Xfp_ABS, Xfp_NEG, Xfp_MOV, Xfp_SIN, Xfp_COS, Xfp_TAN,
-//.. Xfp_ROUND, Xfp_2XM1
-//.. }
-//.. X86FpOp;
-//..
-//.. extern HChar* showX86FpOp ( X86FpOp );
+ Afp_SCALE, Afp_ATAN, Afp_YL2X, //Xfp_YL2XP1, Xfp_PREM, Xfp_PREM1,
+ /* Unary */
+ Afp_SQRT, //Xfp_ABS, Xfp_NEG, Xfp_MOV,
+ Afp_SIN, Afp_COS, //Xfp_TAN,
+ Afp_ROUND, Afp_2XM1
+ }
+ A87FpOp;
+
+extern HChar* showA87FpOp ( A87FpOp );
/* --------- */
/* --------- */
typedef
enum {
- Ain_Imm64, /* Generate 64-bit literal to register */
- Ain_Alu64R, /* 64-bit mov/arith/logical, dst=REG */
- Ain_Alu64M, /* 64-bit mov/arith/logical, dst=MEM */
- Ain_Sh64, /* 64-bit shift/rotate, dst=REG or MEM */
- Ain_Test64, /* 64-bit test (AND, set flags, discard result) */
- Ain_Unary64, /* 64-bit not and neg */
- Ain_MulL, /* widening multiply */
- Ain_Div, /* div and mod */
+ Ain_Imm64, /* Generate 64-bit literal to register */
+ Ain_Alu64R, /* 64-bit mov/arith/logical, dst=REG */
+ Ain_Alu64M, /* 64-bit mov/arith/logical, dst=MEM */
+ Ain_Sh64, /* 64-bit shift/rotate, dst=REG or MEM */
+ Ain_Test64, /* 64-bit test (AND, set flags, discard result) */
+ Ain_Unary64, /* 64-bit not and neg */
+ Ain_MulL, /* widening multiply */
+ Ain_Div, /* div and mod */
//.. Xin_Sh3232, /* shldl or shrdl */
- Ain_Push, /* push 64-bit value on stack */
- Ain_Call, /* call to address in register */
- Ain_Goto, /* conditional/unconditional jmp to dst */
- Ain_CMov64, /* conditional move */
- Ain_MovZLQ, /* reg-reg move, zeroing out top half */
- Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
- Ain_Store, /* store 32/16/8 bit value in memory */
- Ain_Set64, /* convert condition code to 64-bit value */
- Ain_Bsfr64, /* 64-bit bsf/bsr */
- Ain_MFence, /* mem fence */
+ Ain_Push, /* push 64-bit value on stack */
+ Ain_Call, /* call to address in register */
+ Ain_Goto, /* conditional/unconditional jmp to dst */
+ Ain_CMov64, /* conditional move */
+ Ain_MovZLQ, /* reg-reg move, zeroing out top half */
+ Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
+ Ain_Store, /* store 32/16/8 bit value in memory */
+ Ain_Set64, /* convert condition code to 64-bit value */
+ Ain_Bsfr64, /* 64-bit bsf/bsr */
+ Ain_MFence, /* mem fence */
+ Ain_A87Free, /* free up x87 registers */
+ Ain_A87PushPop, /* x87 loads/stores */
+ Ain_A87FpOp, /* x87 operations */
+ Ain_A87LdCW, /* load x87 control word */
//..
//.. Xin_FpUnary, /* FP fake unary op */
//.. Xin_FpBinary, /* FP fake binary op */
//.. Xin_FpLdStI, /* FP fake load/store, converting to/from Int */
//.. Xin_Fp64to32, /* FP round IEEE754 double to IEEE754 single */
//.. Xin_FpCMov, /* FP fake floating point conditional move */
- Ain_LdMXCSR, /* load %mxcsr */
+ Ain_LdMXCSR, /* load %mxcsr */
//.. Xin_FpStSW_AX, /* fstsw %ax */
- Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int
- register */
- Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */
- Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */
- Ain_SseSDSS, /* scalar float32 to/from float64 */
+ Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int
+ register */
+ Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */
+ Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */
+ Ain_SseSDSS, /* scalar float32 to/from float64 */
//..
//.. Xin_SseConst, /* Generate restricted SSE literal */
- Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment
- constraints, upper 96/64/0 bits arbitrary */
- Ain_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg */
- Ain_Sse32Fx4, /* SSE binary, 32Fx4 */
- Ain_Sse32FLo, /* SSE binary, 32F in lowest lane only */
- Ain_Sse64Fx2, /* SSE binary, 64Fx2 */
- Ain_Sse64FLo, /* SSE binary, 64F in lowest lane only */
- Ain_SseReRg, /* SSE binary general reg-reg, Re, Rg */
- Ain_SseCMov, /* SSE conditional move */
- Ain_SseShuf /* SSE2 shuffle (pshufd) */
+ Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment
+ constraints, upper 96/64/0 bits arbitrary */
+ Ain_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg */
+ Ain_Sse32Fx4, /* SSE binary, 32Fx4 */
+ Ain_Sse32FLo, /* SSE binary, 32F in lowest lane only */
+ Ain_Sse64Fx2, /* SSE binary, 64Fx2 */
+ Ain_Sse64FLo, /* SSE binary, 64F in lowest lane only */
+ Ain_SseReRg, /* SSE binary general reg-reg, Re, Rg */
+ Ain_SseCMov, /* SSE conditional move */
+ Ain_SseShuf /* SSE2 shuffle (pshufd) */
}
AMD64InstrTag;
struct {
} MFence;
-//.. /* X86 Floating point (fake 3-operand, "flat reg file" insns) */
-//.. struct {
-//.. X86FpOp op;
-//.. HReg src;
-//.. HReg dst;
-//.. } FpUnary;
-//.. struct {
-//.. X86FpOp op;
-//.. HReg srcL;
-//.. HReg srcR;
-//.. HReg dst;
-//.. } FpBinary;
-//.. struct {
-//.. Bool isLoad;
-//.. UChar sz; /* only 4 (IEEE single) or 8 (IEEE double) */
-//.. HReg reg;
-//.. X86AMode* addr;
-//.. } FpLdSt;
-//.. /* Move 64-bit float to/from memory, converting to/from
-//.. signed int on the way. Note the conversions will observe
-//.. the host FPU rounding mode currently in force. */
-//.. struct {
-//.. Bool isLoad;
-//.. UChar sz; /* only 2, 4 or 8 */
-//.. HReg reg;
-//.. X86AMode* addr;
-//.. } FpLdStI;
-//.. /* By observing the current FPU rounding mode, round (etc)
-//.. src into dst given that dst should be interpreted as an
-//.. IEEE754 32-bit (float) type. */
-//.. struct {
-//.. HReg src;
-//.. HReg dst;
-//.. } Fp64to32;
-//.. /* Mov src to dst on the given condition, which may not
-//.. be the bogus Xcc_ALWAYS. */
-//.. struct {
-//.. X86CondCode cond;
-//.. HReg src;
-//.. HReg dst;
-//.. } FpCMov;
+ /* --- X87 --- */
+
+ /* A very minimal set of x87 insns, which operate in a strictly
+ stack-like way, so there is no need to model individual x87
+ registers. */
+
+ /* Do 'ffree' on %st(7) .. %st(8-nregs) */
+ struct {
+ Int nregs; /* 1 <= nregs <= 7 */
+ } A87Free;
+
+ /* Push a 64-bit FP value from memory onto the stack, or move
+ a value from the stack to memory and remove it from the
+ stack. */
+ struct {
+ AMD64AMode* addr;
+ Bool isPush;
+ } A87PushPop;
+
+ /* Do an operation on the top-of-stack. This can be unary, in
+ which case it is %st0 = OP( %st0 ), or binary: %st0 = OP(
+ %st0, %st1 ). */
+ struct {
+ A87FpOp op;
+ } A87FpOp;
+
+ /* Load the FPU control word. */
+ struct {
+ AMD64AMode* addr;
+ } A87LdCW;
+
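+ /* Taken together, the typical use of these four insns (cf. the
+    unary-op cases in iselDblExpr) is:
+       A87Free(1)            -- make %st(7) available
+       A87PushPop(m, True)   -- fldl: operand from memory -> %st(0)
+       A87FpOp(Afp_SQRT)     -- operate on %st(0)
+       A87PushPop(m, False)  -- fstpl: %st(0) -> memory, popped
+ */
+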
+ /* --- SSE --- */
+
/* Load 32 bits into %mxcsr. */
struct {
AMD64AMode* addr;
}
AMD64Instr;
-extern AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst );
-extern AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp, AMD64RMI*, HReg );
-extern AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp, AMD64RI*, AMD64AMode* );
-extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, AMD64RM* dst );
-extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, AMD64RM* );
-extern AMD64Instr* AMD64Instr_Test64 ( AMD64RI* src, AMD64RM* dst );
-extern AMD64Instr* AMD64Instr_MulL ( Bool syned, Int sz, AMD64RM* );
-extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* );
+extern AMD64Instr* AMD64Instr_Imm64 ( ULong imm64, HReg dst );
+extern AMD64Instr* AMD64Instr_Alu64R ( AMD64AluOp, AMD64RMI*, HReg );
+extern AMD64Instr* AMD64Instr_Alu64M ( AMD64AluOp, AMD64RI*, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_Unary64 ( AMD64UnaryOp op, AMD64RM* dst );
+extern AMD64Instr* AMD64Instr_Sh64 ( AMD64ShiftOp, UInt, AMD64RM* );
+extern AMD64Instr* AMD64Instr_Test64 ( AMD64RI* src, AMD64RM* dst );
+extern AMD64Instr* AMD64Instr_MulL ( Bool syned, Int sz, AMD64RM* );
+extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* );
//.. extern AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp, UInt amt, HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* );
-extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int );
-extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode cond, AMD64RI* dst );
-extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst );
-extern AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
- AMD64AMode* src, HReg dst );
-extern AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst );
-extern AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst );
-extern AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_MFence ( void );
+extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* );
+extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int );
+extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode cond, AMD64RI* dst );
+extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst );
+extern AMD64Instr* AMD64Instr_MovZLQ ( HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
+ AMD64AMode* src, HReg dst );
+extern AMD64Instr* AMD64Instr_Store ( UChar sz, HReg src, AMD64AMode* dst );
+extern AMD64Instr* AMD64Instr_Set64 ( AMD64CondCode cond, HReg dst );
+extern AMD64Instr* AMD64Instr_Bsfr64 ( Bool isFwds, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_MFence ( void );
+extern AMD64Instr* AMD64Instr_A87Free ( Int nregs );
+extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush );
+extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
+extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr );
//..
//.. extern AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst );
//.. extern AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst );
//.. extern AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* );
//.. extern AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst );
//.. extern AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode, HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* );
+extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* );
//.. extern AMD64Instr* AMD64Instr_FpStSW_AX ( void );
-extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst );
-extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst );
+extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst );
//..
//.. extern AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst );
-extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* );
-extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* );
-extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg );
-extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst );
-extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* );
+extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse32FLo ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse64Fx2 ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_Sse64FLo ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg );
+extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst );
extern void ppAMD64Instr ( AMD64Instr* );
add_to_rsp(env, 8);
}
-//.. /* Mess with the FPU's rounding mode: set to the default rounding mode
-//.. (DEFAULT_FPUCW). */
-//.. static
-//.. void set_FPU_rounding_default ( ISelEnv* env )
-//.. {
-//.. /* pushl $DEFAULT_FPUCW
-//.. fldcw 0(%esp)
-//.. addl $4, %esp
-//.. */
-//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
-//.. addInstr(env, X86Instr_Push(X86RMI_Imm(DEFAULT_FPUCW)));
-//.. addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
-//.. add_to_esp(env, 4);
-//.. }
+/* Mess with the FPU's rounding mode: set to the default rounding mode
+ (DEFAULT_FPUCW). */
+static
+void set_FPU_rounding_default ( ISelEnv* env )
+{
+ /* movq $DEFAULT_FPUCW, -8(%rsp)
+ fldcw -8(%rsp)
+ */
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ addInstr(env, AMD64Instr_Alu64M(
+ Aalu_MOV, AMD64RI_Imm(DEFAULT_FPUCW), m8_rsp));
+ addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
+}
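+
+/* Callers pair this with set_FPU_rounding_mode below: establish the
+   mode the IR asks for, run the x87 op, then restore the default
+   (see the Iop_RoundF64 case in iselDblExpr). */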
/* Mess with the SSE unit's rounding mode: 'mode' is an I32-typed
}
-//.. /* Mess with the FPU's rounding mode: 'mode' is an I32-typed
-//.. expression denoting a value in the range 0 .. 3, indicating a round
-//.. mode encoded as per type IRRoundingMode. Set the x87 FPU to have
-//.. the same rounding.
-//.. */
-//.. static
-//.. void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
-//.. {
-//.. HReg rrm = iselIntExpr_R(env, mode);
-//.. HReg rrm2 = newVRegI(env);
-//.. X86AMode* zero_esp = X86AMode_IR(0, hregX86_ESP());
-//..
-//.. /* movl %rrm, %rrm2
-//.. andl $3, %rrm2 -- shouldn't be needed; paranoia
-//.. shll $10, %rrm2
-//.. orl $DEFAULT_FPUCW, %rrm2
-//.. pushl %rrm2
-//.. fldcw 0(%esp)
-//.. addl $4, %esp
-//.. */
-//.. addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
-//.. addInstr(env, X86Instr_Alu32R(Xalu_AND, X86RMI_Imm(3), rrm2));
-//.. addInstr(env, X86Instr_Sh32(Xsh_SHL, 10, X86RM_Reg(rrm2)));
-//.. addInstr(env, X86Instr_Alu32R(Xalu_OR, X86RMI_Imm(DEFAULT_FPUCW), rrm2));
-//.. addInstr(env, X86Instr_Push(X86RMI_Reg(rrm2)));
-//.. addInstr(env, X86Instr_FpLdStCW(True/*load*/, zero_esp));
-//.. add_to_esp(env, 4);
-//.. }
+/* Mess with the FPU's rounding mode: 'mode' is an I32-typed
+ expression denoting a value in the range 0 .. 3, indicating a round
+ mode encoded as per type IRRoundingMode. Set the x87 FPU to have
+ the same rounding.
+*/
+static
+void set_FPU_rounding_mode ( ISelEnv* env, IRExpr* mode )
+{
+ HReg rrm = iselIntExpr_R(env, mode);
+ HReg rrm2 = newVRegI(env);
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+
+ /* movq %rrm, %rrm2
+ andq $3, %rrm2 -- shouldn't be needed; paranoia
+ shlq $10, %rrm2
+ orq $DEFAULT_FPUCW, %rrm2
+ movq %rrm2, -8(%rsp)
+ fldcw -8(%rsp)
+ */
+ addInstr(env, mk_iMOVsd_RR(rrm, rrm2));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_AND, AMD64RMI_Imm(3), rrm2));
+ addInstr(env, AMD64Instr_Sh64(Ash_SHL, 10, AMD64RM_Reg(rrm2)));
+ addInstr(env, AMD64Instr_Alu64R(Aalu_OR,
+ AMD64RMI_Imm(DEFAULT_FPUCW), rrm2));
+ addInstr(env, AMD64Instr_Alu64M(Aalu_MOV,
+ AMD64RI_Reg(rrm2), m8_rsp));
+ addInstr(env, AMD64Instr_A87LdCW(m8_rsp));
+}
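+
+/* Worked example, assuming DEFAULT_FPUCW leaves the rounding field
+   (control-word bits 11:10) clear: mode == 2 (Irrm_PosINF) gives
+   rrm2 = (2 << 10) | DEFAULT_FPUCW, ie. x87 RC = 10 = round towards
+   +infinity; the IRRoundingMode and x87 RC encodings coincide. */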
/* Generate !src into a new vector register. Amazing that there isn't
sub_from_rsp(env, 16);
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 16, vec, rsp0));
addInstr(env, AMD64Instr_Alu64R( Aalu_MOV,
- AMD64RMI_Mem(rspN), dst ));
+ AMD64RMI_Mem(rspN), dst ));
add_to_rsp(env, 16);
return dst;
}
//.. return res;
//.. }
//.. }
-//..
-//.. if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64) {
-//.. HReg rf = iselDblExpr(env, e->Iex.Binop.arg2);
-//.. HReg dst = newVRegF(env);
-//..
-//.. /* rf now holds the value to be rounded. The first thing to do
-//.. is set the FPU's rounding mode accordingly. */
-//..
-//.. /* Set host rounding mode */
-//.. set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
-//..
-//.. /* grndint %rf, %dst */
-//.. addInstr(env, X86Instr_FpUnary(Xfp_ROUND, rf, dst));
-//..
-//.. /* Restore default FPU rounding. */
-//.. set_FPU_rounding_default( env );
-//..
-//.. return dst;
-//.. }
+
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF64) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+
+ /* arg now holds the value to be rounded. The first thing to do
+ is set the FPU's rounding mode accordingly. */
+
+ /* Set host x87 rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
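+ /* Round-trip through memory: the value lives in an SSE register,
+    and the x87 stack can only be loaded from memory. */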
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
+ addInstr(env, AMD64Instr_A87Free(1));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+
+ /* Restore default x87 rounding. */
+ set_FPU_rounding_default( env );
+
+ return dst;
+ }
+
+ if (e->tag == Iex_Binop
+ && (e->Iex.Binop.op == Iop_ScaleF64
+ || e->Iex.Binop.op == Iop_AtanF64
+ || e->Iex.Binop.op == Iop_Yl2xF64)
+ ) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg1 = iselDblExpr(env, e->Iex.Binop.arg1);
+ HReg arg2 = iselDblExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+ Bool arg2first = toBool(e->Iex.Binop.op == Iop_ScaleF64);
+ addInstr(env, AMD64Instr_A87Free(2));
+
+ /* one arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(
+ False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+
+ /* other arg -> top of x87 stack */
+ addInstr(env, AMD64Instr_SseLdSt(
+ False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+
+ /* do it */
+ switch (e->Iex.Binop.op) {
+ case Iop_ScaleF64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_SCALE));
+ break;
+ case Iop_AtanF64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_ATAN));
+ break;
+ case Iop_Yl2xF64:
+ addInstr(env, AMD64Instr_A87FpOp(Afp_YL2X));
+ break;
+ default:
+ vassert(0);
+ }
+
+ /* save result */
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+ return dst;
+ }
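+
+ /* For Iop_Yl2xF64 (registers shown symbolically) the case above
+    emits, in outline:
+       ffree %st(7) ; ffree %st(6)
+       movsd %arg1,-8(%rsp) ; fldl -8(%rsp)   -- y; ends up in %st(1)
+       movsd %arg2,-8(%rsp) ; fldl -8(%rsp)   -- x; in %st(0)
+       fyl2x                                  -- %st(0) = y * log2(x)
+       fstpl -8(%rsp) ; movsd -8(%rsp),%dst
+ */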
if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_I64toF64) {
HReg dst = newVRegV(env);
return dst;
}
-//.. if (e->tag == Iex_Unop) {
-//.. X86FpOp fpop = Xfp_INVALID;
-//.. switch (e->Iex.Unop.op) {
+ if (e->tag == Iex_Unop) {
+ A87FpOp fpop = Afp_INVALID;
+ switch (e->Iex.Unop.op) {
//.. case Iop_NegF64: fpop = Xfp_NEG; break;
//.. case Iop_AbsF64: fpop = Xfp_ABS; break;
-//.. case Iop_SqrtF64: fpop = Xfp_SQRT; break;
-//.. case Iop_SinF64: fpop = Xfp_SIN; break;
-//.. case Iop_CosF64: fpop = Xfp_COS; break;
+ case Iop_SqrtF64: fpop = Afp_SQRT; break;
+ case Iop_SinF64: fpop = Afp_SIN; break;
+ case Iop_CosF64: fpop = Afp_COS; break;
//.. case Iop_TanF64: fpop = Xfp_TAN; break;
-//.. case Iop_2xm1F64: fpop = Xfp_2XM1; break;
-//.. default: break;
-//.. }
-//.. if (fpop != Xfp_INVALID) {
-//.. HReg res = newVRegF(env);
-//.. HReg src = iselDblExpr(env, e->Iex.Unop.arg);
-//.. addInstr(env, X86Instr_FpUnary(fpop,src,res));
-//.. if (fpop != Xfp_SQRT
-//.. && fpop != Xfp_NEG && fpop != Xfp_ABS)
-//.. roundToF64(env, res);
-//.. return res;
-//.. }
-//.. }
+ case Iop_2xm1F64: fpop = Afp_2XM1; break;
+ default: break;
+ }
+ if (fpop != Afp_INVALID) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg = iselDblExpr(env, e->Iex.Unop.arg);
+ HReg dst = newVRegV(env);
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
+ addInstr(env, AMD64Instr_A87Free(1));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87FpOp(fpop));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
+ return dst;
+ }
+ }
if (e->tag == Iex_Unop) {
switch (e->Iex.Unop.op) {