the rounding mode is specified within the instruction itself).
git-svn-id: svn://svn.valgrind.org/vex/trunk@1986
}
- /* 66 0f 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1 (Partial
- implementation only -- only deal with cases where the rounding
- mode is specified directly by the immediate byte. */
- if (have66noF2noF3( pfx )
+ /* 66 0F 3A 0B /r ib = ROUNDSD imm8, xmm2/m64, xmm1
+ (Partial implementation only -- only deal with cases where
+ the rounding mode is specified directly by the immediate byte.)
+ 66 0F 3A 0A /r ib = ROUNDSS imm8, xmm2/m32, xmm1
+ (Limitations ditto)
+ */
+ if (have66noF2noF3(pfx)
&& sz == 2
- && insn[0] == 0x0F && insn[1] == 0x3A && insn[2] == 0x0B) {
-
- modrm = insn[3];
+ && insn[0] == 0x0F && insn[1] == 0x3A
+ && (insn[2] == 0x0B || insn[2] == 0x0A)) {
- IRTemp src = newTemp(Ity_F64);
- IRTemp res = newTemp(Ity_F64);
+ Bool isD = insn[2] == 0x0B;
+ IRTemp src = newTemp(isD ? Ity_F64 : Ity_F32);
+ IRTemp res = newTemp(isD ? Ity_F64 : Ity_F32);
Int imm = 0;
+ modrm = insn[3];
+
if (epartIsReg(modrm)) {
- assign( src, getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 ) );
+ assign( src,
+ isD ? getXMMRegLane64F( eregOfRexRM(pfx, modrm), 0 )
+ : getXMMRegLane32F( eregOfRexRM(pfx, modrm), 0 ) );
imm = insn[3+1];
if (imm & ~3) goto decode_failure;
delta += 3+1+1;
- DIP( "roundsd $%d,%s,%s\n",
+ DIP( "rounds%c $%d,%s,%s\n",
+ isD ? 'd' : 's',
imm, nameXMMReg( eregOfRexRM(pfx, modrm) ),
nameXMMReg( gregOfRexRM(pfx, modrm) ) );
} else {
addr = disAMode( &alen, vbi, pfx, delta+3, dis_buf, 0 );
- assign( src, loadLE( Ity_F64, mkexpr(addr) ));
+ assign( src, loadLE( isD ? Ity_F64 : Ity_F32, mkexpr(addr) ));
imm = insn[3+alen];
if (imm & ~3) goto decode_failure;
delta += 3+alen+1;
that encoding is the same as the encoding for IRRoundingMode,
we can use that value directly in the IR as a rounding
mode. */
- assign(res, binop(Iop_RoundF64toInt, mkU32(imm & 3), mkexpr(src)) );
+ assign(res, binop(isD ? Iop_RoundF64toInt : Iop_RoundF32toInt,
+ mkU32(imm & 3), mkexpr(src)) );
- putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+ if (isD)
+ putXMMRegLane64F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
+ else
+ putXMMRegLane32F( gregOfRexRM(pfx, modrm), 0, mkexpr(res) );
goto decode_success;
}
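The "imm & ~3" checks punt to decode_failure whenever any immediate bit
above bit 1 is set: bit 2 selects MXCSR-based rounding rather than an
immediate-specified mode, and bit 3 controls precision-exception
signalling, neither of which this partial implementation handles.  For
the accepted cases the low two bits use the Intel rounding-control
encoding, which coincides with IRRoundingMode (0 = nearest-even,
1 = toward -inf, 2 = toward +inf, 3 = toward zero).  A minimal C model
of the accepted ROUNDSS behaviour, assuming that encoding
(roundss_model is a hypothetical name, not part of the patch):

   #include <fenv.h>
   #include <math.h>

   /* Sketch only: models ROUNDSS with an immediate-specified rounding
      mode, ignoring exception-flag details. */
   static float roundss_model ( float src, int imm )
   {
      static const int rm[4] = { FE_TONEAREST, FE_DOWNWARD,
                                 FE_UPWARD, FE_TOWARDZERO };
      int saved = fegetround();
      fesetround(rm[imm & 3]);
      float res = rintf(src);    /* round to integral in current mode */
      fesetround(saved);
      return res;
   }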
vassert(nregs >= 1 && nregs <= 7);
return i;
}
-AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush )
+AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB )
{
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_A87PushPop;
i->Ain.A87PushPop.addr = addr;
i->Ain.A87PushPop.isPush = isPush;
+ i->Ain.A87PushPop.szB = szB;
+ vassert(szB == 8 || szB == 4);
return i;
}
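The extra szB parameter threads the memory operand size through to the
printer and the emitter.  As the instruction-selection changes further
down show, existing callers pass 8 to keep the 64-bit behaviour
unchanged, and the new F32 path passes 4, e.g.
AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4).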
AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op )
vex_printf("ffree %%st(7..%d)", 8 - i->Ain.A87Free.nregs );
break;
case Ain_A87PushPop:
- vex_printf(i->Ain.A87PushPop.isPush ? "fldl " : "fstpl ");
+ vex_printf(i->Ain.A87PushPop.isPush ? "fld%c " : "fstp%c ",
+ i->Ain.A87PushPop.szB == 4 ? 's' : 'l');
ppAMD64AMode(i->Ain.A87PushPop.addr);
break;
case Ain_A87FpOp:
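The printer uses AT&T mnemonic suffixes: 's' denotes a 32-bit
single-precision memory operand and 'l' a 64-bit double-precision one,
so szB == 4 prints flds/fstps and szB == 8 prints fldl/fstpl.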
goto done;
case Ain_A87PushPop:
+ vassert(i->Ain.A87PushPop.szB == 8 || i->Ain.A87PushPop.szB == 4);
if (i->Ain.A87PushPop.isPush) {
- /* Load from memory into %st(0): fldl amode */
+ /* Load from memory into %st(0): flds/fldl amode */
*p++ = clearWBit(
rexAMode_M(fake(0), i->Ain.A87PushPop.addr) );
- *p++ = 0xDD;
+ *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
p = doAMode_M(p, fake(0)/*subopcode*/, i->Ain.A87PushPop.addr);
} else {
- /* Dump %st(0) to memory: fstpl amode */
+ /* Dump %st(0) to memory: fstps/fstpl amode */
*p++ = clearWBit(
rexAMode_M(fake(3), i->Ain.A87PushPop.addr) );
- *p++ = 0xDD;
+ *p++ = i->Ain.A87PushPop.szB == 4 ? 0xD9 : 0xDD;
p = doAMode_M(p, fake(3)/*subopcode*/, i->Ain.A87PushPop.addr);
goto done;
}
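For reference, the opcode choice matches the Intel SDM encodings:
fld m32fp is D9 /0 and fld m64fp is DD /0, while fstp m32fp is D9 /3
and fstp m64fp is DD /3.  Only the opcode byte depends on szB; the /0
or /3 reg-field subopcode is supplied via the fake(0)/fake(3) argument
to doAMode_M.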
Int nregs; /* 1 <= nregs <= 7 */
} A87Free;
- /* Push a 64-bit FP value from memory onto the stack, or move
- a value from the stack to memory and remove it from the
- stack. */
+ /* Push a 32- or 64-bit FP value from memory onto the stack,
+ or move a value from the stack to memory and remove it
+ from the stack. */
struct {
AMD64AMode* addr;
Bool isPush;
+ UChar szB; /* 4 or 8 */
} A87PushPop;
/* Do an operation on the top-of-stack. This can be unary, in
extern AMD64Instr* AMD64Instr_DACAS ( AMD64AMode* addr, UChar sz );
extern AMD64Instr* AMD64Instr_A87Free ( Int nregs );
-extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush );
+extern AMD64Instr* AMD64Instr_A87PushPop ( AMD64AMode* addr, Bool isPush, UChar szB );
extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr );
extern AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr );
/* one arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg2, m8_rsp));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* other arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg1, m8_rsp));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
switch (e->Iex.Triop.op) {
case Iop_PRemC3210F64:
return dst;
}
+ if (e->tag == Iex_Binop && e->Iex.Binop.op == Iop_RoundF32toInt) {
+ AMD64AMode* m8_rsp = AMD64AMode_IR(-8, hregAMD64_RSP());
+ HReg arg = iselFltExpr(env, e->Iex.Binop.arg2);
+ HReg dst = newVRegV(env);
+
+   /* arg now holds the value to be rounded.  The first thing to do
+      is set the FPU's rounding mode accordingly. */
+
+ /* Set host x87 rounding mode */
+ set_FPU_rounding_mode( env, e->Iex.Binop.arg1 );
+
+ addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 4, arg, m8_rsp));
+ addInstr(env, AMD64Instr_A87Free(1));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 4));
+ addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 4));
+ addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 4, dst, m8_rsp));
+
+ /* Restore default x87 rounding. */
+ set_FPU_rounding_default( env );
+
+ return dst;
+ }
+
ppIRExpr(e);
vpanic("iselFltExpr_wrk");
}
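Modulo register allocation, the Iop_RoundF32toInt case above emits a
sequence along these lines (a sketch; xmm_arg/xmm_dst stand for
whatever registers the allocator picks, and the fldcw-based mode
switching is summarised to one line each side):

   /* set_FPU_rounding_mode:      fldcw  <requested mode>     */
   /* SseLdSt store, 4 bytes:     movss  %xmm_arg, -8(%rsp)   */
   /* A87Free(1):                 ffree  %st(7)               */
   /* A87PushPop push, szB 4:     flds   -8(%rsp)             */
   /* A87FpOp(Afp_ROUND):         frndint                     */
   /* A87PushPop pop, szB 4:      fstps  -8(%rsp)             */
   /* SseLdSt load, 4 bytes:      movss  -8(%rsp), %xmm_dst   */
   /* set_FPU_rounding_default:   fldcw  <round-nearest>      */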
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
addInstr(env, AMD64Instr_A87Free(1));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
addInstr(env, AMD64Instr_A87FpOp(Afp_ROUND));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
/* Restore default x87 rounding. */
/* one arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(
False/*store*/, 8, arg2first ? arg2 : arg1, m8_rsp));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* other arg -> top of x87 stack */
addInstr(env, AMD64Instr_SseLdSt(
False/*store*/, 8, arg2first ? arg1 : arg2, m8_rsp));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* do it */
/* XXXROUNDINGFIXME */
}
/* save result */
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
return dst;
}
Int nNeeded = e->Iex.Binop.op==Iop_TanF64 ? 2 : 1;
addInstr(env, AMD64Instr_SseLdSt(False/*store*/, 8, arg, m8_rsp));
addInstr(env, AMD64Instr_A87Free(nNeeded));
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, True/*push*/, 8));
/* XXXROUNDINGFIXME */
/* set roundingmode here */
addInstr(env, AMD64Instr_A87FpOp(fpop));
if (e->Iex.Binop.op==Iop_TanF64) {
/* get rid of the extra 1.0 that fptan pushes */
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
}
- addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/));
+ addInstr(env, AMD64Instr_A87PushPop(m8_rsp, False/*pop*/, 8));
addInstr(env, AMD64Instr_SseLdSt(True/*load*/, 8, dst, m8_rsp));
return dst;
}
case Iop_F64toF32: vex_printf("F64toF32"); return;
case Iop_RoundF64toInt: vex_printf("RoundF64toInt"); return;
+ case Iop_RoundF32toInt: vex_printf("RoundF32toInt"); return;
case Iop_RoundF64toF32: vex_printf("RoundF64toF32"); return;
case Iop_ReinterpF64asI64: vex_printf("ReinterpF64asI64"); return;
BINARY(ity_RMode,Ity_F64, Ity_F64);
case Iop_SqrtF32:
+ case Iop_RoundF32toInt:
BINARY(ity_RMode,Ity_F32, Ity_F32);
case Iop_CmpF64:
Iop_2xm1F64, /* (2^arg - 1.0) */
Iop_RoundF64toInt, /* F64 value to nearest integral value (still
as F64) */
+ Iop_RoundF32toInt, /* F32 value to nearest integral value (still
+ as F32) */
/* --- guest ppc32/64 specifics, not mandated by 754. --- */
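The typing added in typeOfPrimop above makes the first operand an
Ity_I32 carrying an IRRoundingMode and the second the Ity_F32 value,
mirroring Iop_RoundF64toInt.  A front end constructs it the same way
the amd64 decoder does, e.g. (sketch using the toIR helpers, with x a
hypothetical IRTemp of type Ity_F32):

   /* Round x toward zero; the result is still Ity_F32. */
   IRExpr* r = binop( Iop_RoundF32toInt,
                      mkU32(Irrm_ZERO),   /* rounding mode */
                      mkexpr(x) );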