From: Julian Seward Date: Mon, 14 Feb 2011 13:48:03 +0000 (+0000) Subject: Merge from trunk, r2084 (Implement rex.W/FXSAVE and also both variants X-Git-Tag: svn/VALGRIND_3_6_1^2~5 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b79c72ed2af1462b40b2aebcce3a6096588dca27;p=thirdparty%2Fvalgrind.git Merge from trunk, r2084 (Implement rex.W/FXSAVE and also both variants of FXRSTOR.) git-svn-id: svn://svn.valgrind.org/vex/branches/VEX_3_6_BRANCH@2098 --- diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h index 74ce1e6153..33450c3a2d 100644 --- a/VEX/priv/guest_amd64_defs.h +++ b/VEX/priv/guest_amd64_defs.h @@ -154,7 +154,8 @@ extern void amd64g_dirtyhelper_CPUID_sse42_and_cx16 ( VexGuestAMD64State* st ); extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* ); -extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord ); +extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord ); +extern VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State*, HWord ); extern ULong amd64g_dirtyhelper_RDTSC ( void ); diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c index c4917fda7a..f626f0a8b1 100644 --- a/VEX/priv/guest_amd64_helpers.c +++ b/VEX/priv/guest_amd64_helpers.c @@ -1454,6 +1454,68 @@ ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl ) } +/* This is used to implement both 'frstor' and 'fldenv'. The latter + appears to differ from the former only in that the 8 FP registers + themselves are not transferred into the guest state. 
*/
+static  /* Loads the x87 image at x87_state (viewed as an Fpu_State) into
+           vex_state: FPTAG always, FPREG only when moveRegs is set, plus
+           FTOP, FC3210 and FPROUND.  Returns the emulation warning that
+           is unpacked from the result of amd64g_check_fldcw. */
+VexEmWarn do_put_x87 ( Bool moveRegs,
+                       /*IN*/UChar* x87_state,
+                       /*OUT*/VexGuestAMD64State* vex_state )
+{
+   Int        stno, preg;  /* stno: stack-relative index; preg: physical index */
+   UInt       tag;
+   ULong*     vexRegs = (ULong*)(&vex_state->guest_FPREG[0]);
+   UChar*     vexTags = (UChar*)(&vex_state->guest_FPTAG[0]);
+   Fpu_State* x87     = (Fpu_State*)x87_state;
+   UInt       ftop    = (x87->env[FP_ENV_STAT] >> 11) & 7;  /* status word bits 13:11 */
+   UInt       tagw    = x87->env[FP_ENV_TAG];  /* read 2 bits per physical register below */
+   UInt       fpucw   = x87->env[FP_ENV_CTRL];
+   UInt       c3210   = x87->env[FP_ENV_STAT] & 0x4700;  /* status word bits 14, 10, 9, 8 */
+   VexEmWarn  ew;
+   UInt       fpround;
+   ULong      pair;
+
+   /* Copy registers and tags.  The image is read in stack order
+      (10 bytes per entry, indexed by stno), whereas the tag word and
+      the guest-state arrays are indexed by physical register, hence
+      the rotation by ftop when computing preg. */
+   for (stno = 0; stno < 8; stno++) {
+      preg = (stno + ftop) & 7;
+      tag = (tagw >> (2*preg)) & 3;
+      if (tag == 3) {
+         /* register is empty */
+         /* hmm, if it's empty, does it still get written?  Probably
+            safer to say it does.  If we don't, memcheck could get out
+            of sync, in that it thinks all FP registers are defined by
+            this helper, but in reality some have not been updated. */
+         if (moveRegs)
+            vexRegs[preg] = 0; /* IEEE754 64-bit zero */
+         vexTags[preg] = 0;
+      } else {
+         /* register is non-empty */
+         if (moveRegs)
+            convert_f80le_to_f64le( &x87->reg[10*stno],
+                                    (UChar*)&vexRegs[preg] );
+         vexTags[preg] = 1;
+      }
+   }
+
+   /* stack pointer */
+   vex_state->guest_FTOP = ftop;
+
+   /* status word: only the bits selected into c3210 are kept */
+   vex_state->guest_FC3210 = c3210;
+
+   /* handle the control word, setting FPROUND and detecting any
+      emulation warnings.  The 64-bit result of the check is unpacked
+      as: low 32 bits -> FPROUND value, high 32 bits -> warning. */
+   pair    = amd64g_check_fldcw ( (ULong)fpucw );
+   fpround = (UInt)pair;
+   ew      = (VexEmWarn)(pair >> 32);
+
+   vex_state->guest_FPROUND = fpround & 3;
+
+   /* emulation warnings --> caller */
+   return ew;
+}
+
+
 /* Create an x87 FPU state from the guest state, as close as we can
    approximate it.
*/
 static
@@ -1610,6 +1672,94 @@ void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
 }
 
 
+/* CALLED FROM GENERATED CODE */
+/* DIRTY HELPER (writes guest state, reads guest mem).
+   Reloads guest state from the memory image at 'addr'.  As far as the
+   visible code shows, the image is read as follows: control word at
+   addrS[0], status word at addrS[1], a per-register bit mask at
+   addrC[4], eight x87 registers of 10 bytes each at 16-byte strides
+   starting at byte offset 32, and sixteen XMM values starting at byte
+   offset 160.  Returns warnX87 unless it is EmWarn_NONE, in which
+   case warnXMM is returned. */
+VexEmWarn amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
+{
+   Fpu_State tmp;
+   VexEmWarn warnX87 = EmWarn_NONE;
+   VexEmWarn warnXMM = EmWarn_NONE;
+   UShort*   addrS   = (UShort*)addr;          /* image viewed as 16-bit units */
+   UChar*    addrC   = (UChar*)addr;           /* image viewed as bytes */
+   U128*     xmm     = (U128*)(addr + 160);    /* XMM values start 160 bytes in */
+   UShort    fp_tags;
+   Int       r, stno, i;
+
+   /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
+      to be byte-swapped.  That case is not handled; it is asserted
+      not to happen instead. */
+   vassert(host_is_little_endian());
+
+#  define COPY_U128(_dst,_src)                       \
+      do { _dst[0] = _src[0]; _dst[1] = _src[1];        \
+           _dst[2] = _src[2]; _dst[3] = _src[3]; }      \
+      while (0)
+
+   COPY_U128( gst->guest_XMM0, xmm[0] );
+   COPY_U128( gst->guest_XMM1, xmm[1] );
+   COPY_U128( gst->guest_XMM2, xmm[2] );
+   COPY_U128( gst->guest_XMM3, xmm[3] );
+   COPY_U128( gst->guest_XMM4, xmm[4] );
+   COPY_U128( gst->guest_XMM5, xmm[5] );
+   COPY_U128( gst->guest_XMM6, xmm[6] );
+   COPY_U128( gst->guest_XMM7, xmm[7] );
+   COPY_U128( gst->guest_XMM8, xmm[8] );
+   COPY_U128( gst->guest_XMM9, xmm[9] );
+   COPY_U128( gst->guest_XMM10, xmm[10] );
+   COPY_U128( gst->guest_XMM11, xmm[11] );
+   COPY_U128( gst->guest_XMM12, xmm[12] );
+   COPY_U128( gst->guest_XMM13, xmm[13] );
+   COPY_U128( gst->guest_XMM14, xmm[14] );
+   COPY_U128( gst->guest_XMM15, xmm[15] );
+
+#  undef COPY_U128
+
+   /* Copy the x87 registers out of the image, into a temporary
+      Fpu_State struct.  The struct is zeroed first, so any field
+      not filled in below reads as zero. */
+   for (i = 0; i < 14; i++) tmp.env[i] = 0;
+   for (i = 0; i < 80; i++) tmp.reg[i] = 0;
+   /* fill in tmp.reg[0..7]: each source slot is 8 UShorts wide, of
+      which only the first 5 (10 bytes) are copied. */
+   for (stno = 0; stno < 8; stno++) {
+      UShort* dstS = (UShort*)(&tmp.reg[10*stno]);
+      UShort* srcS = (UShort*)(&addrS[16 + 8*stno]);
+      dstS[0] = srcS[0];
+      dstS[1] = srcS[1];
+      dstS[2] = srcS[2];
+      dstS[3] = srcS[3];
+      dstS[4] = srcS[4];
+   }
+   /* fill in tmp.env[0..13] */
+   tmp.env[FP_ENV_CTRL] = addrS[0]; /* FCW: fpu control word */
+   tmp.env[FP_ENV_STAT] = addrS[1]; /* FSW: fpu status word */
+
+   fp_tags = 0;
+   for (r = 0; r < 8; r++) {
+      if (addrC[4] & (1<> 32);  /* NOTE(review): this line is damaged in
+         this copy of the patch -- text between "(1<" and "> 32)"
+         appears to be missing.  Presumably the missing part finishes
+         the tag loop, stores fp_tags, consumes 'tmp' and defines w64;
+         none of that is visible here.  Consult the upstream commit
+         before relying on this hunk. */
+
+     gst->guest_SSEROUND = w64 & 0xFFFFFFFFULL;
+   }
+
+   /* Prefer an X87 emwarn over an XMM one, if both exist. */
+   if (warnX87 != EmWarn_NONE)
+      return warnX87;
+   else
+      return warnXMM;
+}
+
+
 /* DIRTY HELPER (writes guest state) */
 /* Initialise the x87 FPU state as per 'finit'. */
 void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst )
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index 442acd3ec5..489ada2a9e 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -9193,26 +9193,27 @@ DisResult disInstr_AMD64_WRK (
       thusly placed in guest-x86/toIR.c. */
 
    /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory.
-      Note that REX.W 0F AE /0 writes a slightly different format and
-      we don't handle that here. */
-   if (haveNo66noF2noF3(pfx) && sz == 4
+      Note that the presence or absence of REX.W slightly affects the
+      written format: whether the saved FPU IP and DP pointers are 64
+      or 32 bits.  But the helper function we call simply writes zero
+      bits in the relevant fields (which are 64 bits regardless of
+      what REX.W is) and so it's good enough (iow, equally broken) in
+      both cases.
*/ + if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) && insn[0] == 0x0F && insn[1] == 0xAE && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 0) { IRDirty* d; modrm = getUChar(delta+2); - vassert(sz == 4); vassert(!epartIsReg(modrm)); - /* REX.W must not be set. That should be assured us by sz == 4 - above. */ - vassert(!(pfx & PFX_REXW)); addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); delta += 2+alen; + gen_SEGV_if_not_16_aligned(addr); - DIP("fxsave %s\n", dis_buf); + DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf); /* Uses dirty helper: - void amd64g_do_FXSAVE ( VexGuestAMD64State*, UInt ) */ + void amd64g_do_FXSAVE ( VexGuestAMD64State*, ULong ) */ d = unsafeIRDirty_0_N ( 0/*regparms*/, "amd64g_dirtyhelper_FXSAVE", @@ -9268,6 +9269,82 @@ DisResult disInstr_AMD64_WRK ( goto decode_success; } + /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory. + As with FXSAVE above we ignore the value of REX.W since we're + not bothering with the FPU DP and IP fields. */ + if (haveNo66noF2noF3(pfx) && (sz == 4 || sz == 8) + && insn[0] == 0x0F && insn[1] == 0xAE + && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 1) { + IRDirty* d; + modrm = getUChar(delta+2); + vassert(!epartIsReg(modrm)); + + addr = disAMode ( &alen, vbi, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + gen_SEGV_if_not_16_aligned(addr); + + DIP("%sfxrstor %s\n", sz==8 ? 
"rex64/" : "", dis_buf); + + /* Uses dirty helper: + VexEmWarn amd64g_do_FXRSTOR ( VexGuestAMD64State*, ULong ) + NOTE: + the VexEmWarn value is simply ignored + */ + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "amd64g_dirtyhelper_FXRSTOR", + &amd64g_dirtyhelper_FXRSTOR, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + + /* declare we're reading memory */ + d->mFx = Ifx_Read; + d->mAddr = mkexpr(addr); + d->mSize = 512; + + /* declare we're writing guest state */ + d->nFxState = 7; + + d->fxState[0].fx = Ifx_Write; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Write; + d->fxState[1].offset = OFFB_FPREGS; + d->fxState[1].size = 8 * sizeof(ULong); + + d->fxState[2].fx = Ifx_Write; + d->fxState[2].offset = OFFB_FPTAGS; + d->fxState[2].size = 8 * sizeof(UChar); + + d->fxState[3].fx = Ifx_Write; + d->fxState[3].offset = OFFB_FPROUND; + d->fxState[3].size = sizeof(ULong); + + d->fxState[4].fx = Ifx_Write; + d->fxState[4].offset = OFFB_FC3210; + d->fxState[4].size = sizeof(ULong); + + d->fxState[5].fx = Ifx_Write; + d->fxState[5].offset = OFFB_XMM0; + d->fxState[5].size = 16 * sizeof(U128); + + d->fxState[6].fx = Ifx_Write; + d->fxState[6].offset = OFFB_SSEROUND; + d->fxState[6].size = sizeof(ULong); + + /* Be paranoid ... this assertion tries to ensure the 16 %xmm + images are packed back-to-back. If not, the value of + d->fxState[5].size is wrong. */ + vassert(16 == sizeof(U128)); + vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16)); + + stmt( IRStmt_Dirty(d) ); + + goto decode_success; + } + /* ------ SSE decoder main ------ */ /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */