From: Julian Seward Date: Wed, 10 Jan 2007 05:22:03 +0000 (+0000) Subject: Merge r1722 (Implement FXSAVE on amd64.) X-Git-Tag: svn/VALGRIND_3_2_3^2~15 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7cac4dafb3d14b49e9f2bd6b3d99a475cffdbade;p=thirdparty%2Fvalgrind.git Merge r1722 (Implement FXSAVE on amd64.) git-svn-id: svn://svn.valgrind.org/vex/branches/VEX_3_2_BRANCH@1724 --- diff --git a/VEX/priv/guest-amd64/gdefs.h b/VEX/priv/guest-amd64/gdefs.h index fccbb01d49..dd7c957b4b 100644 --- a/VEX/priv/guest-amd64/gdefs.h +++ b/VEX/priv/guest-amd64/gdefs.h @@ -149,6 +149,8 @@ extern void amd64g_dirtyhelper_CPUID ( VexGuestAMD64State* st ); extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* ); +extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord ); + extern ULong amd64g_dirtyhelper_RDTSC ( void ); //extern void amd64g_dirtyhelper_CPUID_sse0 ( VexGuestAMD64State* ); @@ -165,7 +167,6 @@ extern ULong amd64g_dirtyhelper_RDTSC ( void ); //extern VexEmWarn // amd64g_dirtyhelper_FLDENV ( VexGuestAMD64State*, HWord ); -//extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord ); /*---------------------------------------------------------*/ diff --git a/VEX/priv/guest-amd64/ghelpers.c b/VEX/priv/guest-amd64/ghelpers.c index e27c79276a..bdb5d6c4d3 100644 --- a/VEX/priv/guest-amd64/ghelpers.c +++ b/VEX/priv/guest-amd64/ghelpers.c @@ -1391,6 +1391,162 @@ ULong amd64g_calculate_FXAM ( ULong tag, ULong dbl ) } +/* Create an x87 FPU state from the guest state, as close as + we can approximate it. 
*/ +static +void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state, + /*OUT*/UChar* x87_state ) +{ + Int i, stno, preg; + UInt tagw; + ULong* vexRegs = (ULong*)(&vex_state->guest_FPREG[0]); + UChar* vexTags = (UChar*)(&vex_state->guest_FPTAG[0]); + Fpu_State* x87 = (Fpu_State*)x87_state; + UInt ftop = vex_state->guest_FTOP; + UInt c3210 = vex_state->guest_FC3210; + + for (i = 0; i < 14; i++) + x87->env[i] = 0; + + x87->env[1] = x87->env[3] = x87->env[5] = x87->env[13] = 0xFFFF; + x87->env[FP_ENV_STAT] + = toUShort(((ftop & 7) << 11) | (c3210 & 0x4700)); + x87->env[FP_ENV_CTRL] + = toUShort(amd64g_create_fpucw( vex_state->guest_FPROUND )); + + /* Dump the register stack in ST order. The register image is indexed by ST number (stno); the tag word is indexed by physical register (preg). */ + tagw = 0; + for (stno = 0; stno < 8; stno++) { + preg = (stno + ftop) & 7; + if (vexTags[preg] == 0) { + /* register is empty: tag bits = 3; the stored value is converted anyway */ + tagw |= (3 << (2*preg)); + convert_f64le_to_f80le( (UChar*)&vexRegs[preg], + &x87->reg[10*stno] ); + } else { + /* register is full: tag bits = 0 */ + tagw |= (0 << (2*preg)); + convert_f64le_to_f80le( (UChar*)&vexRegs[preg], + &x87->reg[10*stno] ); + } + } + x87->env[FP_ENV_TAG] = toUShort(tagw); +} + + +/* CALLED FROM GENERATED CODE */ +/* DIRTY HELPER (reads guest state, writes guest mem) */ +/* NOTE: only handles 32-bit format (no REX.W on the insn). Stores the x87 image, MXCSR and %xmm0 .. %xmm15 at addr. */ +void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr ) +{ + /* Derived from values obtained from + vendor_id : AuthenticAMD + cpu family : 15 + model : 12 + model name : AMD Athlon(tm) 64 Processor 3200+ + stepping : 0 + cpu MHz : 2200.000 + cache size : 512 KB + */ + /* Somewhat roundabout, but at least it's simple. */ + Fpu_State tmp; + UShort* addrS = (UShort*)addr; + UChar* addrC = (UChar*)addr; + U128* xmm = (U128*)(addr + 160); + UInt mxcsr; + UShort fp_tags; + UInt summary_tags; + Int r, stno; + UShort *srcS, *dstS; + + do_get_x87( gst, (UChar*)&tmp ); + mxcsr = amd64g_create_mxcsr( gst->guest_SSEROUND ); + + /* Now build the proper fxsave image from the x87 image we just + made. 
*/ + + addrS[0] = tmp.env[FP_ENV_CTRL]; /* FCW: fpu control word */ + addrS[1] = tmp.env[FP_ENV_STAT]; /* FSW: fpu status word */ + + /* set addrS[2] in an endian-independent way: summary bit r is set iff the 2-bit tag for register r is not 3 (empty) */ + summary_tags = 0; + fp_tags = tmp.env[FP_ENV_TAG]; + for (r = 0; r < 8; r++) { + if ( ((fp_tags >> (2*r)) & 3) != 3 ) + summary_tags |= (1 << r); + } + addrC[4] = toUChar(summary_tags); /* FTW: tag summary byte */ + addrC[5] = 0; /* pad */ + + /* FOP: faulting fpu opcode. From experimentation, the real CPU + does not write this field. (?!) */ + addrS[3] = 0; /* BOGUS */ + + /* RIP (Last x87 instruction pointer). From experimentation, the + real CPU does not write this field. (?!) */ + addrS[4] = 0; /* BOGUS */ + addrS[5] = 0; /* BOGUS */ + addrS[6] = 0; /* BOGUS */ + addrS[7] = 0; /* BOGUS */ + + /* RDP (Last x87 data pointer). From experimentation, the real CPU + does not write this field. (?!) */ + addrS[8] = 0; /* BOGUS */ + addrS[9] = 0; /* BOGUS */ + addrS[10] = 0; /* BOGUS */ + addrS[11] = 0; /* BOGUS */ + + addrS[12] = toUShort(mxcsr); /* MXCSR */ + addrS[13] = toUShort(mxcsr >> 16); + + addrS[14] = 0xFFFF; /* MXCSR mask (lo16) */ + addrS[15] = 0x0000; /* MXCSR mask (hi16) */ + + /* Copy in the FP registers, in ST order: five UShorts of data, then three zero UShorts, per register. */ + for (stno = 0; stno < 8; stno++) { + srcS = (UShort*)(&tmp.reg[10*stno]); + dstS = (UShort*)(&addrS[16 + 8*stno]); + dstS[0] = srcS[0]; + dstS[1] = srcS[1]; + dstS[2] = srcS[2]; + dstS[3] = srcS[3]; + dstS[4] = srcS[4]; + dstS[5] = 0; + dstS[6] = 0; + dstS[7] = 0; + } + + /* That's the first 160 bytes of the image done. Now only %xmm0 + .. %xmm15 remain to be copied. If the host is big-endian, these + need to be byte-swapped. 
*/ + vassert(host_is_little_endian()); + +# define COPY_U128(_dst,_src) \ + do { _dst[0] = _src[0]; _dst[1] = _src[1]; \ + _dst[2] = _src[2]; _dst[3] = _src[3]; } \ + while (0) + + COPY_U128( xmm[0], gst->guest_XMM0 ); + COPY_U128( xmm[1], gst->guest_XMM1 ); + COPY_U128( xmm[2], gst->guest_XMM2 ); + COPY_U128( xmm[3], gst->guest_XMM3 ); + COPY_U128( xmm[4], gst->guest_XMM4 ); + COPY_U128( xmm[5], gst->guest_XMM5 ); + COPY_U128( xmm[6], gst->guest_XMM6 ); + COPY_U128( xmm[7], gst->guest_XMM7 ); + COPY_U128( xmm[8], gst->guest_XMM8 ); + COPY_U128( xmm[9], gst->guest_XMM9 ); + COPY_U128( xmm[10], gst->guest_XMM10 ); + COPY_U128( xmm[11], gst->guest_XMM11 ); + COPY_U128( xmm[12], gst->guest_XMM12 ); + COPY_U128( xmm[13], gst->guest_XMM13 ); + COPY_U128( xmm[14], gst->guest_XMM14 ); + COPY_U128( xmm[15], gst->guest_XMM15 ); + +# undef COPY_U128 +} + + /* DIRTY HELPER (writes guest state) */ /* Initialise the x87 FPU state as per 'finit'. */ void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* gst ) diff --git a/VEX/priv/guest-amd64/toIR.c b/VEX/priv/guest-amd64/toIR.c index 1fa73c4cd7..f5440552e5 100644 --- a/VEX/priv/guest-amd64/toIR.c +++ b/VEX/priv/guest-amd64/toIR.c @@ -8415,89 +8415,86 @@ DisResult disInstr_AMD64_WRK ( insn = (UChar*)&guest_code[delta]; -//.. /* Treat fxsave specially. It should be doable even on an SSE0 -//.. (Pentium-II class) CPU. Hence be prepared to handle it on -//.. any subarchitecture variant. -//.. */ -//.. -//.. /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory */ -//.. if (sz == 4 && insn[0] == 0x0F && insn[1] == 0xAE -//.. && !epartIsReg(insn[2]) && gregOfRM(insn[2]) == 0) { -//.. modrm = getUChar(delta+2); -//.. vassert(sz == 4); -//.. vassert(!epartIsReg(modrm)); -//.. -//.. addr = disAMode ( &alen, sorb, delta+2, dis_buf ); -//.. delta += 2+alen; -//.. -//.. DIP("fxsave %s\n", dis_buf); -//.. -//.. /* Uses dirty helper: -//.. void x86g_do_FXSAVE ( VexGuestX86State*, UInt ) */ -//.. IRDirty* d = unsafeIRDirty_0_N ( -//.. 
0/*regparms*/, -//.. "x86g_dirtyhelper_FXSAVE", -//.. &x86g_dirtyhelper_FXSAVE, -//.. mkIRExprVec_1( mkexpr(addr) ) -//.. ); -//.. d->needsBBP = True; -//.. -//.. /* declare we're writing memory */ -//.. d->mFx = Ifx_Write; -//.. d->mAddr = mkexpr(addr); -//.. d->mSize = 512; -//.. -//.. /* declare we're reading guest state */ -//.. d->nFxState = 7; -//.. -//.. d->fxState[0].fx = Ifx_Read; -//.. d->fxState[0].offset = OFFB_FTOP; -//.. d->fxState[0].size = sizeof(UInt); -//.. -//.. d->fxState[1].fx = Ifx_Read; -//.. d->fxState[1].offset = OFFB_FPREGS; -//.. d->fxState[1].size = 8 * sizeof(ULong); -//.. -//.. d->fxState[2].fx = Ifx_Read; -//.. d->fxState[2].offset = OFFB_FPTAGS; -//.. d->fxState[2].size = 8 * sizeof(UChar); -//.. -//.. d->fxState[3].fx = Ifx_Read; -//.. d->fxState[3].offset = OFFB_FPROUND; -//.. d->fxState[3].size = sizeof(UInt); -//.. -//.. d->fxState[4].fx = Ifx_Read; -//.. d->fxState[4].offset = OFFB_FC3210; -//.. d->fxState[4].size = sizeof(UInt); -//.. -//.. d->fxState[5].fx = Ifx_Read; -//.. d->fxState[5].offset = OFFB_XMM0; -//.. d->fxState[5].size = 8 * sizeof(U128); -//.. -//.. d->fxState[6].fx = Ifx_Read; -//.. d->fxState[6].offset = OFFB_SSEROUND; -//.. d->fxState[6].size = sizeof(UInt); -//.. -//.. /* Be paranoid ... this assertion tries to ensure the 8 %xmm -//.. images are packed back-to-back. If not, the value of -//.. d->fxState[5].size is wrong. */ -//.. vassert(16 == sizeof(U128)); -//.. vassert(OFFB_XMM7 == (OFFB_XMM0 + 7 * 16)); -//.. -//.. stmt( IRStmt_Dirty(d) ); -//.. -//.. goto decode_success; -//.. } -//.. -//.. /* ------ SSE decoder main ------ */ -//.. -//.. /* Skip parts of the decoder which don't apply given the stated -//.. guest subarchitecture. */ -//.. if (subarch == VexSubArchX86_sse0) -//.. goto after_sse_decoders; -//.. -//.. /* Otherwise we must be doing sse1 or sse2, so we can at least try -//.. for SSE1 here. */ + /* FXSAVE is spuriously at the start here only because it is + thusly placed in guest-x86/toIR.c. 
*/ + + /* 0F AE /0 = FXSAVE m512 -- write x87 and SSE state to memory. + Note that REX.W 0F AE /0 writes a slightly different format and + we don't handle that here. */ + if (haveNo66noF2noF3(pfx) && sz == 4 + && insn[0] == 0x0F && insn[1] == 0xAE + && !epartIsReg(insn[2]) && gregOfRexRM(pfx,insn[2]) == 0) { + IRDirty* d; + modrm = getUChar(delta+2); + vassert(sz == 4); + vassert(!epartIsReg(modrm)); + /* REX.W must not be set. That should be assured us by sz == 4 + above. */ + vassert(!(pfx & PFX_REXW)); + + addr = disAMode ( &alen, pfx, delta+2, dis_buf, 0 ); + delta += 2+alen; + + DIP("fxsave %s\n", dis_buf); + + /* Uses dirty helper: + void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord ) */ + d = unsafeIRDirty_0_N ( + 0/*regparms*/, + "amd64g_dirtyhelper_FXSAVE", + &amd64g_dirtyhelper_FXSAVE, + mkIRExprVec_1( mkexpr(addr) ) + ); + d->needsBBP = True; + + /* declare we're writing memory */ + d->mFx = Ifx_Write; + d->mAddr = mkexpr(addr); + d->mSize = 512; + + /* declare we're reading guest state */ + d->nFxState = 7; + + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = OFFB_FTOP; + d->fxState[0].size = sizeof(UInt); + + d->fxState[1].fx = Ifx_Read; + d->fxState[1].offset = OFFB_FPREGS; + d->fxState[1].size = 8 * sizeof(ULong); + + d->fxState[2].fx = Ifx_Read; + d->fxState[2].offset = OFFB_FPTAGS; + d->fxState[2].size = 8 * sizeof(UChar); + + d->fxState[3].fx = Ifx_Read; + d->fxState[3].offset = OFFB_FPROUND; + d->fxState[3].size = sizeof(ULong); + + d->fxState[4].fx = Ifx_Read; + d->fxState[4].offset = OFFB_FC3210; + d->fxState[4].size = sizeof(ULong); + + d->fxState[5].fx = Ifx_Read; + d->fxState[5].offset = OFFB_XMM0; + d->fxState[5].size = 16 * sizeof(U128); + + d->fxState[6].fx = Ifx_Read; + d->fxState[6].offset = OFFB_SSEROUND; + d->fxState[6].size = sizeof(ULong); + + /* Be paranoid ... this assertion tries to ensure the 16 %xmm + images are packed back-to-back. If not, the value of + d->fxState[5].size is wrong. 
*/ + vassert(16 == sizeof(U128)); + vassert(OFFB_XMM15 == (OFFB_XMM0 + 15 * 16)); + + stmt( IRStmt_Dirty(d) ); + + goto decode_success; + } + + /* ------ SSE decoder main ------ */ /* 0F 58 = ADDPS -- add 32Fx4 from R/M to R */ if (haveNo66noF2noF3(pfx) && sz == 4