From: Julian Seward
Date: Sun, 7 Sep 2014 23:23:17 +0000 (+0000)
Subject: Change how FXSAVE and FXRSTOR are done, so as to avoid pushing the XMM
X-Git-Tag: svn/VALGRIND_3_10_1^2~23
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6cf07be0c74e690cad7328f493a444d06ee74964;p=thirdparty%2Fvalgrind.git

Change how FXSAVE and FXRSTOR are done, so as to avoid pushing the XMM
register contents themselves through the helper functions.  This
avoids the false positives reported in #291310.

git-svn-id: svn://svn.valgrind.org/vex/trunk@2949
---
diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h
index 008638e7ef..c321127065 100644
--- a/VEX/priv/guest_amd64_defs.h
+++ b/VEX/priv/guest_amd64_defs.h
@@ -170,8 +170,10 @@ extern void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st );
 
 extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );
 
-extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord );
-extern VexEmNote amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State*, HWord );
+extern void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM
+               ( VexGuestAMD64State*, HWord );
+extern VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM
+               ( VexGuestAMD64State*, HWord );
 
 extern ULong amd64g_dirtyhelper_RDTSC ( void );
 extern void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st );
diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c
index 6ca67e9f34..4ed9fddba6 100644
--- a/VEX/priv/guest_amd64_helpers.c
+++ b/VEX/priv/guest_amd64_helpers.c
@@ -1723,7 +1723,8 @@ void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
 /* CALLED FROM GENERATED CODE */
 /* DIRTY HELPER (reads guest state, writes guest mem) */
 /* NOTE: only handles 32-bit format (no REX.W on the insn) */
-void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
+void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
+                                                HWord addr )
 {
    /* Derived from values obtained from
       vendor_id : AuthenticAMD
@@ -1738,7 +1739,6 @@ void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
    Fpu_State tmp;
    UShort* addrS = (UShort*)addr;
    UChar* addrC = (UChar*)addr;
-   U128* xmm = (U128*)(addr + 160);
    UInt mxcsr;
    UShort fp_tags;
    UInt summary_tags;
@@ -1803,76 +1803,30 @@ void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
    }
 
    /* That's the first 160 bytes of the image done.  Now only %xmm0
-      .. %xmm15 remain to be copied.  If the host is big-endian, these
-      need to be byte-swapped. */
-   vassert(host_is_little_endian());
-
-# define COPY_U128(_dst,_src)                      \
-      do { _dst[0] = _src[0]; _dst[1] = _src[1];   \
-           _dst[2] = _src[2]; _dst[3] = _src[3]; } \
-      while (0)
-
-   COPY_U128( xmm[0],  gst->guest_YMM0 );
-   COPY_U128( xmm[1],  gst->guest_YMM1 );
-   COPY_U128( xmm[2],  gst->guest_YMM2 );
-   COPY_U128( xmm[3],  gst->guest_YMM3 );
-   COPY_U128( xmm[4],  gst->guest_YMM4 );
-   COPY_U128( xmm[5],  gst->guest_YMM5 );
-   COPY_U128( xmm[6],  gst->guest_YMM6 );
-   COPY_U128( xmm[7],  gst->guest_YMM7 );
-   COPY_U128( xmm[8],  gst->guest_YMM8 );
-   COPY_U128( xmm[9],  gst->guest_YMM9 );
-   COPY_U128( xmm[10], gst->guest_YMM10 );
-   COPY_U128( xmm[11], gst->guest_YMM11 );
-   COPY_U128( xmm[12], gst->guest_YMM12 );
-   COPY_U128( xmm[13], gst->guest_YMM13 );
-   COPY_U128( xmm[14], gst->guest_YMM14 );
-   COPY_U128( xmm[15], gst->guest_YMM15 );
-
-# undef COPY_U128
+      .. %xmm15 remain to be copied, and we let the generated IR do
+      that, so as to make Memcheck's definedness flow for the non-XMM
+      parts independent from that of all the other control and
+      status words in the structure.  This avoids the false positives
+      shown in #291310. */
 }
 
 /* CALLED FROM GENERATED CODE */
 /* DIRTY HELPER (writes guest state, reads guest mem) */
-VexEmNote amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
+VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
+                                                      HWord addr )
 {
    Fpu_State tmp;
    VexEmNote warnX87 = EmNote_NONE;
    VexEmNote warnXMM = EmNote_NONE;
    UShort* addrS = (UShort*)addr;
    UChar* addrC = (UChar*)addr;
-   U128* xmm = (U128*)(addr + 160);
    UShort fp_tags;
    Int r, stno, i;
 
-   /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
-      to be byte-swapped. */
-   vassert(host_is_little_endian());
-
-# define COPY_U128(_dst,_src)                      \
-      do { _dst[0] = _src[0]; _dst[1] = _src[1];   \
-           _dst[2] = _src[2]; _dst[3] = _src[3]; } \
-      while (0)
-
-   COPY_U128( gst->guest_YMM0,  xmm[0] );
-   COPY_U128( gst->guest_YMM1,  xmm[1] );
-   COPY_U128( gst->guest_YMM2,  xmm[2] );
-   COPY_U128( gst->guest_YMM3,  xmm[3] );
-   COPY_U128( gst->guest_YMM4,  xmm[4] );
-   COPY_U128( gst->guest_YMM5,  xmm[5] );
-   COPY_U128( gst->guest_YMM6,  xmm[6] );
-   COPY_U128( gst->guest_YMM7,  xmm[7] );
-   COPY_U128( gst->guest_YMM8,  xmm[8] );
-   COPY_U128( gst->guest_YMM9,  xmm[9] );
-   COPY_U128( gst->guest_YMM10, xmm[10] );
-   COPY_U128( gst->guest_YMM11, xmm[11] );
-   COPY_U128( gst->guest_YMM12, xmm[12] );
-   COPY_U128( gst->guest_YMM13, xmm[13] );
-   COPY_U128( gst->guest_YMM14, xmm[14] );
-   COPY_U128( gst->guest_YMM15, xmm[15] );
-
-# undef COPY_U128
+   /* Don't restore %xmm0 .. %xmm15, for the same reasons that
+      amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM doesn't save them.  See
+      comment in that function for details. */
 
    /* Copy the x87 registers out of the image, into a temporary
       Fpu_State struct. */
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index 78df883773..0f85ecab13 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -13575,11 +13575,12 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
 
         /* Uses dirty helper:
-              void amd64g_do_FXSAVE ( VexGuestAMD64State*, ULong ) */
+              void amd64g_do_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
+                                                     ULong ) */
         d = unsafeIRDirty_0_N (
                0/*regparms*/,
-               "amd64g_dirtyhelper_FXSAVE",
-               &amd64g_dirtyhelper_FXSAVE,
+               "amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM",
+               &amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM,
                mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
             );
 
@@ -13589,7 +13590,7 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         d->mSize = 464; /* according to recent Intel docs */
 
         /* declare we're reading guest state */
-        d->nFxState = 7;
+        d->nFxState = 6;
         vex_bzero(&d->fxState, sizeof(d->fxState));
 
         d->fxState[0].fx = Ifx_Read;
@@ -13613,24 +13614,25 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         d->fxState[4].size = sizeof(ULong);
 
         d->fxState[5].fx = Ifx_Read;
-        d->fxState[5].offset = OFFB_YMM0;
-        d->fxState[5].size = sizeof(U128);
-        /* plus 15 more of the above, spaced out in YMM sized steps */
-        d->fxState[5].nRepeats = 15;
-        d->fxState[5].repeatLen = sizeof(U256);
-
-        d->fxState[6].fx = Ifx_Read;
-        d->fxState[6].offset = OFFB_SSEROUND;
-        d->fxState[6].size = sizeof(ULong);
-
-        /* Be paranoid ... this assertion tries to ensure the 16 %ymm
-           images are packed back-to-back.  If not, the settings for
-           d->fxState[5] are wrong. */
-        vassert(32 == sizeof(U256));
-        vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32));
-
+        d->fxState[5].offset = OFFB_SSEROUND;
+        d->fxState[5].size = sizeof(ULong);
+
+        /* Call the helper.  This creates all parts of the in-memory
+           image except for the XMM[0..15] array, which we do
+           separately, in order that any undefinedness in the XMM
+           registers is tracked separately by Memcheck and does not
+           "infect" the in-memory shadow for the other parts of the
+           image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
+           SSEROUND). */
         stmt( IRStmt_Dirty(d) );
 
+        /* And now the XMMs themselves. */
+        UInt xmm;
+        for (xmm = 0; xmm < 16; xmm++) {
+           storeLE( binop(Iop_Add64, mkexpr(addr), mkU64(160 + xmm * 16)),
+                    getXMMReg(xmm) );
+        }
+
         goto decode_success;
      }
 
      /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory.
@@ -13650,14 +13652,15 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
 
         /* Uses dirty helper:
-              VexEmNote amd64g_do_FXRSTOR ( VexGuestAMD64State*, ULong )
+              VexEmNote amd64g_do_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
+                                                           ULong )
            NOTE:
-              the VexEmNote value is simply ignored
+              the VexEmNote value is simply ignored */
         d = unsafeIRDirty_0_N (
                0/*regparms*/,
-               "amd64g_dirtyhelper_FXRSTOR",
-               &amd64g_dirtyhelper_FXRSTOR,
+               "amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM",
+               &amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM,
                mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
             );
 
@@ -13667,7 +13670,7 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         d->mSize = 464; /* according to recent Intel docs */
 
         /* declare we're writing guest state */
-        d->nFxState = 7;
+        d->nFxState = 6;
         vex_bzero(&d->fxState, sizeof(d->fxState));
 
         d->fxState[0].fx = Ifx_Write;
@@ -13691,24 +13694,26 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         d->fxState[4].size = sizeof(ULong);
 
         d->fxState[5].fx = Ifx_Write;
-        d->fxState[5].offset = OFFB_YMM0;
-        d->fxState[5].size = sizeof(U128);
-        /* plus 15 more of the above, spaced out in YMM sized steps */
-        d->fxState[5].nRepeats = 15;
-        d->fxState[5].repeatLen = sizeof(U256);
-
-        d->fxState[6].fx = Ifx_Write;
-        d->fxState[6].offset = OFFB_SSEROUND;
-        d->fxState[6].size = sizeof(ULong);
-
-        /* Be paranoid ... this assertion tries to ensure the 16 %ymm
-           images are packed back-to-back.  If not, the settings for
-           d->fxState[5] are wrong. */
-        vassert(32 == sizeof(U256));
-        vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32));
-
+        d->fxState[5].offset = OFFB_SSEROUND;
+        d->fxState[5].size = sizeof(ULong);
+
+        /* Call the helper.  This reads all parts of the in-memory
+           image except for the XMM[0..15] array, which we do
+           separately, in order that any undefinedness in the XMM
+           registers is tracked separately by Memcheck and does not
+           "infect" the in-guest-state shadow for the other parts of the
+           image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
+           SSEROUND). */
         stmt( IRStmt_Dirty(d) );
 
+        /* And now the XMMs themselves. */
+        UInt xmm;
+        for (xmm = 0; xmm < 16; xmm++) {
+           putXMMReg(xmm, loadLE(Ity_V128,
+                                 binop(Iop_Add64, mkexpr(addr),
+                                       mkU64(160 + xmm * 16))));
+        }
+
         goto decode_success;
      }
      break;
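
The 160 + xmm * 16 addressing used by the new IR follows the architected
layout of the FXSAVE image: the first 160 bytes hold the x87/MXCSR control
and status words plus the eight 16-byte ST/MMX slots, and %xmm0 .. %xmm15
follow at byte offsets 160 + 16*i.  The stand-alone C sketch below is not
part of the commit; it is a minimal user-space check of that layout on
amd64, assuming gcc or clang (the _fxsave intrinsic, the inline asm, the
file name and the build line are illustrative only, e.g.
gcc -O2 -mfxsr fxsave_layout.c).

   #include <stdint.h>
   #include <stdio.h>
   #include <string.h>
   #include <immintrin.h>

   int main(void)
   {
      /* FXSAVE needs a 16-byte aligned, 512-byte save area. */
      static uint8_t image[512] __attribute__((aligned(16)));

      /* Put a recognisable pattern into %xmm0, then dump the x87/SSE
         state.  Nothing between the asm and the _fxsave touches %xmm0,
         so in practice the pattern is still there when FXSAVE runs. */
      __m128i pattern = _mm_set1_epi8(0x5A);
      __asm__ volatile ("movups %0, %%xmm0" : : "m"(pattern) : "xmm0");
      _fxsave(image);

      /* %xmm0 is stored at byte offset 160; %xmmN at 160 + 16*N. */
      int ok = (memcmp(image + 160, &pattern, 16) == 0);
      printf("xmm0 found at offset 160: %s\n", ok ? "yes" : "no");
      return ok ? 0 : 1;
   }

Under Memcheck, it is exactly these 16-byte lanes whose definedness now
flows through ordinary IR loads and stores, register by register, while
the dirty helpers only read or write the non-XMM parts of the image.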