From: Julian Seward
Date: Sun, 7 Sep 2014 23:23:17 +0000 (+0000)
Subject: Change how FXSAVE and FXRSTOR are done, so as to avoid pushing the XMM
X-Git-Tag: svn/VALGRIND_3_10_1^2~23
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6cf07be0c74e690cad7328f493a444d06ee74964;p=thirdparty%2Fvalgrind.git

Change how FXSAVE and FXRSTOR are done, so as to avoid pushing the XMM
register contents themselves through the helper functions.  This
avoids the false positives reported in #291310.

git-svn-id: svn://svn.valgrind.org/vex/trunk@2949
---
diff --git a/VEX/priv/guest_amd64_defs.h b/VEX/priv/guest_amd64_defs.h
index 008638e7ef..c321127065 100644
--- a/VEX/priv/guest_amd64_defs.h
+++ b/VEX/priv/guest_amd64_defs.h
@@ -170,8 +170,10 @@ extern void amd64g_dirtyhelper_CPUID_avx_and_cx16 ( VexGuestAMD64State* st );
 
 extern void amd64g_dirtyhelper_FINIT ( VexGuestAMD64State* );
 
-extern void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State*, HWord );
-extern VexEmNote amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State*, HWord );
+extern void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM
+               ( VexGuestAMD64State*, HWord );
+extern VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM
+               ( VexGuestAMD64State*, HWord );
 
 extern ULong amd64g_dirtyhelper_RDTSC ( void );
 extern void amd64g_dirtyhelper_RDTSCP ( VexGuestAMD64State* st );
diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c
index 6ca67e9f34..4ed9fddba6 100644
--- a/VEX/priv/guest_amd64_helpers.c
+++ b/VEX/priv/guest_amd64_helpers.c
@@ -1723,7 +1723,8 @@ void do_get_x87 ( /*IN*/VexGuestAMD64State* vex_state,
 /* CALLED FROM GENERATED CODE */
 /* DIRTY HELPER (reads guest state, writes guest mem) */
 /* NOTE: only handles 32-bit format (no REX.W on the insn) */
-void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
+void amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
+                                                HWord addr )
 {
    /* Derived from values obtained from
       vendor_id : AuthenticAMD
@@ -1738,7 +1739,6 @@ void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
    Fpu_State tmp;
    UShort* addrS = (UShort*)addr;
    UChar* addrC = (UChar*)addr;
-   U128* xmm = (U128*)(addr + 160);
    UInt mxcsr;
    UShort fp_tags;
    UInt summary_tags;
@@ -1803,76 +1803,30 @@ void amd64g_dirtyhelper_FXSAVE ( VexGuestAMD64State* gst, HWord addr )
    }
 
    /* That's the first 160 bytes of the image done.  Now only %xmm0
-      .. %xmm15 remain to be copied.  If the host is big-endian, these
-      need to be byte-swapped. */
-   vassert(host_is_little_endian());
-
-# define COPY_U128(_dst,_src)                      \
-      do { _dst[0] = _src[0]; _dst[1] = _src[1];   \
-           _dst[2] = _src[2]; _dst[3] = _src[3]; } \
-      while (0)
-
-   COPY_U128( xmm[0],  gst->guest_YMM0 );
-   COPY_U128( xmm[1],  gst->guest_YMM1 );
-   COPY_U128( xmm[2],  gst->guest_YMM2 );
-   COPY_U128( xmm[3],  gst->guest_YMM3 );
-   COPY_U128( xmm[4],  gst->guest_YMM4 );
-   COPY_U128( xmm[5],  gst->guest_YMM5 );
-   COPY_U128( xmm[6],  gst->guest_YMM6 );
-   COPY_U128( xmm[7],  gst->guest_YMM7 );
-   COPY_U128( xmm[8],  gst->guest_YMM8 );
-   COPY_U128( xmm[9],  gst->guest_YMM9 );
-   COPY_U128( xmm[10], gst->guest_YMM10 );
-   COPY_U128( xmm[11], gst->guest_YMM11 );
-   COPY_U128( xmm[12], gst->guest_YMM12 );
-   COPY_U128( xmm[13], gst->guest_YMM13 );
-   COPY_U128( xmm[14], gst->guest_YMM14 );
-   COPY_U128( xmm[15], gst->guest_YMM15 );
-
-# undef COPY_U128
+      .. %xmm15 remain to be copied, and we let the generated IR do
+      that, so as to make Memcheck's definedness flow for the non-XMM
+      parts independent from that of all the other control and
+      status words in the structure.  This avoids the false positives
+      shown in #291310. */
 }
 
 /* CALLED FROM GENERATED CODE */
 /* DIRTY HELPER (writes guest state, reads guest mem) */
-VexEmNote amd64g_dirtyhelper_FXRSTOR ( VexGuestAMD64State* gst, HWord addr )
+VexEmNote amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State* gst,
+                                                      HWord addr )
 {
    Fpu_State tmp;
    VexEmNote warnX87 = EmNote_NONE;
    VexEmNote warnXMM = EmNote_NONE;
    UShort* addrS = (UShort*)addr;
    UChar* addrC = (UChar*)addr;
-   U128* xmm = (U128*)(addr + 160);
    UShort fp_tags;
    Int r, stno, i;
 
-   /* Restore %xmm0 .. %xmm15.  If the host is big-endian, these need
-      to be byte-swapped. */
-   vassert(host_is_little_endian());
-
-# define COPY_U128(_dst,_src)                      \
-      do { _dst[0] = _src[0]; _dst[1] = _src[1];   \
-           _dst[2] = _src[2]; _dst[3] = _src[3]; } \
-      while (0)
-
-   COPY_U128( gst->guest_YMM0,  xmm[0] );
-   COPY_U128( gst->guest_YMM1,  xmm[1] );
-   COPY_U128( gst->guest_YMM2,  xmm[2] );
-   COPY_U128( gst->guest_YMM3,  xmm[3] );
-   COPY_U128( gst->guest_YMM4,  xmm[4] );
-   COPY_U128( gst->guest_YMM5,  xmm[5] );
-   COPY_U128( gst->guest_YMM6,  xmm[6] );
-   COPY_U128( gst->guest_YMM7,  xmm[7] );
-   COPY_U128( gst->guest_YMM8,  xmm[8] );
-   COPY_U128( gst->guest_YMM9,  xmm[9] );
-   COPY_U128( gst->guest_YMM10, xmm[10] );
-   COPY_U128( gst->guest_YMM11, xmm[11] );
-   COPY_U128( gst->guest_YMM12, xmm[12] );
-   COPY_U128( gst->guest_YMM13, xmm[13] );
-   COPY_U128( gst->guest_YMM14, xmm[14] );
-   COPY_U128( gst->guest_YMM15, xmm[15] );
-
-# undef COPY_U128
+   /* Don't restore %xmm0 .. %xmm15, for the same reasons that
+      amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM doesn't save them.  See
+      comment in that function for details. */
 
    /* Copy the x87 registers out of the image, into a temporary
       Fpu_State struct. */
diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c
index 78df883773..0f85ecab13 100644
--- a/VEX/priv/guest_amd64_toIR.c
+++ b/VEX/priv/guest_amd64_toIR.c
@@ -13575,11 +13575,12 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         DIP("%sfxsave %s\n", sz==8 ? "rex64/" : "", dis_buf);
 
         /* Uses dirty helper:
-              void amd64g_do_FXSAVE ( VexGuestAMD64State*, ULong ) */
+              void amd64g_do_FXSAVE_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
+                                                     ULong ) */
         d = unsafeIRDirty_0_N (
                0/*regparms*/,
-               "amd64g_dirtyhelper_FXSAVE",
-               &amd64g_dirtyhelper_FXSAVE,
+               "amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM",
+               &amd64g_dirtyhelper_FXSAVE_ALL_EXCEPT_XMM,
                mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
             );
 
@@ -13589,7 +13590,7 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         d->mSize = 464; /* according to recent Intel docs */
 
         /* declare we're reading guest state */
-        d->nFxState = 7;
+        d->nFxState = 6;
         vex_bzero(&d->fxState, sizeof(d->fxState));
 
         d->fxState[0].fx = Ifx_Read;
@@ -13613,24 +13614,25 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         d->fxState[4].size = sizeof(ULong);
 
         d->fxState[5].fx = Ifx_Read;
-        d->fxState[5].offset = OFFB_YMM0;
-        d->fxState[5].size = sizeof(U128);
-        /* plus 15 more of the above, spaced out in YMM sized steps */
-        d->fxState[5].nRepeats = 15;
-        d->fxState[5].repeatLen = sizeof(U256);
-
-        d->fxState[6].fx = Ifx_Read;
-        d->fxState[6].offset = OFFB_SSEROUND;
-        d->fxState[6].size = sizeof(ULong);
-
-        /* Be paranoid ... this assertion tries to ensure the 16 %ymm
-           images are packed back-to-back.  If not, the settings for
-           d->fxState[5] are wrong. */
-        vassert(32 == sizeof(U256));
-        vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32));
-
+        d->fxState[5].offset = OFFB_SSEROUND;
+        d->fxState[5].size = sizeof(ULong);
+
+        /* Call the helper.  This creates all parts of the in-memory
+           image except for the XMM[0..15] array, which we do
+           separately, in order that any undefinedness in the XMM
+           registers is tracked separately by Memcheck and does not
+           "infect" the in-memory shadow for the other parts of the
+           image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
+           SSEROUND). */
         stmt( IRStmt_Dirty(d) );
 
+        /* And now the XMMs themselves. */
+        UInt xmm;
+        for (xmm = 0; xmm < 16; xmm++) {
+           storeLE( binop(Iop_Add64, mkexpr(addr), mkU64(160 + xmm * 16)),
+                    getXMMReg(xmm) );
+        }
+
         goto decode_success;
      }
 
      /* 0F AE /1 = FXRSTOR m512 -- read x87 and SSE state from memory.
@@ -13650,14 +13652,15 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         DIP("%sfxrstor %s\n", sz==8 ? "rex64/" : "", dis_buf);
 
         /* Uses dirty helper:
-              VexEmNote amd64g_do_FXRSTOR ( VexGuestAMD64State*, ULong )
+              VexEmNote amd64g_do_FXRSTOR_ALL_EXCEPT_XMM ( VexGuestAMD64State*,
+                                                           ULong )
            NOTE:
-              the VexEmNote value is simply ignored
+              the VexEmNote value is simply ignored */
         d = unsafeIRDirty_0_N (
                0/*regparms*/,
-               "amd64g_dirtyhelper_FXRSTOR",
-               &amd64g_dirtyhelper_FXRSTOR,
+               "amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM",
+               &amd64g_dirtyhelper_FXRSTOR_ALL_EXCEPT_XMM,
                mkIRExprVec_2( IRExpr_BBPTR(), mkexpr(addr) )
             );
 
@@ -13667,7 +13670,7 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         d->mSize = 464; /* according to recent Intel docs */
 
         /* declare we're writing guest state */
-        d->nFxState = 7;
+        d->nFxState = 6;
         vex_bzero(&d->fxState, sizeof(d->fxState));
 
         d->fxState[0].fx = Ifx_Write;
@@ -13691,24 +13694,26 @@ Long dis_ESC_0F__SSE2 ( Bool* decode_OK,
         d->fxState[4].size = sizeof(ULong);
 
         d->fxState[5].fx = Ifx_Write;
-        d->fxState[5].offset = OFFB_YMM0;
-        d->fxState[5].size = sizeof(U128);
-        /* plus 15 more of the above, spaced out in YMM sized steps */
-        d->fxState[5].nRepeats = 15;
-        d->fxState[5].repeatLen = sizeof(U256);
-
-        d->fxState[6].fx = Ifx_Write;
-        d->fxState[6].offset = OFFB_SSEROUND;
-        d->fxState[6].size = sizeof(ULong);
-
-        /* Be paranoid ... this assertion tries to ensure the 16 %ymm
-           images are packed back-to-back.  If not, the settings for
-           d->fxState[5] are wrong. */
-        vassert(32 == sizeof(U256));
-        vassert(OFFB_YMM15 == (OFFB_YMM0 + 15 * 32));
-
+        d->fxState[5].offset = OFFB_SSEROUND;
+        d->fxState[5].size = sizeof(ULong);
+
+        /* Call the helper.  This reads all parts of the in-memory
+           image except for the XMM[0..15] array, which we do
+           separately, in order that any undefinedness in the XMM
+           registers is tracked separately by Memcheck and does not
+           "infect" the in-guest-state shadow for the other parts of the
+           image (FPTOP, FPREGS, FPTAGS, FPROUND, FC3210,
+           SSEROUND). */
         stmt( IRStmt_Dirty(d) );
 
+        /* And now the XMMs themselves. */
+        UInt xmm;
+        for (xmm = 0; xmm < 16; xmm++) {
+           putXMMReg(xmm, loadLE(Ity_V128,
+                                 binop(Iop_Add64, mkexpr(addr),
+                                       mkU64(160 + xmm * 16))));
+        }
+
         goto decode_success;
      }
      break;
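
The 160 + xmm * 16 addressing used by the new IR follows the architected
layout of the FXSAVE image: the first 160 bytes hold the x87/MXCSR control
and status words plus the eight 16-byte ST/MMX slots, and %xmm0 .. %xmm15
follow at byte offsets 160 + 16*i.  The stand-alone C sketch below is not
part of the commit; it is a minimal user-space check of that layout on
amd64, assuming gcc or clang (the _fxsave intrinsic, the inline asm, the
file name and the build line are illustrative only, e.g.
gcc -O2 -mfxsr fxsave_layout.c).

   #include <stdint.h>
   #include <stdio.h>
   #include <string.h>
   #include <immintrin.h>

   int main(void)
   {
      /* FXSAVE needs a 16-byte aligned, 512-byte save area. */
      static uint8_t image[512] __attribute__((aligned(16)));

      /* Put a recognisable pattern into %xmm0, then dump the x87/SSE
         state.  Nothing between the asm and the _fxsave touches %xmm0,
         so in practice the pattern is still there when FXSAVE runs. */
      __m128i pattern = _mm_set1_epi8(0x5A);
      __asm__ volatile ("movups %0, %%xmm0" : : "m"(pattern) : "xmm0");
      _fxsave(image);

      /* %xmm0 is stored at byte offset 160; %xmmN at 160 + 16*N. */
      int ok = (memcmp(image + 160, &pattern, 16) == 0);
      printf("xmm0 found at offset 160: %s\n", ok ? "yes" : "no");
      return ok ? 0 : 1;
   }

Under Memcheck, it is exactly these 16-byte lanes whose definedness now
flows through ordinary IR loads and stores, register by register, while
the dirty helpers only read or write the non-XMM parts of the image.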