git.ipfire.org Git - thirdparty/glibc.git/commitdiff
Preserve SSE registers in runtime relocations on x86-64.
authorUlrich Drepper <drepper@redhat.com>
Wed, 29 Jul 2009 15:33:03 +0000 (08:33 -0700)
committerUlrich Drepper <drepper@redhat.com>
Wed, 29 Jul 2009 15:33:03 +0000 (08:33 -0700)
SSE registers are used for passing parameters and must be preserved
in runtime relocations.  Inside ld.so this is enforced through the
tests in tst-xmmymm.sh.  But the malloc routines used after startup
come from libc.so and can be arbitrarily complex.  It's overkill
to save the SSE registers all the time because of that.  These calls
are rare.  Instead we save them on demand.  The new infrastructure
put in place in this patch makes this possible and efficient.

ChangeLog
elf/dl-lookup.c
elf/dl-runtime.c
nptl/ChangeLog
nptl/sysdeps/x86_64/tcb-offsets.sym
nptl/sysdeps/x86_64/tls.h
stdio-common/scanf15.c
stdio-common/scanf17.c
sysdeps/x86_64/dl-trampoline.S
sysdeps/x86_64/tst-xmmymm.sh

index ff34e5f5d554f22f4f5f3772db0e5721a98e6da8..23e6906d06832075f663308686a64d6254315391 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2009-07-29  Ulrich Drepper  <drepper@redhat.com>
+
+       * elf/dl-runtime.c (_dl_fixup): Indicate before _dl_lookup_symbol_x
+       call that registers used in calling conventions need to be preserved.
+       * elf/dl-lookup.c (do_lookup_x): Use RTLD_*_FOREIGN_CALL macros
+       to preserve register content if necessary.
+       * sysdeps/x86_64/dl-trampoline.S (_dl_x86_64_save_sse): New function.
+       (_dl_x86_64_restore_sse): New function.
+       * sysdeps/x86_64/tst-xmmymm.sh: There is now one more function that
+       is allowed to modify xmm/ymm registers.
+
+       * stdio-common/scanf15.c: Undefine _LIBC.  We want to test from an
+       application's perspective.
+       * stdio-common/scanf17.c: Likewise.
+
 2009-07-28  Ulrich Drepper  <drepper@redhat.com>
 
        * csu/libc-tls.c (__libc_setup_tls) [TLS_TCB_AT_TP]: Don't add TCB
index 1d68d67a35e18218ce0cd0be1ef521035b17f502..56724c9b4d72cc83307b90150fd468c5797458d4 100644 (file)
@@ -380,6 +380,10 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
                  if (size * 3 <= tab->n_elements * 4)
                    {
                      /* Expand the table.  */
+#ifdef RTLD_CHECK_FOREIGN_CALL
+                     /* This must not happen during runtime relocations.  */
+                     assert (!RTLD_CHECK_FOREIGN_CALL);
+#endif
                      size_t newsize = _dl_higher_prime_number (size + 1);
                      struct unique_sym *newentries
                        = calloc (sizeof (struct unique_sym), newsize);
@@ -405,6 +409,11 @@ do_lookup_x (const char *undef_name, uint_fast32_t new_hash,
                }
              else
                {
+#ifdef RTLD_CHECK_FOREIGN_CALL
+                 /* This must not happen during runtime relocations.  */
+                 assert (!RTLD_CHECK_FOREIGN_CALL);
+#endif
+
 #define INITIAL_NUNIQUE_SYM_TABLE 31
                  size = INITIAL_NUNIQUE_SYM_TABLE;
                  entries = calloc (sizeof (struct unique_sym), size);
@@ -600,6 +609,10 @@ add_dependency (struct link_map *undef_map, struct link_map *map, int flags)
          unsigned int max
            = undef_map->l_reldepsmax ? undef_map->l_reldepsmax * 2 : 10;
 
+#ifdef RTLD_PREPARE_FOREIGN_CALL
+         RTLD_PREPARE_FOREIGN_CALL;
+#endif
+
          newp = malloc (sizeof (*newp) + max * sizeof (struct link_map *));
          if (newp == NULL)
            {
index 0eb7d4e3b9759e29f053cfa2f4f91231bd73f738..a52120d1217bbe7f86eb576e1cd9896d879f0524 100644 (file)
@@ -111,6 +111,10 @@ _dl_fixup (
          flags |= DL_LOOKUP_GSCOPE_LOCK;
        }
 
+#ifdef RTLD_ENABLE_FOREIGN_CALL
+      RTLD_ENABLE_FOREIGN_CALL;
+#endif
+
       result = _dl_lookup_symbol_x (strtab + sym->st_name, l, &sym, l->l_scope,
                                    version, ELF_RTYPE_CLASS_PLT, flags, NULL);
 
@@ -118,6 +122,10 @@ _dl_fixup (
       if (!RTLD_SINGLE_THREAD_P)
        THREAD_GSCOPE_RESET_FLAG ();
 
+#ifdef RTLD_FINALIZE_FOREIGN_CALL
+      RTLD_FINALIZE_FOREIGN_CALL;
+#endif
+
       /* Currently result contains the base load address (or link map)
         of the object that defines sym.  Now add in the symbol
         offset.  */
index 8f37da79361d6febc954d22c676b49a6b11e186e..24fd28a0dc0533029ba29afab155f98cc13424dd 100644 (file)
@@ -1,3 +1,11 @@
+2009-07-29  Ulrich Drepper  <drepper@redhat.com>
+
+       * sysdeps/x86_64/tls.h (tcbhead_t): Add room for SSE registers the
+       dynamic linker might have to save.  Define RTLD_CHECK_FOREIGN_CALL,
+       RTLD_ENABLE_FOREIGN_CALL, RTLD_PREPARE_FOREIGN_CALL, and
+       RTLD_FINALIZE_FOREIGN_CALL.  Pretty printing.
+       * sysdeps/x86_64/tcb-offsets.sym: Add RTLD_SAVESPACE_SSE.
+
 2009-07-28  Ulrich Drepper  <drepper@redhat.com>
 
        * pthread_mutex_lock.c [NO_INCR] (__pthread_mutex_cond_lock_adjust):
index 1c70c6bde70b573d67cfae9bd44bd7ec1932bd9a..51f35c61cf3ee18c09110bff54aa20786e6395ca 100644 (file)
@@ -15,3 +15,4 @@ VGETCPU_CACHE_OFFSET  offsetof (tcbhead_t, vgetcpu_cache)
 #ifndef __ASSUME_PRIVATE_FUTEX
 PRIVATE_FUTEX          offsetof (tcbhead_t, private_futex)
 #endif
+RTLD_SAVESPACE_SSE     offsetof (tcbhead_t, rtld_savespace_sse)
index ea89f3b1a29f5a8b89ea5211c21ccca7e362acef..a51b77052ae0138b8f8b802073946402af8509a1 100644 (file)
@@ -29,6 +29,7 @@
 # include <sysdep.h>
 # include <kernel-features.h>
 # include <bits/wordsize.h>
+# include <xmmintrin.h>
 
 
 /* Type for the dtv.  */
@@ -55,16 +56,23 @@ typedef struct
   uintptr_t stack_guard;
   uintptr_t pointer_guard;
   unsigned long int vgetcpu_cache[2];
-#ifndef __ASSUME_PRIVATE_FUTEX
+# ifndef __ASSUME_PRIVATE_FUTEX
   int private_futex;
-#else
+# else
   int __unused1;
-#endif
-#if __WORDSIZE == 64
-  int __pad1;
-#endif
+# endif
+# if __WORDSIZE == 64
+  int rtld_must_xmm_save;
+# endif
   /* Reservation of some values for the TM ABI.  */
   void *__private_tm[5];
+# if __WORDSIZE == 64
+  long int __unused2;
+  /* Have space for the post-AVX register size.  */
+  __m128 rtld_savespace_sse[8][4];
+
+  void *__padding[8];
+# endif
 } tcbhead_t;
 
 #else /* __ASSEMBLER__ */
@@ -298,7 +306,7 @@ typedef struct
 
 
 /* Atomic compare and exchange on TLS, returning old value.  */
-#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
+# define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
   ({ __typeof (descr->member) __ret;                                         \
      __typeof (oldval) __old = (oldval);                                     \
      if (sizeof (descr->member) == 4)                                        \
@@ -313,7 +321,7 @@ typedef struct
 
 
 /* Atomic logical and.  */
-#define THREAD_ATOMIC_AND(descr, member, val) \
+# define THREAD_ATOMIC_AND(descr, member, val) \
   (void) ({ if (sizeof ((descr)->member) == 4)                               \
              asm volatile (LOCK_PREFIX "andl %1, %%fs:%P0"                   \
                            :: "i" (offsetof (struct pthread, member)),       \
@@ -324,7 +332,7 @@ typedef struct
 
 
 /* Atomic set bit.  */
-#define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
+# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
   (void) ({ if (sizeof ((descr)->member) == 4)                               \
              asm volatile (LOCK_PREFIX "orl %1, %%fs:%P0"                    \
                            :: "i" (offsetof (struct pthread, member)),       \
@@ -334,7 +342,7 @@ typedef struct
              abort (); })
 
 
-#define CALL_THREAD_FCT(descr) \
+# define CALL_THREAD_FCT(descr) \
   ({ void *__res;                                                            \
      asm volatile ("movq %%fs:%P2, %%rdi\n\t"                                \
                   "callq *%%fs:%P1"                                          \
@@ -355,18 +363,18 @@ typedef struct
 
 
 /* Set the pointer guard field in the TCB head.  */
-#define THREAD_SET_POINTER_GUARD(value) \
+# define THREAD_SET_POINTER_GUARD(value) \
   THREAD_SETMEM (THREAD_SELF, header.pointer_guard, value)
-#define THREAD_COPY_POINTER_GUARD(descr) \
+# define THREAD_COPY_POINTER_GUARD(descr) \
   ((descr)->header.pointer_guard                                             \
    = THREAD_GETMEM (THREAD_SELF, header.pointer_guard))
 
 
 /* Get and set the global scope generation counter in the TCB head.  */
-#define THREAD_GSCOPE_FLAG_UNUSED 0
-#define THREAD_GSCOPE_FLAG_USED   1
-#define THREAD_GSCOPE_FLAG_WAIT   2
-#define THREAD_GSCOPE_RESET_FLAG() \
+# define THREAD_GSCOPE_FLAG_UNUSED 0
+# define THREAD_GSCOPE_FLAG_USED   1
+# define THREAD_GSCOPE_FLAG_WAIT   2
+# define THREAD_GSCOPE_RESET_FLAG() \
   do                                                                         \
     { int __res;                                                             \
       asm volatile ("xchgl %0, %%fs:%P1"                                     \
@@ -377,11 +385,40 @@ typedef struct
        lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE);    \
     }                                                                        \
   while (0)
-#define THREAD_GSCOPE_SET_FLAG() \
+# define THREAD_GSCOPE_SET_FLAG() \
   THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
-#define THREAD_GSCOPE_WAIT() \
+# define THREAD_GSCOPE_WAIT() \
   GL(dl_wait_lookup_done) ()
 
+
+# ifdef SHARED
+/* Defined in dl-trampoline.S.  */
+extern void _dl_x86_64_save_sse (void);
+extern void _dl_x86_64_restore_sse (void);
+
+# define RTLD_CHECK_FOREIGN_CALL \
+  (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0)
+
+#  define RTLD_ENABLE_FOREIGN_CALL \
+  THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1)
+
+#  define RTLD_PREPARE_FOREIGN_CALL \
+  do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save))             \
+    {                                                                        \
+      _dl_x86_64_save_sse ();                                                \
+      THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0);             \
+    }                                                                        \
+  while (0)
+
+#  define RTLD_FINALIZE_FOREIGN_CALL \
+  do {                                                                       \
+    if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0)         \
+      _dl_x86_64_restore_sse ();                                             \
+    THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0);               \
+  } while (0)
+# endif
+
+
 #endif /* __ASSEMBLER__ */
 
 #endif /* tls.h */
index c56715c4868481d5c007e8bf156f298d561e7ef9..851466b3a9b1c49c383d598a09dd353ed92dfcd9 100644 (file)
@@ -1,5 +1,6 @@
 #undef _GNU_SOURCE
 #define _XOPEN_SOURCE 600
+#undef _LIBC
 /* The following macro definitions are a hack.  They work around disabling
    the GNU extension while still using a few internal headers.  */
 #define u_char unsigned char
index ee9024f9b7aa4ca581eecc32ff38c120bafb3dfe..4478a7022f34be60ff1745552cbf8b4b99e7932f 100644 (file)
@@ -1,5 +1,6 @@
 #undef _GNU_SOURCE
 #define _XOPEN_SOURCE 600
+#undef _LIBC
 /* The following macro definitions are a hack.  They work around disabling
    the GNU extension while still using a few internal headers.  */
 #define u_char unsigned char
index 49d239f075349e3c541fdd754657f7a7870181d9..7ecf1b0c64e2f5e058ff2564ed76711482f7599a 100644 (file)
@@ -390,3 +390,85 @@ L(no_avx4):
        cfi_endproc
        .size _dl_runtime_profile, .-_dl_runtime_profile
 #endif
+
+
+#ifdef SHARED
+       .globl _dl_x86_64_save_sse
+       .type _dl_x86_64_save_sse, @function
+       .align 16
+       cfi_startproc
+_dl_x86_64_save_sse:
+# ifdef HAVE_AVX_SUPPORT
+       cmpl    $0, L(have_avx)(%rip)
+       jne     1f
+       movq    %rbx, %r11              # Save rbx
+       movl    $1, %eax
+       cpuid
+       movq    %r11,%rbx               # Restore rbx
+       movl    $1, %eax
+       testl   $(1 << 28), %ecx
+       jne     2f
+       negl    %eax
+2:     movl    %eax, L(have_avx)(%rip)
+       cmpl    $0, %eax
+
+1:     js      L(no_avx5)
+
+#  define YMM_SIZE 32
+       vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
+       vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
+       vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
+       vmovdqa %ymm3, %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE
+       vmovdqa %ymm4, %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE
+       vmovdqa %ymm5, %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE
+       vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
+       vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
+       ret
+L(no_avx5):
+# endif
+# define YMM_SIZE 16
+       movdqa  %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
+       movdqa  %xmm1, %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE
+       movdqa  %xmm2, %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE
+       movdqa  %xmm3, %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE
+       movdqa  %xmm4, %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE
+       movdqa  %xmm5, %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE
+       movdqa  %xmm6, %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE
+       movdqa  %xmm7, %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE
+       ret
+       cfi_endproc
+       .size _dl_x86_64_save_sse, .-_dl_x86_64_save_sse
+
+
+       .globl _dl_x86_64_restore_sse
+       .type _dl_x86_64_restore_sse, @function
+       .align 16
+       cfi_startproc
+_dl_x86_64_restore_sse:
+# ifdef HAVE_AVX_SUPPORT
+       cmpl    $0, L(have_avx)(%rip)
+       js      L(no_avx6)
+
+       vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
+       vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
+       vmovdqa %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE, %ymm2
+       vmovdqa %fs:RTLD_SAVESPACE_SSE+3*YMM_SIZE, %ymm3
+       vmovdqa %fs:RTLD_SAVESPACE_SSE+4*YMM_SIZE, %ymm4
+       vmovdqa %fs:RTLD_SAVESPACE_SSE+5*YMM_SIZE, %ymm5
+       vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
+       vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
+       ret
+L(no_avx6):
+# endif
+       movdqa  %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
+       movdqa  %fs:RTLD_SAVESPACE_SSE+1*XMM_SIZE, %xmm1
+       movdqa  %fs:RTLD_SAVESPACE_SSE+2*XMM_SIZE, %xmm2
+       movdqa  %fs:RTLD_SAVESPACE_SSE+3*XMM_SIZE, %xmm3
+       movdqa  %fs:RTLD_SAVESPACE_SSE+4*XMM_SIZE, %xmm4
+       movdqa  %fs:RTLD_SAVESPACE_SSE+5*XMM_SIZE, %xmm5
+       movdqa  %fs:RTLD_SAVESPACE_SSE+6*XMM_SIZE, %xmm6
+       movdqa  %fs:RTLD_SAVESPACE_SSE+7*XMM_SIZE, %xmm7
+       ret
+       cfi_endproc
+       .size _dl_x86_64_restore_sse, .-_dl_x86_64_restore_sse
+#endif
index a576e7da0d5b771cf448d8bad5e01fd0d4e9b3d5..da8af7e686960cca6f8a42d080d4c8093580ac3e 100755 (executable)
@@ -59,10 +59,11 @@ for f in $tocheck; do
   objdump -d "$objpfx"../*/"$f" |
   awk 'BEGIN { last="" } /^[[:xdigit:]]* <[_[:alnum:]]*>:$/ { fct=substr($2, 2, length($2)-3) } /,%[xy]mm[[:digit:]]*$/ { if (last != fct) { print fct; last=fct} }' |
   while read fct; do
-    if test "$fct" != "_dl_runtime_profile"; then
-      echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
-      result=1
+    if test "$fct" = "_dl_runtime_profile" -o "$fct" = "_dl_x86_64_restore_sse"; then
+      continue;
     fi
+    echo "function $fct in $f modifies xmm/ymm" >> "$tmp"
+    result=1
   done
 done