/* Definition for thread-local data handling. nptl/x86_64 version.
- Copyright (C) 2002-2007, 2008, 2009 Free Software Foundation, Inc.
+ Copyright (C) 2002-2014 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
#ifndef _TLS_H
#define _TLS_H 1
# include <stdint.h>
# include <stdlib.h>
# include <sysdep.h>
+# include <libc-internal.h>
# include <kernel-features.h>
-# include <bits/wordsize.h>
+
+/* Replacement type for __m128 since this file is included by ld.so,
+ which is compiled with -mno-sse. It must not change the alignment
+ of rtld_savespace_sse. */
+typedef struct
+{
+ int i[4];
+} __128bits;
/* Type for the dtv. */
uintptr_t stack_guard;
uintptr_t pointer_guard;
unsigned long int vgetcpu_cache[2];
-#ifndef __ASSUME_PRIVATE_FUTEX
+# ifndef __ASSUME_PRIVATE_FUTEX
int private_futex;
-#else
- int __unused1;
-#endif
-#if __WORDSIZE == 64
- int __pad1;
-#endif
+# else
+ int __glibc_reserved1;
+# endif
+ int rtld_must_xmm_save;
/* Reservation of some values for the TM ABI. */
- void *__private_tm[5];
+ void *__private_tm[4];
+ /* GCC split stack support. */
+ void *__private_ss;
+ long int __glibc_reserved2;
+ /* Have space for the post-AVX register size. */
+ __128bits rtld_savespace_sse[8][4] __attribute__ ((aligned (32)));
+
+ void *__padding[8];
} tcbhead_t;
#else /* __ASSEMBLER__ */
#endif
-/* We require TLS support in the tools. */
-#ifndef HAVE_TLS_SUPPORT
-# error "TLS support is required."
-#endif
-
/* Alignment requirement for the stack. */
#define STACK_ALIGN 16
/* Get system call information. */
# include <sysdep.h>
-
-/* Get the thread descriptor definition. */
-# include <nptl/descr.h>
-
#ifndef LOCK_PREFIX
# ifdef UP
# define LOCK_PREFIX /* nothing */
/* The TCB can have any size and the memory following the address the
thread pointer points to is unspecified. Allocate the TCB there. */
# define TLS_TCB_AT_TP 1
+# define TLS_DTV_AT_TP 0
+
+/* Get the thread descriptor definition. */
+# include <nptl/descr.h>
/* Install the dtv pointer. The pointer passed is to the element with
(((tcbhead_t *) (descr))->dtv)
-/* Macros to load from and store into segment registers. */
-# define TLS_GET_FS() \
- ({ int __seg; __asm ("movl %%fs, %0" : "=q" (__seg)); __seg; })
-# define TLS_SET_FS(val) \
- __asm ("movl %0, %%fs" :: "q" (val))
-
-
/* Code to initially initialize the thread pointer. This might need
special attention since 'errno' is not yet available and if the
operation can cause a failure 'errno' must not be touched.
The contained asm must *not* be marked volatile since otherwise
assignments like
- pthread_descr self = thread_self();
+ pthread_descr self = thread_self();
do not get optimized away. */
# define THREAD_SELF \
({ struct pthread *__self; \
- asm ("movq %%fs:%c1,%q0" : "=r" (__self) \
+ asm ("mov %%fs:%c1,%0" : "=r" (__self) \
: "i" (offsetof (struct pthread, header.self))); \
__self;})
abort (); \
\
asm volatile ("movq %q0,%%fs:%P1" : \
- : IMM_MODE ((unsigned long int) value), \
+ : IMM_MODE ((uint64_t) cast_to_integer (value)), \
"i" (offsetof (struct pthread, member))); \
}})
abort (); \
\
asm volatile ("movq %q0,%%fs:%P1(,%q2,8)" : \
- : IMM_MODE ((unsigned long int) value), \
+ : IMM_MODE ((uint64_t) cast_to_integer (value)), \
"i" (offsetof (struct pthread, member[0])), \
"r" (idx)); \
}})
/* Atomic compare and exchange on TLS, returning old value. */
-#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
+# define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
({ __typeof (descr->member) __ret; \
__typeof (oldval) __old = (oldval); \
if (sizeof (descr->member) == 4) \
/* Atomic logical and. */
-#define THREAD_ATOMIC_AND(descr, member, val) \
+# define THREAD_ATOMIC_AND(descr, member, val) \
(void) ({ if (sizeof ((descr)->member) == 4) \
asm volatile (LOCK_PREFIX "andl %1, %%fs:%P0" \
:: "i" (offsetof (struct pthread, member)), \
/* Atomic set bit. */
-#define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
+# define THREAD_ATOMIC_BIT_SET(descr, member, bit) \
(void) ({ if (sizeof ((descr)->member) == 4) \
asm volatile (LOCK_PREFIX "orl %1, %%fs:%P0" \
:: "i" (offsetof (struct pthread, member)), \
abort (); })
-#define CALL_THREAD_FCT(descr) \
+# define CALL_THREAD_FCT(descr) \
({ void *__res; \
asm volatile ("movq %%fs:%P2, %%rdi\n\t" \
"callq *%%fs:%P1" \
/* Set the pointer guard field in the TCB head. */
-#define THREAD_SET_POINTER_GUARD(value) \
+# define THREAD_SET_POINTER_GUARD(value) \
THREAD_SETMEM (THREAD_SELF, header.pointer_guard, value)
-#define THREAD_COPY_POINTER_GUARD(descr) \
+# define THREAD_COPY_POINTER_GUARD(descr) \
((descr)->header.pointer_guard \
= THREAD_GETMEM (THREAD_SELF, header.pointer_guard))
/* Get and set the global scope generation counter in the TCB head. */
-#define THREAD_GSCOPE_FLAG_UNUSED 0
-#define THREAD_GSCOPE_FLAG_USED 1
-#define THREAD_GSCOPE_FLAG_WAIT 2
-#define THREAD_GSCOPE_RESET_FLAG() \
+# define THREAD_GSCOPE_FLAG_UNUSED 0
+# define THREAD_GSCOPE_FLAG_USED 1
+# define THREAD_GSCOPE_FLAG_WAIT 2
+# define THREAD_GSCOPE_RESET_FLAG() \
do \
{ int __res; \
asm volatile ("xchgl %0, %%fs:%P1" \
lll_futex_wake (&THREAD_SELF->header.gscope_flag, 1, LLL_PRIVATE); \
} \
while (0)
-#define THREAD_GSCOPE_SET_FLAG() \
+# define THREAD_GSCOPE_SET_FLAG() \
THREAD_SETMEM (THREAD_SELF, header.gscope_flag, THREAD_GSCOPE_FLAG_USED)
-#define THREAD_GSCOPE_WAIT() \
+# define THREAD_GSCOPE_WAIT() \
GL(dl_wait_lookup_done) ()
+
+# ifdef SHARED
+/* Defined in dl-trampoline.S. */
+extern void _dl_x86_64_save_sse (void);
+extern void _dl_x86_64_restore_sse (void);
+
+# define RTLD_CHECK_FOREIGN_CALL \
+ (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) != 0)
+
+/* NB: Don't use the xchg operation because that would imply a lock
+ prefix which is expensive and unnecessary. The cache line is also
+ not contested at all. */
+# define RTLD_ENABLE_FOREIGN_CALL \
+ int old_rtld_must_xmm_save = THREAD_GETMEM (THREAD_SELF, \
+ header.rtld_must_xmm_save); \
+ THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 1)
+
+# define RTLD_PREPARE_FOREIGN_CALL \
+ do if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save)) \
+ { \
+ _dl_x86_64_save_sse (); \
+ THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, 0); \
+ } \
+ while (0)
+
+# define RTLD_FINALIZE_FOREIGN_CALL \
+ do { \
+ if (THREAD_GETMEM (THREAD_SELF, header.rtld_must_xmm_save) == 0) \
+ _dl_x86_64_restore_sse (); \
+ THREAD_SETMEM (THREAD_SELF, header.rtld_must_xmm_save, \
+ old_rtld_must_xmm_save); \
+ } while (0)
+# endif
+
+
#endif /* __ASSEMBLER__ */
#endif /* tls.h */