Add per-thread cache to malloc

[thirdparty/glibc.git] / nptl / allocatestack.c
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c

index 23d2ce559c420fdae56c1767170380bf345a234b..ec7d42e027ba0ef76cc289d7bf0833c433685193 100644 (file)
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002-2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+/* Copyright (C) 2002-2017 Free Software Foundation, Inc.
     This file is part of the GNU C Library.
     Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
  
@@ -13,9 +13,8 @@
     Lesser General Public License for more details.
  
     You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
  
  #include <assert.h>
  #include <errno.h>
@@ -28,8 +27,11 @@
  #include <dl-sysdep.h>
  #include <dl-tls.h>
  #include <tls.h>
+#include <list.h>
  #include <lowlevellock.h>
+#include <futex-internal.h>
  #include <kernel-features.h>
+#include <stack-aliasing.h>
  
  
  #ifndef NEED_SEPARATE_REGISTER_STACK
@@ -123,11 +125,6 @@ static uintptr_t in_flight_stack;
  list_t __stack_user __attribute__ ((nocommon));
  hidden_data_def (__stack_user)
  
-#if COLORING_INCREMENT != 0
-/* Number of threads created.  */
-static unsigned int nptl_ncreated;
-#endif
-
  
  /* Check whether the stack is still used or not.  */
  #define FREE_P(descr) ((descr)->tid <= 0)
@@ -243,9 +240,7 @@ get_cached_stack (size_t *sizep, void **memp)
    /* Clear the DTV.  */
    dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
    for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
-    if (! dtv[1 + cnt].pointer.is_static
-       && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
-      free (dtv[1 + cnt].pointer.val);
+    free (dtv[1 + cnt].pointer.to_free);
    memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
  
    /* Re-initialize the TLS.  */
@@ -283,7 +278,7 @@ __free_stacks (size_t limit)
  
           /* Remove this block.  This should never fail.  If it does
              something is really wrong.  */
-         if (munmap (curr->stackblock, curr->stackblock_size) != 0)
+         if (__munmap (curr->stackblock, curr->stackblock_size) != 0)
             abort ();
  
           /* Maybe we have freed enough.  */
@@ -306,7 +301,7 @@ queue_stack (struct pthread *stack)
    stack_list_add (&stack->list, &stack_cache);
  
    stack_cache_actsize += stack->stackblock_size;
-  if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
+  if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
      __free_stacks (stack_cache_maxsize);
  }
  
@@ -333,13 +328,54 @@ change_stack_perm (struct pthread *pd
  #else
  # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
  #endif
-  if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
+  if (__mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
      return errno;
  
    return 0;
  }
  
+/* Return the start of the guard area in stack block MEM of SIZE bytes.  */
+static inline char *
+__attribute ((always_inline))
+guard_position (void *mem, size_t size, size_t guardsize, struct pthread *pd,
+               size_t pagesize_m1)
+{
+#ifdef NEED_SEPARATE_REGISTER_STACK  /* Guard is placed mid-block.  */
+  return mem + (((size - guardsize) / 2) & ~pagesize_m1);
+#elif _STACK_GROWS_DOWN  /* Guard is at the lowest address of the block.  */
+  return mem;
+#elif _STACK_GROWS_UP  /* GUARDSIZE below PD, rounded down to a page.  */
+  return (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
+#endif  /* NOTE(review): no #else; assumes one case above applies.  */
+}
+
+/* MEM is a SIZE-byte stack block mapped PROT_NONE.  Apply PROT to all of it
+   except the GUARDSIZE bytes at GUARD.  Return 0 or the failing errno.  */
+static inline int
+setup_stack_prot (char *mem, size_t size, char *guard, size_t guardsize,
+                 const int prot)
+{
+  char *guardend = guard + guardsize;
+#if _STACK_GROWS_DOWN && !defined (NEED_SEPARATE_REGISTER_STACK)
+  /* Guard is at the start of the block (see guard_position); a separate
+     register stack puts it mid-block and needs the two-range path.  */
+  if (__mprotect (guardend, size - guardsize, prot) != 0)
+    return errno;
+#else  /* Apply PROT to the ranges below and above the guard.  */
+  size_t mprots1 = (uintptr_t) guard - (uintptr_t) mem;
+  if (__mprotect (mem, mprots1, prot) != 0)
+    return errno;
+  size_t mprots2 = ((uintptr_t) mem + size) - (uintptr_t) guardend;
+  if (__mprotect (guardend, mprots2, prot) != 0)
+    return errno;
+#endif
+  return 0;
+}
  
+/* Returns a usable stack for a new thread either by allocating a
+   new stack or reusing a cached stack of sufficient size.
+   ATTR must be non-NULL and point to a valid pthread_attr.
+   PDP must be non-NULL.  */
  static int
  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                 ALLOCATE_STACK_PARMS)
@@ -347,20 +383,32 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
    struct pthread *pd;
    size_t size;
    size_t pagesize_m1 = __getpagesize () - 1;
-  void *stacktop;
  
-  assert (attr != NULL);
    assert (powerof2 (pagesize_m1 + 1));
    assert (TCB_ALIGNMENT >= STACK_ALIGN);
  
    /* Get the stack size from the attribute if it is set.  Otherwise we
       use the default we determined at start time.  */
-  size = attr->stacksize ?: __default_stacksize;
+  if (attr->stacksize != 0)
+    size = attr->stacksize;
+  else
+    {
+      lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
+      size = __default_pthread_attr.stacksize;
+      lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
+    }
  
    /* Get memory for the stack.  */
-  if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
+  if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
      {
        uintptr_t adj;
+      char *stackaddr = (char *) attr->stackaddr;
+
+      /* Assume the same layout as the _STACK_GROWS_DOWN case, with struct
+        pthread at the top of the stack block.  Later we adjust the guard
+        location and stack address to match the _STACK_GROWS_UP case.  */
+      if (_STACK_GROWS_UP)
+       stackaddr += attr->stacksize;
  
        /* If the user also specified the size of the stack make sure it
          is large enough.  */
@@ -370,11 +418,11 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
  
        /* Adjust stack size for alignment of the TLS block.  */
  #if TLS_TCB_AT_TP
-      adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
+      adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
             & __static_tls_align_m1;
        assert (size > adj + TLS_TCB_SIZE);
  #elif TLS_DTV_AT_TP
-      adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
+      adj = ((uintptr_t) stackaddr - __static_tls_size)
             & __static_tls_align_m1;
        assert (size > adj);
  #endif
@@ -384,10 +432,10 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
          the stack.  It is the user's responsibility to do this if it
          is wanted.  */
  #if TLS_TCB_AT_TP
-      pd = (struct pthread *) ((uintptr_t) attr->stackaddr
+      pd = (struct pthread *) ((uintptr_t) stackaddr
                                - TLS_TCB_SIZE - adj);
  #elif TLS_DTV_AT_TP
-      pd = (struct pthread *) (((uintptr_t) attr->stackaddr
+      pd = (struct pthread *) (((uintptr_t) stackaddr
                                 - __static_tls_size - adj)
                                - TLS_PRE_TCB_SIZE);
  #endif
@@ -399,7 +447,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
        pd->specific[0] = pd->specific_1stblock;
  
        /* Remember the stack-related values.  */
-      pd->stackblock = (char *) attr->stackaddr - size;
+      pd->stackblock = (char *) stackaddr - size;
        pd->stackblock_size = size;
  
        /* This is a user-provided stack.  It will not be queued in the
@@ -419,13 +467,9 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
  #endif
  
  #ifdef NEED_DL_SYSINFO
-      /* Copy the sysinfo value from the parent.  */
-      THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
+      SETUP_THREAD_SYSINFO (pd);
  #endif
  
-      /* The process ID is also the same as that of the caller.  */
-      pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
-
        /* Don't allow setxid until cloned.  */
        pd->setxid_futex = -1;
  
@@ -434,7 +478,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
         {
           /* Something went wrong.  */
           assert (errno == ENOMEM);
-         return EAGAIN;
+         return errno;
         }
  
  
@@ -455,14 +499,6 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
        const int prot = (PROT_READ | PROT_WRITE
                         | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
  
-#if COLORING_INCREMENT != 0
-      /* Add one more page for stack coloring.  Don't do it for stacks
-        with 16 times pagesize or larger.  This might just cause
-        unnecessary misalignment.  */
-      if (size <= 16 * pagesize_m1)
-       size += pagesize_m1 + 1;
-#endif
-
        /* Adjust the stack size for alignment.  */
        size &= ~__static_tls_align_m1;
        assert (size != 0);
@@ -491,57 +527,47 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
             size += pagesize_m1 + 1;
  #endif
  
-         mem = mmap (NULL, size, prot,
-                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+         /* If a guard page is required, avoid committing memory by first
+            allocating with PROT_NONE and then applying the required
+            permissions to everything except the guard area.  */
+         mem = __mmap (NULL, size, (guardsize == 0) ? prot : PROT_NONE,
+                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
  
-         if (__builtin_expect (mem == MAP_FAILED, 0))
-           {
-             if (errno == ENOMEM)
-               __set_errno (EAGAIN);
-
-              return errno;
-           }
+         if (__glibc_unlikely (mem == MAP_FAILED))
+           return errno;
  
           /* SIZE is guaranteed to be greater than zero.
              So we can never get a null pointer back from mmap.  */
           assert (mem != NULL);
  
-#if COLORING_INCREMENT != 0
-         /* Atomically increment NCREATED.  */
-         unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
-
-         /* We chose the offset for coloring by incrementing it for
-            every new thread by a fixed amount.  The offset used
-            module the page size.  Even if coloring would be better
-            relative to higher alignment values it makes no sense to
-            do it since the mmap() interface does not allow us to
-            specify any alignment for the returned memory block.  */
-         size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
-
-         /* Make sure the coloring offsets does not disturb the alignment
-            of the TCB and static TLS block.  */
-         if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
-           coloring = (((coloring + __static_tls_align_m1)
-                        & ~(__static_tls_align_m1))
-                       & ~pagesize_m1);
-#else
-         /* Unless specified we do not make any adjustments.  */
-# define coloring 0
-#endif
-
           /* Place the thread descriptor at the end of the stack.  */
  #if TLS_TCB_AT_TP
-         pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
+         pd = (struct pthread *) ((char *) mem + size) - 1;
  #elif TLS_DTV_AT_TP
-         pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
+         pd = (struct pthread *) ((((uintptr_t) mem + size
                                     - __static_tls_size)
                                     & ~__static_tls_align_m1)
                                    - TLS_PRE_TCB_SIZE);
  #endif
  
+         /* Now mprotect the required region excluding the guard area.  */
+         if (__glibc_likely (guardsize > 0))
+           {
+             char *guard = guard_position (mem, size, guardsize, pd,
+                                           pagesize_m1);
+             if (setup_stack_prot (mem, size, guard, guardsize, prot) != 0)
+               {
+                 __munmap (mem, size);
+                 return errno;
+               }
+           }
+
           /* Remember the stack-related values.  */
           pd->stackblock = mem;
           pd->stackblock_size = size;
+         /* Record the guard size just set up so that the guard resize
+            code below does not need another mprotect.  */
+         pd->guardsize = guardsize;
  
           /* We allocated the first block thread-specific data array.
              This address will not change for the lifetime of this
@@ -561,16 +587,12 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
  #endif
  
  #ifdef NEED_DL_SYSINFO
-         /* Copy the sysinfo value from the parent.  */
-         THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
+         SETUP_THREAD_SYSINFO (pd);
  #endif
  
           /* Don't allow setxid until cloned.  */
           pd->setxid_futex = -1;
  
-         /* The process ID is also the same as that of the caller.  */
-         pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
-
           /* Allocate the DTV for this thread.  */
           if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
             {
@@ -578,9 +600,9 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
               assert (errno == ENOMEM);
  
               /* Free the stack memory we just allocated.  */
-             (void) munmap (mem, size);
+             (void) __munmap (mem, size);
  
-             return EAGAIN;
+             return errno;
             }
  
  
@@ -608,7 +630,7 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
               if (err != 0)
                 {
                   /* Free the stack memory we just allocated.  */
-                 (void) munmap (mem, size);
+                 (void) __munmap (mem, size);
  
                   return err;
                 }
@@ -624,21 +646,13 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
         }
  
        /* Create or resize the guard area if necessary.  */
-      if (__builtin_expect (guardsize > pd->guardsize, 0))
+      if (__glibc_unlikely (guardsize > pd->guardsize))
         {
-#ifdef NEED_SEPARATE_REGISTER_STACK
-         char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
-#elif _STACK_GROWS_DOWN
-         char *guard = mem;
-# elif _STACK_GROWS_UP
-         char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
-#endif
-         if (mprotect (guard, guardsize, PROT_NONE) != 0)
+         char *guard = guard_position (mem, size, guardsize, pd,
+                                       pagesize_m1);
+         if (__mprotect (guard, guardsize, PROT_NONE) != 0)
             {
-             int err;
             mprot_error:
-             err = errno == ENOMEM ? EAGAIN : errno;
-
               lll_lock (stack_cache_lock, LLL_PRIVATE);
  
               /* Remove the thread from the list.  */
@@ -654,9 +668,9 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
                  of memory caused problems we better do not use it
                  anymore.  Uh, and we ignore possible errors.  There
                  is nothing we could do.  */
-             (void) munmap (mem, size);
+             (void) __munmap (mem, size);
  
-             return err;
+             return errno;
             }
  
           pd->guardsize = guardsize;
@@ -671,19 +685,19 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
           char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
  
           if (oldguard < guard
-             && mprotect (oldguard, guard - oldguard, prot) != 0)
+             && __mprotect (oldguard, guard - oldguard, prot) != 0)
             goto mprot_error;
  
-         if (mprotect (guard + guardsize,
+         if (__mprotect (guard + guardsize,
                         oldguard + pd->guardsize - guard - guardsize,
                         prot) != 0)
             goto mprot_error;
  #elif _STACK_GROWS_DOWN
-         if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
+         if (__mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
                         prot) != 0)
             goto mprot_error;
  #elif _STACK_GROWS_UP
-         if (mprotect ((char *) pd - pd->guardsize,
+         if (__mprotect ((char *) pd - pd->guardsize,
                         pd->guardsize - guardsize, prot) != 0)
             goto mprot_error;
  #endif
@@ -715,21 +729,24 @@ allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
    /* We place the thread descriptor at the end of the stack.  */
    *pdp = pd;
  
-#if TLS_TCB_AT_TP
+#if _STACK_GROWS_DOWN
+  void *stacktop;
+
+# if TLS_TCB_AT_TP
    /* The stack begins before the TCB and the static TLS block.  */
    stacktop = ((char *) (pd + 1) - __static_tls_size);
-#elif TLS_DTV_AT_TP
+# elif TLS_DTV_AT_TP
    stacktop = (char *) (pd - 1);
-#endif
+# endif
  
-#ifdef NEED_SEPARATE_REGISTER_STACK
+# ifdef NEED_SEPARATE_REGISTER_STACK
    *stack = pd->stackblock;
    *stacksize = stacktop - *stack;
-#elif _STACK_GROWS_DOWN
+# else
    *stack = stacktop;
-#elif _STACK_GROWS_UP
+# endif
+#else
    *stack = pd->stackblock;
-  assert (*stack > 0);
  #endif
  
    return 0;
@@ -750,7 +767,7 @@ __deallocate_stack (struct pthread *pd)
       not reset the 'used' flag in the 'tid' field.  This is done by
       the kernel.  If no thread has been created yet this field is
       still zero.  */
-  if (__builtin_expect (! pd->user_stack, 1))
+  if (__glibc_likely (! pd->user_stack))
      (void) queue_stack (pd);
    else
      /* Free the memory associated with the ELF TLS.  */
@@ -828,26 +845,23 @@ __reclaim_stacks (void)
  
        if (add_p)
         {
-         /* We always add at the beginning of the list.  So in this
-            case we only need to check the beginning of these lists.  */
-         int check_list (list_t *l)
-         {
-           if (l->next->prev != l)
-             {
-               assert (l->next->prev == elem);
-
-               elem->next = l->next;
-               elem->prev = l;
-               l->next = elem;
-
-               return 1;
-             }
-
-           return 0;
-         }
-
-         if (check_list (&stack_used) == 0)
-           (void) check_list (&stack_cache);
+         /* We always add at the beginning of the list.  So in this case we
+            only need to check the beginning of these lists to see if the
+            pointers at the head of the list are inconsistent.  */
+         list_t *l = NULL;
+
+         if (stack_used.next->prev != &stack_used)
+           l = &stack_used;
+         else if (stack_cache.next->prev != &stack_cache)
+           l = &stack_cache;
+
+         if (l != NULL)
+           {
+             assert (l->next->prev == elem);
+             elem->next = l->next;
+             elem->prev = l;
+             l->next = elem;
+           }
         }
        else
         {
@@ -867,9 +881,6 @@ __reclaim_stacks (void)
           /* This marks the stack as free.  */
           curp->tid = 0;
  
-         /* The PID field must be initialized for the new process.  */
-         curp->pid = self->pid;
-
           /* Account for the size of the stack.  */
           stack_cache_actsize += curp->stackblock_size;
  
@@ -895,13 +906,6 @@ __reclaim_stacks (void)
         }
      }
  
-  /* Reset the PIDs in any cached stacks.  */
-  list_for_each (runp, &stack_cache)
-    {
-      struct pthread *curp = list_entry (runp, struct pthread, list);
-      curp->pid = self->pid;
-    }
-
    /* Add the stack of all running threads to the cache.  */
    list_splice (&stack_used, &stack_cache);
  
@@ -914,7 +918,7 @@ __reclaim_stacks (void)
    INIT_LIST_HEAD (&stack_used);
    INIT_LIST_HEAD (&__stack_user);
  
-  if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
+  if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
      list_add (&self->list, &__stack_user);
    else
      list_add (&self->list, &stack_used);
@@ -924,8 +928,9 @@ __reclaim_stacks (void)
  
    in_flight_stack = 0;
  
-  /* Initialize the lock.  */
+  /* Initialize locks.  */
    stack_cache_lock = LLL_LOCK_INITIALIZER;
+  __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
  }
  
  
@@ -977,6 +982,7 @@ __find_thread_by_id (pid_t tid)
  #endif
  
  
+#ifdef SIGSETXID
  static void
  internal_function
  setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
@@ -987,7 +993,7 @@ setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
    if (t->setxid_futex == -1
        && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
      do
-      lll_futex_wait (&t->setxid_futex, -2, LLL_PRIVATE);
+      futex_wait_simple (&t->setxid_futex, -2, FUTEX_PRIVATE);
      while (t->setxid_futex == -2);
  
    /* Don't let the thread exit before the setxid handler runs.  */
@@ -1005,7 +1011,7 @@ setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
           if ((ch & SETXID_BITMASK) == 0)
             {
               t->setxid_futex = 1;
-             lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
+             futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
             }
           return;
         }
@@ -1032,7 +1038,7 @@ setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
  
    /* Release the futex just in case.  */
    t->setxid_futex = 1;
-  lll_futex_wake (&t->setxid_futex, 1, LLL_PRIVATE);
+  futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
  }
  
  
@@ -1044,19 +1050,9 @@ setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
      return 0;
  
    int val;
+  pid_t pid = __getpid ();
    INTERNAL_SYSCALL_DECL (err);
-#if __ASSUME_TGKILL
-  val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
-                         t->tid, SIGSETXID);
-#else
-# ifdef __NR_tgkill
-  val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
-                         t->tid, SIGSETXID);
-  if (INTERNAL_SYSCALL_ERROR_P (val, err)
-      && INTERNAL_SYSCALL_ERRNO (val, err) == ENOSYS)
-# endif
-    val = INTERNAL_SYSCALL (tkill, err, 2, t->tid, SIGSETXID);
-#endif
+  val = INTERNAL_SYSCALL_CALL (tgkill, err, pid, t->tid, SIGSETXID);
  
    /* If this failed, it must have had not started yet or else exited.  */
    if (!INTERNAL_SYSCALL_ERROR_P (val, err))
@@ -1068,6 +1064,25 @@ setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
      return 0;
  }
  
+/* Record ERROR in CMDP->error if it still holds -1, otherwise require it
+   to equal the value already there; abort on a mismatch.  That should not
+   happen as long as all privilege changes go through the glibc wrappers.
+   ERROR must be 0 (no error) or an errno code.  */
+void
+attribute_hidden
+__nptl_setxid_error (struct xid_command *cmdp, int error)
+{
+  do  /* Loop while the exchange below returns nonzero.  */
+    {
+      int olderror = cmdp->error;
+      if (olderror == error)
+       break;  /* The same result is already recorded.  */
+      if (olderror != -1)
+       /* Mismatch between current and previous results.  */
+       abort ();
+    }
+  while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -1));
+}
  
  int
  attribute_hidden
@@ -1079,6 +1094,7 @@ __nptl_setxid (struct xid_command *cmdp)
  
    __xidcmd = cmdp;
    cmdp->cntr = 0;
+  cmdp->error = -1;
  
    struct pthread *self = THREAD_SELF;
  
@@ -1131,7 +1147,8 @@ __nptl_setxid (struct xid_command *cmdp)
        int cur = cmdp->cntr;
        while (cur != 0)
         {
-         lll_futex_wait (&cmdp->cntr, cur, LLL_PRIVATE);
+         futex_wait_simple ((unsigned int *) &cmdp->cntr, cur,
+                            FUTEX_PRIVATE);
           cur = cmdp->cntr;
         }
      }
@@ -1162,20 +1179,24 @@ __nptl_setxid (struct xid_command *cmdp)
    INTERNAL_SYSCALL_DECL (err);
    result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
                                  cmdp->id[0], cmdp->id[1], cmdp->id[2]);
-  if (INTERNAL_SYSCALL_ERROR_P (result, err))
+  int error = 0;
+  if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result, err)))
      {
-      __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
+      error = INTERNAL_SYSCALL_ERRNO (result, err);
+      __set_errno (error);
        result = -1;
      }
+  __nptl_setxid_error (cmdp, error);
  
    lll_unlock (stack_cache_lock, LLL_PRIVATE);
    return result;
  }
+#endif  /* SIGSETXID.  */
+
  
  static inline void __attribute__((always_inline))
  init_one_static_tls (struct pthread *curp, struct link_map *map)
  {
-  dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
  # if TLS_TCB_AT_TP
    void *dest = (char *) curp - map->l_tls_offset;
  # elif TLS_DTV_AT_TP
@@ -1184,10 +1205,6 @@ init_one_static_tls (struct pthread *curp, struct link_map *map)
  #  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
  # endif
  
-  /* Fill in the DTV slot so that a later LD/GD access will find it.  */
-  dtv[map->l_tls_modid].pointer.val = dest;
-  dtv[map->l_tls_modid].pointer.is_static = true;
-
    /* Initialize the memory.  */
    memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
           '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
@@ -1239,7 +1256,8 @@ __wait_lookup_done (void)
         continue;
  
        do
-       lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
+       futex_wait_simple ((unsigned int *) gscope_flagp,
+                          THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
        while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
      }
  
@@ -1261,7 +1279,8 @@ __wait_lookup_done (void)
         continue;
  
        do
-       lll_futex_wait (gscope_flagp, THREAD_GSCOPE_FLAG_WAIT, LLL_PRIVATE);
+       futex_wait_simple ((unsigned int *) gscope_flagp,
+                          THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
        while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
      }