1 /* Copyright (C) 2002, 2003, 2004, 2005
2 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 #include <assert.h>
22 #include <errno.h>
23 #include <signal.h>
24 #include <stdint.h>
25 #include <string.h>
26 #include <unistd.h>
27 #include <sys/mman.h>
28 #include <sys/param.h>
29 #include <dl-sysdep.h>
30 #include <tls.h>
31 #include <lowlevellock.h>
32
33
34 #ifndef NEED_SEPARATE_REGISTER_STACK
35
36 /* Most architectures have exactly one stack pointer. Some have more. */
37 # define STACK_VARIABLES void *stackaddr = NULL
38
39 /* How to pass the values to the 'create_thread' function. */
40 # define STACK_VARIABLES_ARGS stackaddr
41
42 /* How to declare the function which receives these parameters. */
43 # define STACK_VARIABLES_PARMS void *stackaddr
44
45 /* How to declare allocate_stack. */
46 # define ALLOCATE_STACK_PARMS void **stack
47
48 /* This is how the function is called. We do it this way to allow
49 other variants of the function to have more parameters. */
50 # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
51
52 #else
53
54 /* We need two stacks. The kernel will place them but we have to tell
55 the kernel about the size of the reserved address space. */
56 # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
57
58 /* How to pass the values to the 'create_thread' function. */
59 # define STACK_VARIABLES_ARGS stackaddr, stacksize
60
61 /* How to declare the function which receives these parameters. */
62 # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
63
64 /* How to declare allocate_stack. */
65 # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
66
67 /* This is how the function is called. We do it this way to allow
68 other variants of the function to have more parameters. */
69 # define ALLOCATE_STACK(attr, pd) \
70 allocate_stack (attr, pd, &stackaddr, &stacksize)
71
72 #endif
73
74
75 /* Default alignment of stack. */
76 #ifndef STACK_ALIGN
77 # define STACK_ALIGN __alignof__ (long double)
78 #endif
79
80 /* Default value for minimal stack size after allocating thread
81 descriptor and guard. */
82 #ifndef MINIMAL_REST_STACK
83 # define MINIMAL_REST_STACK 4096
84 #endif
85
86
87 /* Let the architecture add some flags to the mmap() call used to
88 allocate stacks. */
89 #ifndef ARCH_MAP_FLAGS
90 # define ARCH_MAP_FLAGS 0
91 #endif
92
93 /* This yields the pointer that TLS support code calls the thread pointer. */
94 #if TLS_TCB_AT_TP
95 # define TLS_TPADJ(pd) (pd)
96 #elif TLS_DTV_AT_TP
97 # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
98 #endif
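/* Note: with TLS_TCB_AT_TP the thread descriptor itself starts with the
   TCB, so no adjustment is needed; with TLS_DTV_AT_TP the TCB lives
   TLS_PRE_TCB_SIZE bytes above the descriptor, so the thread pointer is
   obtained by skipping over the descriptor.  */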
99
100 /* Cache handling for not-yet free stacks. */
101
102 /* Maximum size of the cache in bytes. */
103 static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default. */
104 static size_t stack_cache_actsize;
105
106 /* Mutex protecting this variable. */
107 static lll_lock_t stack_cache_lock = LLL_LOCK_INITIALIZER;
108
109 /* List of queued, currently unused stacks. */
110 static LIST_HEAD (stack_cache);
111
112 /* List of the stacks in use. */
113 static LIST_HEAD (stack_used);
114
115 /* List of the threads with user provided stacks in use. No need to
116 initialize this, since it's done in __pthread_initialize_minimal. */
117 list_t __stack_user __attribute__ ((nocommon));
118 hidden_data_def (__stack_user)
119
120 #if COLORING_INCREMENT != 0
121 /* Number of threads created. */
122 static unsigned int nptl_ncreated;
123 #endif
124
125
126 /* Check whether the stack is still used or not. */
127 #define FREE_P(descr) ((descr)->tid <= 0)
128
129
130 /* We create a doubly-linked list of all cache entries.  Doubly linked
131    because this allows removing entries from the end. */
132
133
134 /* Get a stack from the cache.  We have to match by size since
135    some blocks might be too small or far too large. */
136 static struct pthread *
137 get_cached_stack (size_t *sizep, void **memp)
138 {
139 size_t size = *sizep;
140 struct pthread *result = NULL;
141 list_t *entry;
142
143 lll_lock (stack_cache_lock);
144
145 /* Search the cache for a matching entry. We search for the
146 smallest stack which has at least the required size. Note that
147 in normal situations the size of all allocated stacks is the
148      same.  At the very least there are only a few different sizes.
149 Therefore this loop will exit early most of the time with an
150 exact match. */
151 list_for_each (entry, &stack_cache)
152 {
153 struct pthread *curr;
154
155 curr = list_entry (entry, struct pthread, list);
156 if (FREE_P (curr) && curr->stackblock_size >= size)
157 {
158 if (curr->stackblock_size == size)
159 {
160 result = curr;
161 break;
162 }
163
164 if (result == NULL
165 || result->stackblock_size > curr->stackblock_size)
166 result = curr;
167 }
168 }
169
170 if (__builtin_expect (result == NULL, 0)
171 /* Make sure the size difference is not too excessive. In that
172 case we do not use the block. */
173 || __builtin_expect (result->stackblock_size > 4 * size, 0))
174 {
175 /* Release the lock. */
176 lll_unlock (stack_cache_lock);
177
178 return NULL;
179 }
180
181 /* Dequeue the entry. */
182 list_del (&result->list);
183
184 /* And add to the list of stacks in use. */
185 list_add (&result->list, &stack_used);
186
187 /* And decrease the cache size. */
188 stack_cache_actsize -= result->stackblock_size;
189
190 /* Release the lock early. */
191 lll_unlock (stack_cache_lock);
192
193 /* Report size and location of the stack to the caller. */
194 *sizep = result->stackblock_size;
195 *memp = result->stackblock;
196
197 /* Cancellation handling is back to the default. */
198 result->cancelhandling = 0;
199 result->cleanup = NULL;
200
201 /* No pending event. */
202 result->nextevent = NULL;
203
204 /* Clear the DTV. */
205 dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
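                /* dtv[-1].counter holds the number of module slots in the
                   DTV, so this clears the generation counter in dtv[0]
                   together with every per-module slot.  */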
206 memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
207
208 /* Re-initialize the TLS. */
209 _dl_allocate_tls_init (TLS_TPADJ (result));
210
211 return result;
212 }
213
214
215 /* Add a stack which is no longer used to the cache.  Must be
216    called with the cache lock held. */
217 static inline void
218 __attribute ((always_inline))
219 queue_stack (struct pthread *stack)
220 {
221 /* We unconditionally add the stack to the list. The memory may
222 still be in use but it will not be reused until the kernel marks
223 the stack as not used anymore. */
224 list_add (&stack->list, &stack_cache);
225
226 stack_cache_actsize += stack->stackblock_size;
227 if (__builtin_expect (stack_cache_actsize > stack_cache_maxsize, 0))
228 {
229 /* We reduce the size of the cache. Remove the last entries
230 until the size is below the limit. */
231 list_t *entry;
232 list_t *prev;
233
234 /* Search from the end of the list. */
235 list_for_each_prev_safe (entry, prev, &stack_cache)
236 {
237 struct pthread *curr;
238
239 curr = list_entry (entry, struct pthread, list);
240 if (FREE_P (curr))
241 {
242 /* Unlink the block. */
243 list_del (entry);
244
245 /* Account for the freed memory. */
246 stack_cache_actsize -= curr->stackblock_size;
247
248 /* Free the memory associated with the ELF TLS. */
249 _dl_deallocate_tls (TLS_TPADJ (curr), false);
250
251 /* Remove this block. This should never fail. If it
252 does something is really wrong. */
253 if (munmap (curr->stackblock, curr->stackblock_size) != 0)
254 abort ();
255
256 /* Maybe we have freed enough. */
257 if (stack_cache_actsize <= stack_cache_maxsize)
258 break;
259 }
260 }
261 }
262 }
263
264
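/* Give PD's stack PROT_READ, PROT_WRITE and PROT_EXEC permission.
   With a separate register stack (ia64) the guard sits in the middle
   of the block and only the memory stack above it is remapped; the
   register backing store below it presumably never holds code.  */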
265 static int
266 internal_function
267 change_stack_perm (struct pthread *pd
268 #ifdef NEED_SEPARATE_REGISTER_STACK
269 , size_t pagemask
270 #endif
271 )
272 {
273 #ifdef NEED_SEPARATE_REGISTER_STACK
274 void *stack = (pd->stackblock
275 + (((((pd->stackblock_size - pd->guardsize) / 2)
276 & pagemask) + pd->guardsize) & pagemask));
277 size_t len = pd->stackblock + pd->stackblock_size - stack;
278 #else
279 void *stack = pd->stackblock + pd->guardsize;
280 size_t len = pd->stackblock_size - pd->guardsize;
281 #endif
282 if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
283 return errno;
284
285 return 0;
286 }
287
288
289 static int
290 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
291 ALLOCATE_STACK_PARMS)
292 {
293 struct pthread *pd;
294 size_t size;
295 size_t pagesize_m1 = __getpagesize () - 1;
296 void *stacktop;
297
298 assert (attr != NULL);
299 assert (powerof2 (pagesize_m1 + 1));
300 assert (TCB_ALIGNMENT >= STACK_ALIGN);
301
302 /* Get the stack size from the attribute if it is set. Otherwise we
303 use the default we determined at start time. */
304 size = attr->stacksize ?: __default_stacksize;
305
306 /* Get memory for the stack. */
307 if (__builtin_expect (attr->flags & ATTR_FLAG_STACKADDR, 0))
308 {
309 uintptr_t adj;
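      /* ADJ will be the distance the user-supplied stack top has to be
         moved down so that the TCB (and with it the static TLS block)
         ends up aligned to the static TLS alignment requirement.  */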
310
311 /* If the user also specified the size of the stack make sure it
312 is large enough. */
313 if (attr->stacksize != 0
314 && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
315 return EINVAL;
316
317 /* Adjust stack size for alignment of the TLS block. */
318 #if TLS_TCB_AT_TP
319 adj = ((uintptr_t) attr->stackaddr - TLS_TCB_SIZE)
320 & __static_tls_align_m1;
321 assert (size > adj + TLS_TCB_SIZE);
322 #elif TLS_DTV_AT_TP
323 adj = ((uintptr_t) attr->stackaddr - __static_tls_size)
324 & __static_tls_align_m1;
325 assert (size > adj);
326 #endif
327
328 /* The user provided some memory. Let's hope it matches the
329 size... We do not allocate guard pages if the user provided
330 the stack. It is the user's responsibility to do this if it
331 is wanted. */
332 #if TLS_TCB_AT_TP
333 pd = (struct pthread *) ((uintptr_t) attr->stackaddr
334 - TLS_TCB_SIZE - adj);
335 #elif TLS_DTV_AT_TP
336 pd = (struct pthread *) (((uintptr_t) attr->stackaddr
337 - __static_tls_size - adj)
338 - TLS_PRE_TCB_SIZE);
339 #endif
340
341 /* The user provided stack memory needs to be cleared. */
342 memset (pd, '\0', sizeof (struct pthread));
343
344 /* The first TSD block is included in the TCB. */
345 pd->specific[0] = pd->specific_1stblock;
346
347 /* Remember the stack-related values. */
348 pd->stackblock = (char *) attr->stackaddr - size;
349 pd->stackblock_size = size;
350
351 /* This is a user-provided stack. It will not be queued in the
352 stack cache nor will the memory (except the TLS memory) be freed. */
353 pd->user_stack = true;
354
355 /* This is at least the second thread. */
356 pd->header.multiple_threads = 1;
357 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
358 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
359 #endif
360
361 #ifdef NEED_DL_SYSINFO
362 /* Copy the sysinfo value from the parent. */
363 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
364 #endif
365
366 /* The process ID is also the same as that of the caller. */
367 pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
368
369 /* Allocate the DTV for this thread. */
370 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
371 {
372 /* Something went wrong. */
373 assert (errno == ENOMEM);
374 return EAGAIN;
375 }
376
377
378 /* Prepare to modify global data. */
379 lll_lock (stack_cache_lock);
380
381 /* And add to the list of stacks in use. */
382 list_add (&pd->list, &__stack_user);
383
384 lll_unlock (stack_cache_lock);
385 }
386 else
387 {
388 /* Allocate some anonymous memory. If possible use the cache. */
389 size_t guardsize;
390 size_t reqsize;
391 void *mem;
392 const int prot = (PROT_READ | PROT_WRITE
393 | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
394
395 #if COLORING_INCREMENT != 0
396 /* Add one more page for stack coloring. Don't do it for stacks
397 with 16 times pagesize or larger. This might just cause
398 unnecessary misalignment. */
399 if (size <= 16 * pagesize_m1)
400 size += pagesize_m1 + 1;
401 #endif
402
403 /* Adjust the stack size for alignment. */
404 size &= ~__static_tls_align_m1;
405 assert (size != 0);
406
407       /* Make sure the size of the stack is enough for the guard area
408          and the thread descriptor. */
409 guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
410 if (__builtin_expect (size < (guardsize + __static_tls_size
411 + MINIMAL_REST_STACK + pagesize_m1 + 1),
412 0))
413 /* The stack is too small (or the guard too large). */
414 return EINVAL;
415
416 /* Try to get a stack from the cache. */
417 reqsize = size;
418 pd = get_cached_stack (&size, &mem);
419 if (pd == NULL)
420 {
421 /* To avoid aliasing effects on a larger scale than pages we
422 adjust the allocated stack size if necessary. This way
423 allocations directly following each other will not have
424 aliasing problems. */
425 #if MULTI_PAGE_ALIASING != 0
426 if ((size % MULTI_PAGE_ALIASING) == 0)
427 size += pagesize_m1 + 1;
428 #endif
429
430 mem = mmap (NULL, size, prot,
431 MAP_PRIVATE | MAP_ANONYMOUS | ARCH_MAP_FLAGS, -1, 0);
432
433 if (__builtin_expect (mem == MAP_FAILED, 0))
434 {
435 #ifdef ARCH_RETRY_MMAP
436 mem = ARCH_RETRY_MMAP (size);
437 if (__builtin_expect (mem == MAP_FAILED, 0))
438 #endif
439 return errno;
440 }
441
442 /* SIZE is guaranteed to be greater than zero.
443 So we can never get a null pointer back from mmap. */
444 assert (mem != NULL);
445
446 #if COLORING_INCREMENT != 0
447 /* Atomically increment NCREATED. */
448 unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
449
450           /* We choose the offset for coloring by incrementing it for
451              every new thread by a fixed amount.  The offset is used
452              modulo the page size.  Even if coloring would be better
453 relative to higher alignment values it makes no sense to
454 do it since the mmap() interface does not allow us to
455 specify any alignment for the returned memory block. */
456 size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
457
458           /* Make sure the coloring offset does not disturb the alignment
459 of the TCB and static TLS block. */
460 if (__builtin_expect ((coloring & __static_tls_align_m1) != 0, 0))
461 coloring = (((coloring + __static_tls_align_m1)
462 & ~(__static_tls_align_m1))
463 & ~pagesize_m1);
464 #else
465 /* Unless specified we do not make any adjustments. */
466 # define coloring 0
467 #endif
468
469 /* Place the thread descriptor at the end of the stack. */
470 #if TLS_TCB_AT_TP
471 pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
472 #elif TLS_DTV_AT_TP
473 pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
474 - __static_tls_size)
475 & ~__static_tls_align_m1)
476 - TLS_PRE_TCB_SIZE);
477 #endif
478
479 /* Remember the stack-related values. */
480 pd->stackblock = mem;
481 pd->stackblock_size = size;
482
483           /* The first block of the thread-specific data array is part
484              of the TCB.  This address will not change for the lifetime
485              of this descriptor. */
486 pd->specific[0] = pd->specific_1stblock;
487
488 /* This is at least the second thread. */
489 pd->header.multiple_threads = 1;
490 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
491 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
492 #endif
493
494 #ifdef NEED_DL_SYSINFO
495 /* Copy the sysinfo value from the parent. */
496 THREAD_SYSINFO(pd) = THREAD_SELF_SYSINFO;
497 #endif
498
499 /* The process ID is also the same as that of the caller. */
500 pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
501
502 /* Allocate the DTV for this thread. */
503 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
504 {
505 /* Something went wrong. */
506 assert (errno == ENOMEM);
507
508 /* Free the stack memory we just allocated. */
509 (void) munmap (mem, size);
510
511 return EAGAIN;
512 }
513
514
515 /* Prepare to modify global data. */
516 lll_lock (stack_cache_lock);
517
518 /* And add to the list of stacks in use. */
519 list_add (&pd->list, &stack_used);
520
521 lll_unlock (stack_cache_lock);
522
523
524 /* There might have been a race. Another thread might have
525 caused the stacks to get exec permission while this new
526 stack was prepared. Detect if this was possible and
527 change the permission if necessary. */
528 if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
529 && (prot & PROT_EXEC) == 0, 0))
530 {
531 int err = change_stack_perm (pd
532 #ifdef NEED_SEPARATE_REGISTER_STACK
533 , ~pagesize_m1
534 #endif
535 );
536 if (err != 0)
537 {
538 /* Free the stack memory we just allocated. */
539 (void) munmap (mem, size);
540
541 return err;
542 }
543 }
544
545
546           /* Note that the whole stack and the thread descriptor are
547              zeroed.  This means we do not have to initialize fields
548              whose initial value is zero.  This is specifically true for
549              the 'tid' field, which is always set back to zero once the
550              stack is no longer used, and for the 'guardsize' field,
551              which will be read next. */
552 }
553
554 /* Create or resize the guard area if necessary. */
555 if (__builtin_expect (guardsize > pd->guardsize, 0))
556 {
557 #ifdef NEED_SEPARATE_REGISTER_STACK
558 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
559 #else
560 char *guard = mem;
561 #endif
562 if (mprotect (guard, guardsize, PROT_NONE) != 0)
563 {
564 int err;
565 mprot_error:
566 err = errno;
567
568 lll_lock (stack_cache_lock);
569
570 /* Remove the thread from the list. */
571 list_del (&pd->list);
572
573 lll_unlock (stack_cache_lock);
574
575 /* Get rid of the TLS block we allocated. */
576 _dl_deallocate_tls (TLS_TPADJ (pd), false);
577
578               /* Free the stack memory regardless of whether the size
579                  of the cache is over the limit or not.  If this piece
580                  of memory caused problems we had better not use it
581                  anymore.  We also ignore possible errors; there is
582                  nothing we could do. */
583 (void) munmap (mem, size);
584
585 return err;
586 }
587
588 pd->guardsize = guardsize;
589 }
590 else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
591 0))
592 {
593 /* The old guard area is too large. */
594
595 #ifdef NEED_SEPARATE_REGISTER_STACK
596 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
597 char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
598
599 if (oldguard < guard
600 && mprotect (oldguard, guard - oldguard, prot) != 0)
601 goto mprot_error;
602
603 if (mprotect (guard + guardsize,
604 oldguard + pd->guardsize - guard - guardsize,
605 prot) != 0)
606 goto mprot_error;
607 #else
608 if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
609 prot) != 0)
610 goto mprot_error;
611 #endif
612
613 pd->guardsize = guardsize;
614 }
615       /* The pthread_getattr_np() calls need to be given the size
616          requested in the attribute, regardless of how large the
617          guard size actually in use is. */
618 pd->reported_guardsize = guardsize;
619 }
620
621 /* Initialize the lock. We have to do this unconditionally since the
622 stillborn thread could be canceled while the lock is taken. */
623 pd->lock = LLL_LOCK_INITIALIZER;
624
625 /* We place the thread descriptor at the end of the stack. */
626 *pdp = pd;
627
628 #if TLS_TCB_AT_TP
629 /* The stack begins before the TCB and the static TLS block. */
630 stacktop = ((char *) (pd + 1) - __static_tls_size);
631 #elif TLS_DTV_AT_TP
632 stacktop = (char *) (pd - 1);
633 #endif
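  /* Everything above STACKTOP is occupied by the thread descriptor, the
     static TLS block and any alignment padding; the usable stack grows
     downwards from here.  */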
634
635 #ifdef NEED_SEPARATE_REGISTER_STACK
636 *stack = pd->stackblock;
637 *stacksize = stacktop - *stack;
638 #else
639 *stack = stacktop;
640 #endif
641
642 return 0;
643 }
644
645
646 void
647 internal_function
648 __deallocate_stack (struct pthread *pd)
649 {
650 lll_lock (stack_cache_lock);
651
652   /* Remove the thread from the list it is on, i.e. either the list of
653      threads with user-provided stacks or the list of stacks in use. */
654 list_del (&pd->list);
655
656 /* Not much to do. Just free the mmap()ed memory. Note that we do
657 not reset the 'used' flag in the 'tid' field. This is done by
658 the kernel. If no thread has been created yet this field is
659 still zero. */
660 if (__builtin_expect (! pd->user_stack, 1))
661 (void) queue_stack (pd);
662 else
663 /* Free the memory associated with the ELF TLS. */
664 _dl_deallocate_tls (TLS_TPADJ (pd), false);
665
666 lll_unlock (stack_cache_lock);
667 }
668
669
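/* Make the main thread's stack and every thread stack we manage (in use
   or cached) executable.  Invoked, e.g., when a loaded object requires
   an executable stack.  */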
670 int
671 internal_function
672 __make_stacks_executable (void **stack_endp)
673 {
674 /* First the main thread's stack. */
675 int err = _dl_make_stack_executable (stack_endp);
676 if (err != 0)
677 return err;
678
679 #ifdef NEED_SEPARATE_REGISTER_STACK
680 const size_t pagemask = ~(__getpagesize () - 1);
681 #endif
682
683 lll_lock (stack_cache_lock);
684
685 list_t *runp;
686 list_for_each (runp, &stack_used)
687 {
688 err = change_stack_perm (list_entry (runp, struct pthread, list)
689 #ifdef NEED_SEPARATE_REGISTER_STACK
690 , pagemask
691 #endif
692 );
693 if (err != 0)
694 break;
695 }
696
697   /* Also change the permission for the currently unused stacks.  This
698      might be wasted time, but it is better spent here than adding a
699      check to the fast path. */
700 if (err == 0)
701 list_for_each (runp, &stack_cache)
702 {
703 err = change_stack_perm (list_entry (runp, struct pthread, list)
704 #ifdef NEED_SEPARATE_REGISTER_STACK
705 , pagemask
706 #endif
707 );
708 if (err != 0)
709 break;
710 }
711
712 lll_unlock (stack_cache_lock);
713
714 return err;
715 }
716
717
718 /* In case of a fork() call the memory allocation in the child will be
719    the same but only one thread is running.  All stacks except that of
720    the running thread are no longer used.  We have to recycle
721    them. */
722 void
723 __reclaim_stacks (void)
724 {
725 struct pthread *self = (struct pthread *) THREAD_SELF;
726
727   /* No locking necessary.  The calling thread is the only one running. */
728
729 /* Mark all stacks except the still running one as free. */
730 list_t *runp;
731 list_for_each (runp, &stack_used)
732 {
733 struct pthread *curp;
734
735 curp = list_entry (runp, struct pthread, list);
736 if (curp != self)
737 {
738 /* This marks the stack as free. */
739 curp->tid = 0;
740
741 /* The PID field must be initialized for the new process. */
742 curp->pid = self->pid;
743
744 /* Account for the size of the stack. */
745 stack_cache_actsize += curp->stackblock_size;
746 }
747 }
748
749   /* Add the stacks of all running threads to the cache. */
750 list_splice (&stack_used, &stack_cache);
751
752   /* Remove the entry for the current thread from the cache list
753      and add it back to the appropriate list of threads in use.  Which
754      of the two lists is decided by the user_stack flag. */
755 list_del (&self->list);
756
757 /* Re-initialize the lists for all the threads. */
758 INIT_LIST_HEAD (&stack_used);
759 INIT_LIST_HEAD (&__stack_user);
760
761 if (__builtin_expect (THREAD_GETMEM (self, user_stack), 0))
762 list_add (&self->list, &__stack_user);
763 else
764 list_add (&self->list, &stack_used);
765
766 /* There is one thread running. */
767 __nptl_nthreads = 1;
768
769 /* Initialize the lock. */
770 stack_cache_lock = LLL_LOCK_INITIALIZER;
771 }
772
773
774 #if HP_TIMING_AVAIL
775 # undef __find_thread_by_id
776 /* Find a thread given the thread ID. */
777 attribute_hidden
778 struct pthread *
779 __find_thread_by_id (pid_t tid)
780 {
781 struct pthread *result = NULL;
782
783 lll_lock (stack_cache_lock);
784
785 /* Iterate over the list with system-allocated threads first. */
786 list_t *runp;
787 list_for_each (runp, &stack_used)
788 {
789 struct pthread *curp;
790
791 curp = list_entry (runp, struct pthread, list);
792
793 if (curp->tid == tid)
794 {
795 result = curp;
796 goto out;
797 }
798 }
799
800 /* Now the list with threads using user-allocated stacks. */
801 list_for_each (runp, &__stack_user)
802 {
803 struct pthread *curp;
804
805 curp = list_entry (runp, struct pthread, list);
806
807 if (curp->tid == tid)
808 {
809 result = curp;
810 goto out;
811 }
812 }
813
814 out:
815 lll_unlock (stack_cache_lock);
816
817 return result;
818 }
819 #endif
820
821
822 static void
823 internal_function
824 setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
825 {
826 if (! IS_DETACHED (t))
827 {
828 int ch;
829 do
830 {
831 ch = t->cancelhandling;
832
833 /* If the thread is exiting right now, ignore it. */
834 if ((ch & EXITING_BITMASK) != 0)
835 return;
836 }
837 while (atomic_compare_and_exchange_val_acq (&t->cancelhandling,
838 ch | SETXID_BITMASK, ch));
839 }
840
841   int val;
      INTERNAL_SYSCALL_DECL (err);	/* ERR is used by the INTERNAL_SYSCALL macros below.  */
842 #if __ASSUME_TGKILL
843 val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
844 t->tid, SIGSETXID);
845 #else
846 # ifdef __NR_tgkill
847 val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
848 t->tid, SIGSETXID);
849 if (INTERNAL_SYSCALL_ERROR_P (val, err)
850 && INTERNAL_SYSCALL_ERRNO (val, err) == ENOSYS)
851 # endif
852 val = INTERNAL_SYSCALL (tkill, err, 2, t->tid, SIGSETXID);
853 #endif
854
855 if (!INTERNAL_SYSCALL_ERROR_P (val, err))
856 atomic_increment (&cmdp->cntr);
857 }
858
859
860 int
861 attribute_hidden
862 __nptl_setxid (struct xid_command *cmdp)
863 {
864 int result;
865 lll_lock (stack_cache_lock);
866
867 __xidcmd = cmdp;
868 cmdp->cntr = 0;
869
870 INTERNAL_SYSCALL_DECL (err);
871
872 struct pthread *self = THREAD_SELF;
873
874 /* Iterate over the list with system-allocated threads first. */
875 list_t *runp;
876 list_for_each (runp, &stack_used)
877 {
878 struct pthread *t = list_entry (runp, struct pthread, list);
879 if (t == self)
880 continue;
881
882 setxid_signal_thread (cmdp, t);
883 }
884
885 /* Now the list with threads using user-allocated stacks. */
886 list_for_each (runp, &__stack_user)
887 {
888 struct pthread *t = list_entry (runp, struct pthread, list);
889 if (t == self)
890 continue;
891
892 setxid_signal_thread (cmdp, t);
893 }
894
895 int cur = cmdp->cntr;
896 while (cur != 0)
897 {
898 lll_futex_wait (&cmdp->cntr, cur);
899 cur = cmdp->cntr;
900 }
901
902   /* This must be last, otherwise the current thread might not have
903      the permissions needed to send SIGSETXID to the other threads. */
904 result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
905 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
906 if (INTERNAL_SYSCALL_ERROR_P (result, err))
907 {
908 __set_errno (INTERNAL_SYSCALL_ERRNO (result, err));
909 result = -1;
910 }
911
912 lll_unlock (stack_cache_lock);
913 return result;
914 }
915
916 static inline void __attribute__((always_inline))
917 init_one_static_tls (struct pthread *curp, struct link_map *map)
918 {
919 dtv_t *dtv = GET_DTV (TLS_TPADJ (curp));
920 # if TLS_TCB_AT_TP
921 void *dest = (char *) curp - map->l_tls_offset;
922 # elif TLS_DTV_AT_TP
923 void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
924 # else
925 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
926 # endif
927
928 /* Fill in the DTV slot so that a later LD/GD access will find it. */
929 dtv[map->l_tls_modid].pointer.val = dest;
930 dtv[map->l_tls_modid].pointer.is_static = true;
931
932 /* Initialize the memory. */
933 memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
934 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
935 }
936
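/* Initialize the static TLS block of MAP in every thread that already
   exists.  Called, e.g., after dlopen has assigned MAP space in the
   static TLS area.  */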
937 void
938 attribute_hidden
939 __pthread_init_static_tls (struct link_map *map)
940 {
941 lll_lock (stack_cache_lock);
942
943 /* Iterate over the list with system-allocated threads first. */
944 list_t *runp;
945 list_for_each (runp, &stack_used)
946 init_one_static_tls (list_entry (runp, struct pthread, list), map);
947
948 /* Now the list with threads using user-allocated stacks. */
949 list_for_each (runp, &__stack_user)
950 init_one_static_tls (list_entry (runp, struct pthread, list), map);
951
952 lll_unlock (stack_cache_lock);
953 }