1 /* Copyright (C) 2002-2016 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <errno.h>
21 #include <signal.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <sys/mman.h>
26 #include <sys/param.h>
27 #include <dl-sysdep.h>
28 #include <dl-tls.h>
29 #include <tls.h>
30 #include <list.h>
31 #include <lowlevellock.h>
32 #include <futex-internal.h>
33 #include <kernel-features.h>
34 #include <stack-aliasing.h>
35
36
37 #ifndef NEED_SEPARATE_REGISTER_STACK
38
39 /* Most architectures have exactly one stack pointer. Some have more. */
40 # define STACK_VARIABLES void *stackaddr = NULL
41
42 /* How to pass the values to the 'create_thread' function. */
43 # define STACK_VARIABLES_ARGS stackaddr
44
45 /* How to declare the function which gets these parameters. */
46 # define STACK_VARIABLES_PARMS void *stackaddr
47
48 /* How to declare allocate_stack. */
49 # define ALLOCATE_STACK_PARMS void **stack
50
51 /* This is how the function is called. We do it this way to allow
52 other variants of the function to have more parameters. */
53 # define ALLOCATE_STACK(attr, pd) allocate_stack (attr, pd, &stackaddr)
54
55 #else
56
57 /* We need two stacks. The kernel will place them but we have to tell
58 the kernel about the size of the reserved address space. */
59 # define STACK_VARIABLES void *stackaddr = NULL; size_t stacksize = 0
60
61 /* How to pass the values to the 'create_thread' function. */
62 # define STACK_VARIABLES_ARGS stackaddr, stacksize
63
64 /* How to declare the function which gets these parameters. */
65 # define STACK_VARIABLES_PARMS void *stackaddr, size_t stacksize
66
67 /* How to declare allocate_stack. */
68 # define ALLOCATE_STACK_PARMS void **stack, size_t *stacksize
69
70 /* This is how the function is called. We do it this way to allow
71 other variants of the function to have more parameters. */
72 # define ALLOCATE_STACK(attr, pd) \
73 allocate_stack (attr, pd, &stackaddr, &stacksize)
74
75 #endif
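/* Illustrative sketch (editorial addition, not part of the original source):
   a caller such as pthread_create is expected to combine these macros
   roughly as shown below.  The exact create_thread signature is
   architecture- and version-specific, so treat this only as an assumption
   about the calling convention:

     STACK_VARIABLES;
     struct pthread *pd;
     int err = ALLOCATE_STACK (attr, &pd);
     if (err == 0)
       err = create_thread (pd, attr, STACK_VARIABLES_ARGS);

   With NEED_SEPARATE_REGISTER_STACK defined, STACK_VARIABLES_ARGS expands
   to both the stack address and the reserved size, so create_thread
   receives one extra argument without any change at the call site.  */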
76
77
78 /* Default alignment of stack. */
79 #ifndef STACK_ALIGN
80 # define STACK_ALIGN __alignof__ (long double)
81 #endif
82
83 /* Default value for minimal stack size after allocating thread
84 descriptor and guard. */
85 #ifndef MINIMAL_REST_STACK
86 # define MINIMAL_REST_STACK 4096
87 #endif
88
89
90 /* Newer kernels have the MAP_STACK flag to indicate a mapping is used for
91 a stack. Use it when possible. */
92 #ifndef MAP_STACK
93 # define MAP_STACK 0
94 #endif
95
96 /* This yields the pointer that TLS support code calls the thread pointer. */
97 #if TLS_TCB_AT_TP
98 # define TLS_TPADJ(pd) (pd)
99 #elif TLS_DTV_AT_TP
100 # define TLS_TPADJ(pd) ((struct pthread *)((char *) (pd) + TLS_PRE_TCB_SIZE))
101 #endif
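/* Rough layout sketch (editorial addition, simplified):

     TLS_TCB_AT_TP:   [ static TLS ][ TCB == struct pthread ]
                                      ^ thread pointer == pd

     TLS_DTV_AT_TP:   [ struct pthread ][ TCB ][ static TLS ]
                                          ^ thread pointer
                                            == (char *) pd + TLS_PRE_TCB_SIZE

   TLS_TPADJ therefore converts a struct pthread pointer into the TCB
   pointer that the TLS machinery (GET_DTV, _dl_allocate_tls_init, ...)
   expects on either kind of architecture.  */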
102
103 /* Cache handling for free stacks that have not yet been unmapped. */
104
105 /* Maximum size of the cache, in bytes. */
106 static size_t stack_cache_maxsize = 40 * 1024 * 1024; /* 40 MiB by default. */
107 static size_t stack_cache_actsize;
108
109 /* Lock protecting the stack cache and the lists of stacks below. */
110 static int stack_cache_lock = LLL_LOCK_INITIALIZER;
111
112 /* List of queued (currently unused) stacks. */
113 static LIST_HEAD (stack_cache);
114
115 /* List of the stacks in use. */
116 static LIST_HEAD (stack_used);
117
118 /* We need to record which list operation is in progress so that, in
119 case of an asynchronous interruption due to a fork() call, we can
120 complete or undo the half-finished work. */
121 static uintptr_t in_flight_stack;
122
123 /* List of the threads with user provided stacks in use. No need to
124 initialize this, since it's done in __pthread_initialize_minimal. */
125 list_t __stack_user __attribute__ ((nocommon));
126 hidden_data_def (__stack_user)
127
128 #if COLORING_INCREMENT != 0
129 /* Number of threads created. */
130 static unsigned int nptl_ncreated;
131 #endif
132
133
134 /* Check whether the stack is still used or not. */
135 #define FREE_P(descr) ((descr)->tid <= 0)
136
137
138 static void
139 stack_list_del (list_t *elem)
140 {
141 in_flight_stack = (uintptr_t) elem;
142
143 atomic_write_barrier ();
144
145 list_del (elem);
146
147 atomic_write_barrier ();
148
149 in_flight_stack = 0;
150 }
151
152
153 static void
154 stack_list_add (list_t *elem, list_t *list)
155 {
156 in_flight_stack = (uintptr_t) elem | 1;
157
158 atomic_write_barrier ();
159
160 list_add (elem, list);
161
162 atomic_write_barrier ();
163
164 in_flight_stack = 0;
165 }
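/* Sketch of the fork-recovery protocol used above (descriptive comment
   added for clarity; __reclaim_stacks below is the consumer).  The element
   and the kind of operation are encoded in a single word:

     in_flight_stack = (uintptr_t) elem;        deletion in progress
     in_flight_stack = (uintptr_t) elem | 1;    insertion in progress

   list_t elements are assumed to be at least pointer-aligned, so bit 0 is
   free to serve as the add/delete flag.  After a fork the child inspects
   this word: a pending delete can always simply be replayed, while a
   half-finished add is detected by an inconsistent list head (the old
   first element's prev pointer already refers to ELEM) and is then
   completed.  */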
166
167
168 /* We create a doubly linked list of all cache entries. Doubly linked
169 because this allows removing entries from the end. */
170
171
172 /* Get a stack from the cache. We have to match by size since
173 some blocks might be too small or far too large. */
174 static struct pthread *
175 get_cached_stack (size_t *sizep, void **memp)
176 {
177 size_t size = *sizep;
178 struct pthread *result = NULL;
179 list_t *entry;
180
181 lll_lock (stack_cache_lock, LLL_PRIVATE);
182
183 /* Search the cache for a matching entry. We search for the
184 smallest stack which has at least the required size. Note that
185 in normal situations the size of all allocated stacks is the
186 same. At the very least there are only a few different sizes.
187 Therefore this loop will exit early most of the time with an
188 exact match. */
189 list_for_each (entry, &stack_cache)
190 {
191 struct pthread *curr;
192
193 curr = list_entry (entry, struct pthread, list);
194 if (FREE_P (curr) && curr->stackblock_size >= size)
195 {
196 if (curr->stackblock_size == size)
197 {
198 result = curr;
199 break;
200 }
201
202 if (result == NULL
203 || result->stackblock_size > curr->stackblock_size)
204 result = curr;
205 }
206 }
207
208 if (__builtin_expect (result == NULL, 0)
209 /* Make sure the size difference is not too excessive. In that
210 case we do not use the block. */
211 || __builtin_expect (result->stackblock_size > 4 * size, 0))
212 {
213 /* Release the lock. */
214 lll_unlock (stack_cache_lock, LLL_PRIVATE);
215
216 return NULL;
217 }
218
219 /* Don't allow setxid until cloned. */
220 result->setxid_futex = -1;
221
222 /* Dequeue the entry. */
223 stack_list_del (&result->list);
224
225 /* And add to the list of stacks in use. */
226 stack_list_add (&result->list, &stack_used);
227
228 /* And decrease the cache size. */
229 stack_cache_actsize -= result->stackblock_size;
230
231 /* Release the lock early. */
232 lll_unlock (stack_cache_lock, LLL_PRIVATE);
233
234 /* Report size and location of the stack to the caller. */
235 *sizep = result->stackblock_size;
236 *memp = result->stackblock;
237
238 /* Cancellation handling is back to the default. */
239 result->cancelhandling = 0;
240 result->cleanup = NULL;
241
242 /* No pending event. */
243 result->nextevent = NULL;
244
245 /* Clear the DTV. */
246 dtv_t *dtv = GET_DTV (TLS_TPADJ (result));
247 for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
248 if (! dtv[1 + cnt].pointer.is_static
249 && dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
250 free (dtv[1 + cnt].pointer.val);
251 memset (dtv, '\0', (dtv[-1].counter + 1) * sizeof (dtv_t));
252
253 /* Re-initialize the TLS. */
254 _dl_allocate_tls_init (TLS_TPADJ (result));
255
256 return result;
257 }
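/* Worked example of the reuse policy above (editorial addition): a request
   for a 256 KiB stack will reuse a cached 256 KiB or 512 KiB block (the
   best fit wins), but a cached 2 MiB block is rejected because 2 MiB is
   more than 4 * 256 KiB; get_cached_stack then returns NULL and the caller
   mmaps a fresh block of the requested size instead.  */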
258
259
260 /* Free stacks until cache size is lower than LIMIT. */
261 void
262 __free_stacks (size_t limit)
263 {
264 /* We reduce the size of the cache. Remove the last entries until
265 the size is below the limit. */
266 list_t *entry;
267 list_t *prev;
268
269 /* Search from the end of the list. */
270 list_for_each_prev_safe (entry, prev, &stack_cache)
271 {
272 struct pthread *curr;
273
274 curr = list_entry (entry, struct pthread, list);
275 if (FREE_P (curr))
276 {
277 /* Unlink the block. */
278 stack_list_del (entry);
279
280 /* Account for the freed memory. */
281 stack_cache_actsize -= curr->stackblock_size;
282
283 /* Free the memory associated with the ELF TLS. */
284 _dl_deallocate_tls (TLS_TPADJ (curr), false);
285
286 /* Remove this block. This should never fail. If it does
287 something is really wrong. */
288 if (munmap (curr->stackblock, curr->stackblock_size) != 0)
289 abort ();
290
291 /* Maybe we have freed enough. */
292 if (stack_cache_actsize <= limit)
293 break;
294 }
295 }
296 }
297
298
299 /* Add a stack which is not used anymore to the cache. Must be
300 called with the cache lock held. */
301 static inline void
302 __attribute ((always_inline))
303 queue_stack (struct pthread *stack)
304 {
305 /* We unconditionally add the stack to the list. The memory may
306 still be in use but it will not be reused until the kernel marks
307 the stack as not used anymore. */
308 stack_list_add (&stack->list, &stack_cache);
309
310 stack_cache_actsize += stack->stackblock_size;
311 if (__glibc_unlikely (stack_cache_actsize > stack_cache_maxsize))
312 __free_stacks (stack_cache_maxsize);
313 }
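/* Back-of-the-envelope note (editorial addition, based on the defaults
   above): with stack_cache_maxsize at 40 MiB and a typical default stack
   size of 8 MiB (usually derived from RLIMIT_STACK and picked up through
   __default_pthread_attr), roughly five exited threads' stacks can sit in
   the cache before queue_stack starts calling __free_stacks to trim it
   back under the limit.  Smaller per-thread stacks allow proportionally
   more cached entries.  */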
314
315
316 static int
317 internal_function
318 change_stack_perm (struct pthread *pd
319 #ifdef NEED_SEPARATE_REGISTER_STACK
320 , size_t pagemask
321 #endif
322 )
323 {
324 #ifdef NEED_SEPARATE_REGISTER_STACK
325 void *stack = (pd->stackblock
326 + (((((pd->stackblock_size - pd->guardsize) / 2)
327 & pagemask) + pd->guardsize) & pagemask));
328 size_t len = pd->stackblock + pd->stackblock_size - stack;
329 #elif _STACK_GROWS_DOWN
330 void *stack = pd->stackblock + pd->guardsize;
331 size_t len = pd->stackblock_size - pd->guardsize;
332 #elif _STACK_GROWS_UP
333 void *stack = pd->stackblock;
334 size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
335 #else
336 # error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
337 #endif
338 if (mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
339 return errno;
340
341 return 0;
342 }
343
344
345 /* Returns a usable stack for a new thread either by allocating a
346 new stack or reusing a cached stack of sufficient size.
347 ATTR must be non-NULL and point to a valid pthread_attr.
348 PDP must be non-NULL. */
349 static int
350 allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
351 ALLOCATE_STACK_PARMS)
352 {
353 struct pthread *pd;
354 size_t size;
355 size_t pagesize_m1 = __getpagesize () - 1;
356
357 assert (powerof2 (pagesize_m1 + 1));
358 assert (TCB_ALIGNMENT >= STACK_ALIGN);
359
360 /* Get the stack size from the attribute if it is set. Otherwise we
361 use the default we determined at start time. */
362 if (attr->stacksize != 0)
363 size = attr->stacksize;
364 else
365 {
366 lll_lock (__default_pthread_attr_lock, LLL_PRIVATE);
367 size = __default_pthread_attr.stacksize;
368 lll_unlock (__default_pthread_attr_lock, LLL_PRIVATE);
369 }
370
371 /* Get memory for the stack. */
372 if (__glibc_unlikely (attr->flags & ATTR_FLAG_STACKADDR))
373 {
374 uintptr_t adj;
375 char *stackaddr = (char *) attr->stackaddr;
376
377 /* Assume the same layout as the _STACK_GROWS_DOWN case, with struct
378 pthread at the top of the stack block. Later we adjust the guard
379 location and stack address to match the _STACK_GROWS_UP case. */
380 if (_STACK_GROWS_UP)
381 stackaddr += attr->stacksize;
382
383 /* If the user also specified the size of the stack make sure it
384 is large enough. */
385 if (attr->stacksize != 0
386 && attr->stacksize < (__static_tls_size + MINIMAL_REST_STACK))
387 return EINVAL;
388
389 /* Adjust stack size for alignment of the TLS block. */
390 #if TLS_TCB_AT_TP
391 adj = ((uintptr_t) stackaddr - TLS_TCB_SIZE)
392 & __static_tls_align_m1;
393 assert (size > adj + TLS_TCB_SIZE);
394 #elif TLS_DTV_AT_TP
395 adj = ((uintptr_t) stackaddr - __static_tls_size)
396 & __static_tls_align_m1;
397 assert (size > adj);
398 #endif
399
400 /* The user provided some memory. Let's hope it matches the
401 size... We do not allocate guard pages if the user provided
402 the stack. It is the user's responsibility to do this if it
403 is wanted. */
404 #if TLS_TCB_AT_TP
405 pd = (struct pthread *) ((uintptr_t) stackaddr
406 - TLS_TCB_SIZE - adj);
407 #elif TLS_DTV_AT_TP
408 pd = (struct pthread *) (((uintptr_t) stackaddr
409 - __static_tls_size - adj)
410 - TLS_PRE_TCB_SIZE);
411 #endif
412
413 /* The user provided stack memory needs to be cleared. */
414 memset (pd, '\0', sizeof (struct pthread));
415
416 /* The first TSD block is included in the TCB. */
417 pd->specific[0] = pd->specific_1stblock;
418
419 /* Remember the stack-related values. */
420 pd->stackblock = (char *) stackaddr - size;
421 pd->stackblock_size = size;
422
423 /* This is a user-provided stack. It will not be queued in the
424 stack cache nor will the memory (except the TLS memory) be freed. */
425 pd->user_stack = true;
426
427 /* This is at least the second thread. */
428 pd->header.multiple_threads = 1;
429 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
430 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
431 #endif
432
433 #ifndef __ASSUME_PRIVATE_FUTEX
434 /* The thread must know when private futexes are supported. */
435 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
436 header.private_futex);
437 #endif
438
439 #ifdef NEED_DL_SYSINFO
440 SETUP_THREAD_SYSINFO (pd);
441 #endif
442
443 /* The process ID is also the same as that of the caller. */
444 pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
445
446 /* Don't allow setxid until cloned. */
447 pd->setxid_futex = -1;
448
449 /* Allocate the DTV for this thread. */
450 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
451 {
452 /* Something went wrong. */
453 assert (errno == ENOMEM);
454 return errno;
455 }
456
457
458 /* Prepare to modify global data. */
459 lll_lock (stack_cache_lock, LLL_PRIVATE);
460
461 /* And add to the list of stacks in use. */
462 list_add (&pd->list, &__stack_user);
463
464 lll_unlock (stack_cache_lock, LLL_PRIVATE);
465 }
466 else
467 {
468 /* Allocate some anonymous memory. If possible use the cache. */
469 size_t guardsize;
470 size_t reqsize;
471 void *mem;
472 const int prot = (PROT_READ | PROT_WRITE
473 | ((GL(dl_stack_flags) & PF_X) ? PROT_EXEC : 0));
474
475 #if COLORING_INCREMENT != 0
476 /* Add one more page for stack coloring. Don't do it for stacks
477 with 16 times pagesize or larger. This might just cause
478 unnecessary misalignment. */
479 if (size <= 16 * pagesize_m1)
480 size += pagesize_m1 + 1;
481 #endif
482
483 /* Adjust the stack size for alignment. */
484 size &= ~__static_tls_align_m1;
485 assert (size != 0);
486
487 /* Make sure the size of the stack is large enough for the guard and
488 possibly the thread descriptor. */
489 guardsize = (attr->guardsize + pagesize_m1) & ~pagesize_m1;
490 if (__builtin_expect (size < ((guardsize + __static_tls_size
491 + MINIMAL_REST_STACK + pagesize_m1)
492 & ~pagesize_m1),
493 0))
494 /* The stack is too small (or the guard too large). */
495 return EINVAL;
496
497 /* Try to get a stack from the cache. */
498 reqsize = size;
499 pd = get_cached_stack (&size, &mem);
500 if (pd == NULL)
501 {
502 /* To avoid aliasing effects on a larger scale than pages we
503 adjust the allocated stack size if necessary. This way
504 allocations directly following each other will not have
505 aliasing problems. */
506 #if MULTI_PAGE_ALIASING != 0
507 if ((size % MULTI_PAGE_ALIASING) == 0)
508 size += pagesize_m1 + 1;
509 #endif
510
511 mem = mmap (NULL, size, prot,
512 MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
513
514 if (__glibc_unlikely (mem == MAP_FAILED))
515 return errno;
516
517 /* SIZE is guaranteed to be greater than zero.
518 So we can never get a null pointer back from mmap. */
519 assert (mem != NULL);
520
521 #if COLORING_INCREMENT != 0
522 /* Atomically increment NCREATED. */
523 unsigned int ncreated = atomic_increment_val (&nptl_ncreated);
524
525 /* We choose the offset for coloring by incrementing it for
526 every new thread by a fixed amount. The offset is used
527 modulo the page size. Even if coloring relative to higher
528 alignment values would be better, it makes no sense to
529 do it since the mmap() interface does not allow us to
530 specify any alignment for the returned memory block. */
531 size_t coloring = (ncreated * COLORING_INCREMENT) & pagesize_m1;
532
533 /* Make sure the coloring offset does not disturb the alignment
534 of the TCB and static TLS block. */
535 if (__glibc_unlikely ((coloring & __static_tls_align_m1) != 0))
536 coloring = (((coloring + __static_tls_align_m1)
537 & ~(__static_tls_align_m1))
538 & ~pagesize_m1);
539 #else
540 /* Unless specified we do not make any adjustments. */
541 # define coloring 0
542 #endif
543
544 /* Place the thread descriptor at the end of the stack. */
545 #if TLS_TCB_AT_TP
546 pd = (struct pthread *) ((char *) mem + size - coloring) - 1;
547 #elif TLS_DTV_AT_TP
548 pd = (struct pthread *) ((((uintptr_t) mem + size - coloring
549 - __static_tls_size)
550 & ~__static_tls_align_m1)
551 - TLS_PRE_TCB_SIZE);
552 #endif
553
554 /* Remember the stack-related values. */
555 pd->stackblock = mem;
556 pd->stackblock_size = size;
557
558 /* We allocated the first block of the thread-specific data array.
559 This address will not change for the lifetime of this
560 descriptor. */
561 pd->specific[0] = pd->specific_1stblock;
562
563 /* This is at least the second thread. */
564 pd->header.multiple_threads = 1;
565 #ifndef TLS_MULTIPLE_THREADS_IN_TCB
566 __pthread_multiple_threads = *__libc_multiple_threads_ptr = 1;
567 #endif
568
569 #ifndef __ASSUME_PRIVATE_FUTEX
570 /* The thread must know when private futexes are supported. */
571 pd->header.private_futex = THREAD_GETMEM (THREAD_SELF,
572 header.private_futex);
573 #endif
574
575 #ifdef NEED_DL_SYSINFO
576 SETUP_THREAD_SYSINFO (pd);
577 #endif
578
579 /* Don't allow setxid until cloned. */
580 pd->setxid_futex = -1;
581
582 /* The process ID is also the same as that of the caller. */
583 pd->pid = THREAD_GETMEM (THREAD_SELF, pid);
584
585 /* Allocate the DTV for this thread. */
586 if (_dl_allocate_tls (TLS_TPADJ (pd)) == NULL)
587 {
588 /* Something went wrong. */
589 assert (errno == ENOMEM);
590
591 /* Free the stack memory we just allocated. */
592 (void) munmap (mem, size);
593
594 return errno;
595 }
596
597
598 /* Prepare to modify global data. */
599 lll_lock (stack_cache_lock, LLL_PRIVATE);
600
601 /* And add to the list of stacks in use. */
602 stack_list_add (&pd->list, &stack_used);
603
604 lll_unlock (stack_cache_lock, LLL_PRIVATE);
605
606
607 /* There might have been a race. Another thread might have
608 caused the stacks to get exec permission while this new
609 stack was prepared. Detect if this was possible and
610 change the permission if necessary. */
611 if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
612 && (prot & PROT_EXEC) == 0, 0))
613 {
614 int err = change_stack_perm (pd
615 #ifdef NEED_SEPARATE_REGISTER_STACK
616 , ~pagesize_m1
617 #endif
618 );
619 if (err != 0)
620 {
621 /* Free the stack memory we just allocated. */
622 (void) munmap (mem, size);
623
624 return err;
625 }
626 }
627
628
629 /* Note that all of the stack and the thread descriptor are
630 zeroed. This means we do not have to initialize fields
631 with an initial value of zero. This is specifically true for
632 the 'tid' field, which is always set back to zero once the
633 stack is not used anymore, and for the 'guardsize' field,
634 which will be read next. */
635 }
636
637 /* Create or resize the guard area if necessary. */
638 if (__glibc_unlikely (guardsize > pd->guardsize))
639 {
640 #ifdef NEED_SEPARATE_REGISTER_STACK
641 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
642 #elif _STACK_GROWS_DOWN
643 char *guard = mem;
644 #elif _STACK_GROWS_UP
645 char *guard = (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
646 #endif
647 if (mprotect (guard, guardsize, PROT_NONE) != 0)
648 {
649 mprot_error:
650 lll_lock (stack_cache_lock, LLL_PRIVATE);
651
652 /* Remove the thread from the list. */
653 stack_list_del (&pd->list);
654
655 lll_unlock (stack_cache_lock, LLL_PRIVATE);
656
657 /* Get rid of the TLS block we allocated. */
658 _dl_deallocate_tls (TLS_TPADJ (pd), false);
659
660 /* Free the stack memory regardless of whether the size
661 of the cache is over the limit or not. If this piece
662 of memory caused problems we had better not use it
663 anymore. We also ignore possible errors; there
664 is nothing we could do. */
665 (void) munmap (mem, size);
666
667 return errno;
668 }
669
670 pd->guardsize = guardsize;
671 }
672 else if (__builtin_expect (pd->guardsize - guardsize > size - reqsize,
673 0))
674 {
675 /* The old guard area is too large. */
676
677 #ifdef NEED_SEPARATE_REGISTER_STACK
678 char *guard = mem + (((size - guardsize) / 2) & ~pagesize_m1);
679 char *oldguard = mem + (((size - pd->guardsize) / 2) & ~pagesize_m1);
680
681 if (oldguard < guard
682 && mprotect (oldguard, guard - oldguard, prot) != 0)
683 goto mprot_error;
684
685 if (mprotect (guard + guardsize,
686 oldguard + pd->guardsize - guard - guardsize,
687 prot) != 0)
688 goto mprot_error;
689 #elif _STACK_GROWS_DOWN
690 if (mprotect ((char *) mem + guardsize, pd->guardsize - guardsize,
691 prot) != 0)
692 goto mprot_error;
693 #elif _STACK_GROWS_UP
694 if (mprotect ((char *) pd - pd->guardsize,
695 pd->guardsize - guardsize, prot) != 0)
696 goto mprot_error;
697 #endif
698
699 pd->guardsize = guardsize;
700 }
701 /* The pthread_getattr_np() calls need to be given the guard size
702 requested in the attribute, regardless of how large the
703 guard actually in use is. */
704 pd->reported_guardsize = guardsize;
705 }
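/* Guard placement sketch (editorial addition, simplified; addresses grow
   to the right):

     _STACK_GROWS_DOWN:  [ guard ][ usable stack <--- ][ TLS / struct pthread ]
                         ^ mem                                     mem + size ^

     _STACK_GROWS_UP:    [ usable stack ---> ][ guard ][ TLS / struct pthread ]
                         ^ mem                                     mem + size ^

   In both cases the descriptor sits at the high end of the block; only the
   side of the usable area that the guard protects differs.  With
   NEED_SEPARATE_REGISTER_STACK the guard is instead placed in the middle,
   between the memory stack and the register backing store.  */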
706
707 /* Initialize the lock. We have to do this unconditionally since the
708 stillborn thread could be canceled while the lock is taken. */
709 pd->lock = LLL_LOCK_INITIALIZER;
710
711 /* The robust mutex lists also need to be initialized
712 unconditionally because the cleanup for the previous stack owner
713 might have happened in the kernel. */
714 pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
715 - offsetof (pthread_mutex_t,
716 __data.__list.__next));
717 pd->robust_head.list_op_pending = NULL;
718 #ifdef __PTHREAD_MUTEX_HAVE_PREV
719 pd->robust_prev = &pd->robust_head;
720 #endif
721 pd->robust_head.list = &pd->robust_head;
722
723 /* We place the thread descriptor at the end of the stack. */
724 *pdp = pd;
725
726 #if _STACK_GROWS_DOWN
727 void *stacktop;
728
729 # if TLS_TCB_AT_TP
730 /* The stack begins before the TCB and the static TLS block. */
731 stacktop = ((char *) (pd + 1) - __static_tls_size);
732 # elif TLS_DTV_AT_TP
733 stacktop = (char *) (pd - 1);
734 # endif
735
736 # ifdef NEED_SEPARATE_REGISTER_STACK
737 *stack = pd->stackblock;
738 *stacksize = stacktop - *stack;
739 # else
740 *stack = stacktop;
741 # endif
742 #else
743 *stack = pd->stackblock;
744 #endif
745
746 return 0;
747 }
748
749
750 void
751 internal_function
752 __deallocate_stack (struct pthread *pd)
753 {
754 lll_lock (stack_cache_lock, LLL_PRIVATE);
755
756 /* Remove the thread from whichever list it is on: threads with
757 library-allocated stacks or threads with user-provided stacks. */
758 stack_list_del (&pd->list);
759
760 /* Not much to do. Just free the mmap()ed memory. Note that we do
761 not reset the 'used' flag in the 'tid' field. This is done by
762 the kernel. If no thread has been created yet this field is
763 still zero. */
764 if (__glibc_likely (! pd->user_stack))
765 (void) queue_stack (pd);
766 else
767 /* Free the memory associated with the ELF TLS. */
768 _dl_deallocate_tls (TLS_TPADJ (pd), false);
769
770 lll_unlock (stack_cache_lock, LLL_PRIVATE);
771 }
772
773
774 int
775 internal_function
776 __make_stacks_executable (void **stack_endp)
777 {
778 /* First the main thread's stack. */
779 int err = _dl_make_stack_executable (stack_endp);
780 if (err != 0)
781 return err;
782
783 #ifdef NEED_SEPARATE_REGISTER_STACK
784 const size_t pagemask = ~(__getpagesize () - 1);
785 #endif
786
787 lll_lock (stack_cache_lock, LLL_PRIVATE);
788
789 list_t *runp;
790 list_for_each (runp, &stack_used)
791 {
792 err = change_stack_perm (list_entry (runp, struct pthread, list)
793 #ifdef NEED_SEPARATE_REGISTER_STACK
794 , pagemask
795 #endif
796 );
797 if (err != 0)
798 break;
799 }
800
801 /* Also change the permission for the currently unused stacks. This
802 might be wasted time, but it is better spent here than on adding a
803 check in the fast path. */
804 if (err == 0)
805 list_for_each (runp, &stack_cache)
806 {
807 err = change_stack_perm (list_entry (runp, struct pthread, list)
808 #ifdef NEED_SEPARATE_REGISTER_STACK
809 , pagemask
810 #endif
811 );
812 if (err != 0)
813 break;
814 }
815
816 lll_unlock (stack_cache_lock, LLL_PRIVATE);
817
818 return err;
819 }
820
821
822 /* In case of a fork() call the child inherits the same memory
823 allocations, but only one thread is running. All stacks except that
824 of the one running thread are not used anymore. We have to recycle
825 them. */
826 void
827 __reclaim_stacks (void)
828 {
829 struct pthread *self = (struct pthread *) THREAD_SELF;
830
831 /* No locking necessary. The calling thread is the only one running.
832 But we have to be aware that we might have interrupted a list
833 operation. */
834
835 if (in_flight_stack != 0)
836 {
837 bool add_p = in_flight_stack & 1;
838 list_t *elem = (list_t *) (in_flight_stack & ~(uintptr_t) 1);
839
840 if (add_p)
841 {
842 /* We always add at the beginning of the list. So in this case we
843 only need to check the heads of these lists to see whether their
844 pointers are inconsistent. */
845 list_t *l = NULL;
846
847 if (stack_used.next->prev != &stack_used)
848 l = &stack_used;
849 else if (stack_cache.next->prev != &stack_cache)
850 l = &stack_cache;
851
852 if (l != NULL)
853 {
854 assert (l->next->prev == elem);
855 elem->next = l->next;
856 elem->prev = l;
857 l->next = elem;
858 }
859 }
860 else
861 {
862 /* We can simply always replay the delete operation. */
863 elem->next->prev = elem->prev;
864 elem->prev->next = elem->next;
865 }
866 }
867
868 /* Mark all stacks except the still running one as free. */
869 list_t *runp;
870 list_for_each (runp, &stack_used)
871 {
872 struct pthread *curp = list_entry (runp, struct pthread, list);
873 if (curp != self)
874 {
875 /* This marks the stack as free. */
876 curp->tid = 0;
877
878 /* The PID field must be initialized for the new process. */
879 curp->pid = self->pid;
880
881 /* Account for the size of the stack. */
882 stack_cache_actsize += curp->stackblock_size;
883
884 if (curp->specific_used)
885 {
886 /* Clear the thread-specific data. */
887 memset (curp->specific_1stblock, '\0',
888 sizeof (curp->specific_1stblock));
889
890 curp->specific_used = false;
891
892 for (size_t cnt = 1; cnt < PTHREAD_KEY_1STLEVEL_SIZE; ++cnt)
893 if (curp->specific[cnt] != NULL)
894 {
895 memset (curp->specific[cnt], '\0',
896 sizeof (curp->specific_1stblock));
897
898 /* We allocated this block and do not free it
899 here, so set the bit again. */
901 }
902 }
903 }
904 }
905
906 /* Reset the PIDs in any cached stacks. */
907 list_for_each (runp, &stack_cache)
908 {
909 struct pthread *curp = list_entry (runp, struct pthread, list);
910 curp->pid = self->pid;
911 }
912
913 /* Add the stacks of all formerly running threads to the cache. */
914 list_splice (&stack_used, &stack_cache);
915
916 /* Remove the entry for the current thread from the cache list
917 and add it to the list of running threads. Which of the two
918 lists it is added to is decided by the user_stack flag. */
919 stack_list_del (&self->list);
920
921 /* Re-initialize the lists for all the threads. */
922 INIT_LIST_HEAD (&stack_used);
923 INIT_LIST_HEAD (&__stack_user);
924
925 if (__glibc_unlikely (THREAD_GETMEM (self, user_stack)))
926 list_add (&self->list, &__stack_user);
927 else
928 list_add (&self->list, &stack_used);
929
930 /* There is one thread running. */
931 __nptl_nthreads = 1;
932
933 in_flight_stack = 0;
934
935 /* Initialize locks. */
936 stack_cache_lock = LLL_LOCK_INITIALIZER;
937 __default_pthread_attr_lock = LLL_LOCK_INITIALIZER;
938 }
939
940
941 #if HP_TIMING_AVAIL
942 # undef __find_thread_by_id
943 /* Find a thread given the thread ID. */
944 attribute_hidden
945 struct pthread *
946 __find_thread_by_id (pid_t tid)
947 {
948 struct pthread *result = NULL;
949
950 lll_lock (stack_cache_lock, LLL_PRIVATE);
951
952 /* Iterate over the list with system-allocated threads first. */
953 list_t *runp;
954 list_for_each (runp, &stack_used)
955 {
956 struct pthread *curp;
957
958 curp = list_entry (runp, struct pthread, list);
959
960 if (curp->tid == tid)
961 {
962 result = curp;
963 goto out;
964 }
965 }
966
967 /* Now the list with threads using user-allocated stacks. */
968 list_for_each (runp, &__stack_user)
969 {
970 struct pthread *curp;
971
972 curp = list_entry (runp, struct pthread, list);
973
974 if (curp->tid == tid)
975 {
976 result = curp;
977 goto out;
978 }
979 }
980
981 out:
982 lll_unlock (stack_cache_lock, LLL_PRIVATE);
983
984 return result;
985 }
986 #endif
987
988
989 #ifdef SIGSETXID
990 static void
991 internal_function
992 setxid_mark_thread (struct xid_command *cmdp, struct pthread *t)
993 {
994 int ch;
995
996 /* Wait until this thread is cloned. */
997 if (t->setxid_futex == -1
998 && ! atomic_compare_and_exchange_bool_acq (&t->setxid_futex, -2, -1))
999 do
1000 futex_wait_simple (&t->setxid_futex, -2, FUTEX_PRIVATE);
1001 while (t->setxid_futex == -2);
1002
1003 /* Don't let the thread exit before the setxid handler runs. */
1004 t->setxid_futex = 0;
1005
1006 do
1007 {
1008 ch = t->cancelhandling;
1009
1010 /* If the thread is exiting right now, ignore it. */
1011 if ((ch & EXITING_BITMASK) != 0)
1012 {
1013 /* Release the futex if there is no other setxid in
1014 progress. */
1015 if ((ch & SETXID_BITMASK) == 0)
1016 {
1017 t->setxid_futex = 1;
1018 futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
1019 }
1020 return;
1021 }
1022 }
1023 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1024 ch | SETXID_BITMASK, ch));
1025 }
1026
1027
1028 static void
1029 internal_function
1030 setxid_unmark_thread (struct xid_command *cmdp, struct pthread *t)
1031 {
1032 int ch;
1033
1034 do
1035 {
1036 ch = t->cancelhandling;
1037 if ((ch & SETXID_BITMASK) == 0)
1038 return;
1039 }
1040 while (atomic_compare_and_exchange_bool_acq (&t->cancelhandling,
1041 ch & ~SETXID_BITMASK, ch));
1042
1043 /* Release the futex just in case. */
1044 t->setxid_futex = 1;
1045 futex_wake (&t->setxid_futex, 1, FUTEX_PRIVATE);
1046 }
1047
1048
1049 static int
1050 internal_function
1051 setxid_signal_thread (struct xid_command *cmdp, struct pthread *t)
1052 {
1053 if ((t->cancelhandling & SETXID_BITMASK) == 0)
1054 return 0;
1055
1056 int val;
1057 INTERNAL_SYSCALL_DECL (err);
1058 val = INTERNAL_SYSCALL (tgkill, err, 3, THREAD_GETMEM (THREAD_SELF, pid),
1059 t->tid, SIGSETXID);
1060
1061 /* If this failed, the thread must not have started yet or it has already exited. */
1062 if (!INTERNAL_SYSCALL_ERROR_P (val, err))
1063 {
1064 atomic_increment (&cmdp->cntr);
1065 return 1;
1066 }
1067 else
1068 return 0;
1069 }
1070
1071 /* Check for consistency across set*id system call results. The abort
1072 should not happen as long as all privilege changes happen through
1073 the glibc wrappers. ERROR must be 0 (no error) or an errno
1074 code. */
1075 void
1076 attribute_hidden
1077 __nptl_setxid_error (struct xid_command *cmdp, int error)
1078 {
1079 do
1080 {
1081 int olderror = cmdp->error;
1082 if (olderror == error)
1083 break;
1084 if (olderror != -1)
1085 /* Mismatch between current and previous results. */
1086 abort ();
1087 }
1088 while (atomic_compare_and_exchange_bool_acq (&cmdp->error, error, -1));
1089 }
1090
1091 int
1092 attribute_hidden
1093 __nptl_setxid (struct xid_command *cmdp)
1094 {
1095 int signalled;
1096 int result;
1097 lll_lock (stack_cache_lock, LLL_PRIVATE);
1098
1099 __xidcmd = cmdp;
1100 cmdp->cntr = 0;
1101 cmdp->error = -1;
1102
1103 struct pthread *self = THREAD_SELF;
1104
1105 /* Iterate over the list with system-allocated threads first. */
1106 list_t *runp;
1107 list_for_each (runp, &stack_used)
1108 {
1109 struct pthread *t = list_entry (runp, struct pthread, list);
1110 if (t == self)
1111 continue;
1112
1113 setxid_mark_thread (cmdp, t);
1114 }
1115
1116 /* Now the list with threads using user-allocated stacks. */
1117 list_for_each (runp, &__stack_user)
1118 {
1119 struct pthread *t = list_entry (runp, struct pthread, list);
1120 if (t == self)
1121 continue;
1122
1123 setxid_mark_thread (cmdp, t);
1124 }
1125
1126 /* Iterate until we don't succeed in signalling anyone. That means
1127 we have reached all running threads, and their children will
1128 automatically be correct once started. */
1129 do
1130 {
1131 signalled = 0;
1132
1133 list_for_each (runp, &stack_used)
1134 {
1135 struct pthread *t = list_entry (runp, struct pthread, list);
1136 if (t == self)
1137 continue;
1138
1139 signalled += setxid_signal_thread (cmdp, t);
1140 }
1141
1142 list_for_each (runp, &__stack_user)
1143 {
1144 struct pthread *t = list_entry (runp, struct pthread, list);
1145 if (t == self)
1146 continue;
1147
1148 signalled += setxid_signal_thread (cmdp, t);
1149 }
1150
1151 int cur = cmdp->cntr;
1152 while (cur != 0)
1153 {
1154 futex_wait_simple ((unsigned int *) &cmdp->cntr, cur,
1155 FUTEX_PRIVATE);
1156 cur = cmdp->cntr;
1157 }
1158 }
1159 while (signalled != 0);
1160
1161 /* Clean up flags, so that no thread blocks during exit waiting
1162 for a signal which will never come. */
1163 list_for_each (runp, &stack_used)
1164 {
1165 struct pthread *t = list_entry (runp, struct pthread, list);
1166 if (t == self)
1167 continue;
1168
1169 setxid_unmark_thread (cmdp, t);
1170 }
1171
1172 list_for_each (runp, &__stack_user)
1173 {
1174 struct pthread *t = list_entry (runp, struct pthread, list);
1175 if (t == self)
1176 continue;
1177
1178 setxid_unmark_thread (cmdp, t);
1179 }
1180
1181 /* This must be last, otherwise the current thread might not have
1182 permission to send the SIGSETXID signal to the other threads. */
1183 INTERNAL_SYSCALL_DECL (err);
1184 result = INTERNAL_SYSCALL_NCS (cmdp->syscall_no, err, 3,
1185 cmdp->id[0], cmdp->id[1], cmdp->id[2]);
1186 int error = 0;
1187 if (__glibc_unlikely (INTERNAL_SYSCALL_ERROR_P (result, err)))
1188 {
1189 error = INTERNAL_SYSCALL_ERRNO (result, err);
1190 __set_errno (error);
1191 result = -1;
1192 }
1193 __nptl_setxid_error (cmdp, error);
1194
1195 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1196 return result;
1197 }
1198 #endif /* SIGSETXID. */
1199
1200
1201 static inline void __attribute__((always_inline))
1202 init_one_static_tls (struct pthread *curp, struct link_map *map)
1203 {
1204 # if TLS_TCB_AT_TP
1205 void *dest = (char *) curp - map->l_tls_offset;
1206 # elif TLS_DTV_AT_TP
1207 void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
1208 # else
1209 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
1210 # endif
1211
1212 /* We cannot delay the initialization of the Static TLS area, since
1213 it can be accessed with LE or IE, but since the DTV is only used
1214 by GD and LD, we can delay its update to avoid a race. */
1215 memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
1216 '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
1217 }
1218
1219 void
1220 attribute_hidden
1221 __pthread_init_static_tls (struct link_map *map)
1222 {
1223 lll_lock (stack_cache_lock, LLL_PRIVATE);
1224
1225 /* Iterate over the list with system-allocated threads first. */
1226 list_t *runp;
1227 list_for_each (runp, &stack_used)
1228 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1229
1230 /* Now the list with threads using user-allocated stacks. */
1231 list_for_each (runp, &__stack_user)
1232 init_one_static_tls (list_entry (runp, struct pthread, list), map);
1233
1234 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1235 }
1236
1237
1238 void
1239 attribute_hidden
1240 __wait_lookup_done (void)
1241 {
1242 lll_lock (stack_cache_lock, LLL_PRIVATE);
1243
1244 struct pthread *self = THREAD_SELF;
1245
1246 /* Iterate over the list with system-allocated threads first. */
1247 list_t *runp;
1248 list_for_each (runp, &stack_used)
1249 {
1250 struct pthread *t = list_entry (runp, struct pthread, list);
1251 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1252 continue;
1253
1254 int *const gscope_flagp = &t->header.gscope_flag;
1255
1256 /* We have to wait until this thread is done with the global
1257 scope. First tell the thread that we are waiting and
1258 possibly have to be woken. */
1259 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1260 THREAD_GSCOPE_FLAG_WAIT,
1261 THREAD_GSCOPE_FLAG_USED))
1262 continue;
1263
1264 do
1265 futex_wait_simple ((unsigned int *) gscope_flagp,
1266 THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
1267 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1268 }
1269
1270 /* Now the list with threads using user-allocated stacks. */
1271 list_for_each (runp, &__stack_user)
1272 {
1273 struct pthread *t = list_entry (runp, struct pthread, list);
1274 if (t == self || t->header.gscope_flag == THREAD_GSCOPE_FLAG_UNUSED)
1275 continue;
1276
1277 int *const gscope_flagp = &t->header.gscope_flag;
1278
1279 /* We have to wait until this thread is done with the global
1280 scope. First tell the thread that we are waiting and
1281 possibly have to be woken. */
1282 if (atomic_compare_and_exchange_bool_acq (gscope_flagp,
1283 THREAD_GSCOPE_FLAG_WAIT,
1284 THREAD_GSCOPE_FLAG_USED))
1285 continue;
1286
1287 do
1288 futex_wait_simple ((unsigned int *) gscope_flagp,
1289 THREAD_GSCOPE_FLAG_WAIT, FUTEX_PRIVATE);
1290 while (*gscope_flagp == THREAD_GSCOPE_FLAG_WAIT);
1291 }
1292
1293 lll_unlock (stack_cache_lock, LLL_PRIVATE);
1294 }