/* Thread-local storage handling in the ELF dynamic linker.  Generic version.
   Copyright (C) 2002-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <libintl.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/param.h>
#include <atomic.h>

#include <tls.h>
#include <dl-tls.h>
#include <ldsodefs.h>

#define TUNABLE_NAMESPACE rtld
#include <dl-tunables.h>

/* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for

   - IE TLS in libc.so for all dlmopen namespaces except in the initial
     one where libc.so is not loaded dynamically but at startup time,
   - IE TLS in other libraries which may be dynamically loaded even in the
     initial namespace,
   - and optionally for optimizing dynamic TLS access.

   The maximum number of namespaces is DL_NNS, but to support that many
   namespaces correctly the static TLS allocation should be significantly
   increased, which may cause problems with small thread stacks due to the
   way static TLS is accounted (bug 11787).

   So there is an rtld.nns tunable limit on the number of supported
   namespaces that affects the size of the static TLS, and by default it is
   small enough not to cause problems with existing applications.  The
   limit is not enforced or checked: it is the user's responsibility to
   increase rtld.nns if more dlmopen namespaces are used.

   Audit modules use their own namespaces; they are not included in
   rtld.nns, but come on top when computing the number of namespaces.  */

/* Size of initial-exec TLS in libc.so.  */
#define LIBC_IE_TLS 192
/* Size of initial-exec TLS in libraries other than libc.so.
   This should be large enough to cover runtime libraries of the
   compiler such as libgomp and libraries in libc other than libc.so.  */
#define OTHER_IE_TLS 144

/* Calculate the size of the static TLS surplus, when the given
   number of audit modules are loaded.  Must be called after the
   number of audit modules is known and before static TLS allocation.  */
void
_dl_tls_static_surplus_init (size_t naudit)
{
  size_t nns, opt_tls;

#if HAVE_TUNABLES
  nns = TUNABLE_GET (nns, size_t, NULL);
  opt_tls = TUNABLE_GET (optional_static_tls, size_t, NULL);
#else
  /* Default values of the tunables.  */
  nns = 4;
  opt_tls = 512;
#endif
  if (nns > DL_NNS)
    nns = DL_NNS;
  if (DL_NNS - nns < naudit)
    _dl_fatal_printf ("Failed loading %lu audit modules, %lu are supported.\n",
                      (unsigned long) naudit, (unsigned long) (DL_NNS - nns));
  nns += naudit;

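  /* Worked example: with the default tunables (rtld.nns == 4,
     rtld.optional_static_tls == 512) and no audit modules, the
     formula below yields (4 - 1) * 192 + 4 * 144 + 512 == 1664 bytes
     of surplus static TLS.  (Illustrative arithmetic; actual values
     depend on the tunables and on NAUDIT.)  */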
  GL(dl_tls_static_optional) = opt_tls;
  GLRO(dl_tls_static_surplus) = ((nns - 1) * LIBC_IE_TLS
                                 + nns * OTHER_IE_TLS
                                 + opt_tls);
}

/* Out-of-memory handler.  */
static void
__attribute__ ((__noreturn__))
oom (void)
{
  _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
}


size_t
_dl_next_tls_modid (void)
{
  size_t result;

  if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
    {
      size_t disp = 0;
      struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);

      /* Note that this branch will never be executed during program
         start since there are no gaps at that time.  Therefore it
         does not matter that the dl_tls_dtv_slotinfo is not allocated
         yet when the function is called for the first time.

         NB: the offset +1 is due to the fact that DTV[0] is used
         for something else.  */
      result = GL(dl_tls_static_nelem) + 1;
      if (result <= GL(dl_tls_max_dtv_idx))
        do
          {
            while (result - disp < runp->len)
              {
                if (runp->slotinfo[result - disp].map == NULL)
                  break;

                ++result;
                assert (result <= GL(dl_tls_max_dtv_idx) + 1);
              }

            if (result - disp < runp->len)
              break;

            disp += runp->len;
          }
        while ((runp = runp->next) != NULL);

      if (result > GL(dl_tls_max_dtv_idx))
        {
          /* The new index must indeed be exactly one higher than the
             previous high.  */
          assert (result == GL(dl_tls_max_dtv_idx) + 1);
          /* There is no gap anymore.  */
          GL(dl_tls_dtv_gaps) = false;

          goto nogaps;
        }
    }
  else
    {
      /* No gaps, allocate a new entry.  */
    nogaps:

      result = ++GL(dl_tls_max_dtv_idx);
    }

  return result;
}


size_t
_dl_count_modids (void)
{
  /* It is rare that we have gaps; see elf/dl-open.c (_dl_open) where
     we fail to load a module and unload it, leaving a gap.  If we don't
     have gaps then the number of modids is the current maximum so
     return that.  */
  if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
    return GL(dl_tls_max_dtv_idx);

  /* We have gaps and are forced to count the non-NULL entries.  */
  size_t n = 0;
  struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
  while (runp != NULL)
    {
      for (size_t i = 0; i < runp->len; ++i)
        if (runp->slotinfo[i].map != NULL)
          ++n;

      runp = runp->next;
    }

  return n;
}


#ifdef SHARED
void
_dl_determine_tlsoffset (void)
{
  size_t max_align = TLS_TCB_ALIGN;
  /* libc.so with rseq has TLS with 32-byte alignment.  Since TLS is
     initialized before audit modules are loaded and slotinfo
     information is available, this is not taken into account below in
     the audit case.  */
  max_align = MAX (max_align, 32U);

  size_t freetop = 0;
  size_t freebottom = 0;

  /* The first element of the dtv slot info list is allocated.  */
  assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
  /* There is at this point only one element in the
     dl_tls_dtv_slotinfo_list list.  */
  assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);

  struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;

  /* Determining the offset of the various parts of the static TLS
     block has several dependencies.  In addition we have to work
     around bugs in some toolchains.

     Each TLS block from the objects available at link time has a size
     and an alignment requirement.  The GNU ld computes the alignment
     requirements for the data at the positions *in the file*, though.
     I.e., it is not simply possible to allocate a block with the size
     of the TLS program header entry.  The data is laid out assuming
     that the first byte of the TLS block fulfills

       p_vaddr mod p_align == &TLS_BLOCK mod p_align

     This means we have to add artificial padding at the beginning of
     the TLS block.  These bytes are never used for the TLS data in
     this module but the first byte allocated must be aligned
     according to mod p_align == 0 so that the first byte of the TLS
     block is aligned according to p_vaddr mod p_align.  This is ugly
     and the linker can help by computing the offsets in the TLS block
     assuming the first byte of the TLS block is aligned according to
     p_align.

     The extra space which might be allocated before the first byte of
     the TLS block need not go unused.  The code below tries to use
     that memory for the next TLS block.  This can work if the total
     memory requirement for the next TLS block is smaller than the
     gap.  */
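  /* A worked example of the padding computation below, with purely
     illustrative values: for l_tls_align == 16 and
     l_tls_firstbyte_offset == 4 (i.e. p_vaddr % p_align == 4),
     firstbyte == (-4) & (16 - 1) == 12, and the offsets chosen below
     put the first byte of such a block at an address congruent to
     4 mod 16, matching the link-time layout assumption above.  */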

#if TLS_TCB_AT_TP
  /* We simply start with zero.  */
  size_t offset = 0;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
                          & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
        {
          off = roundup (freetop + slotinfo[cnt].map->l_tls_blocksize
                         - firstbyte, slotinfo[cnt].map->l_tls_align)
                + firstbyte;
          if (off <= freebottom)
            {
              freetop = off;

              /* XXX For some architectures we perhaps should store the
                 negative offset.  */
              slotinfo[cnt].map->l_tls_offset = off;
              continue;
            }
        }

      off = roundup (offset + slotinfo[cnt].map->l_tls_blocksize - firstbyte,
                     slotinfo[cnt].map->l_tls_align) + firstbyte;
      if (off > offset + slotinfo[cnt].map->l_tls_blocksize
                + (freebottom - freetop))
        {
          freetop = offset;
          freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
        }
      offset = off;

      /* XXX For some architectures we perhaps should store the
         negative offset.  */
      slotinfo[cnt].map->l_tls_offset = off;
    }

  GL(dl_tls_static_used) = offset;
  GL(dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
                                     max_align)
                            + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
  /* The TLS blocks start right after the TCB.  */
  size_t offset = TLS_TCB_SIZE;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
                          & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
        {
          off = roundup (freebottom, slotinfo[cnt].map->l_tls_align);
          if (off - freebottom < firstbyte)
            off += slotinfo[cnt].map->l_tls_align;
          if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
            {
              slotinfo[cnt].map->l_tls_offset = off - firstbyte;
              freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
                            - firstbyte);
              continue;
            }
        }

      off = roundup (offset, slotinfo[cnt].map->l_tls_align);
      if (off - offset < firstbyte)
        off += slotinfo[cnt].map->l_tls_align;

      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
      if (off - firstbyte - offset > freetop - freebottom)
        {
          freebottom = offset;
          freetop = off - firstbyte;
        }

      offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
    }

  GL(dl_tls_static_used) = offset;
  GL(dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
                                    TLS_TCB_ALIGN);
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

  /* The alignment requirement for the static TLS block.  */
  GL(dl_tls_static_align) = max_align;
}
#endif /* SHARED */

static void *
allocate_dtv (void *result)
{
  dtv_t *dtv;
  size_t dtv_length;

  /* We allocate a few more elements in the dtv than are needed for the
     initial set of modules.  This should avoid in most cases expansions
     of the dtv.  */
  dtv_length = GL(dl_tls_max_dtv_idx) + DTV_SURPLUS;
  dtv = calloc (dtv_length + 2, sizeof (dtv_t));
  if (dtv != NULL)
    {
      /* This is the initial length of the dtv.  */
      dtv[0].counter = dtv_length;

      /* The rest of the dtv (including the generation counter) is
         initialized with zero to indicate nothing there.  */

      /* Add the dtv to the thread data structures.  */
      INSTALL_DTV (result, dtv);
    }
  else
    result = NULL;

  return result;
}
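
/* A note on indexing (descriptive; INSTALL_DTV itself is
   target-specific): typically INSTALL_DTV stores the address of
   dtv[1] in the thread descriptor, so the thread afterwards sees
   dtv[-1].counter as the capacity set above, dtv[0].counter as the
   generation count, and the per-module entries starting at dtv[1].  */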


/* Get size and alignment requirements of the static TLS block.  */
void
_dl_get_tls_static_info (size_t *sizep, size_t *alignp)
{
  *sizep = GL(dl_tls_static_size);
  *alignp = GL(dl_tls_static_align);
}

/* Derive the location of the pointer to the start of the original
   allocation (before alignment) from the pointer to the TCB.  */
static inline void **
tcb_to_pointer_to_free_location (void *tcb)
{
#if TLS_TCB_AT_TP
  /* The TCB follows the TLS blocks, and the pointer to the front
     follows the TCB.  */
  void **original_pointer_location = tcb + TLS_TCB_SIZE;
#elif TLS_DTV_AT_TP
  /* The TCB comes first, preceded by the pre-TCB, and the pointer is
     before that.  */
  void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
#endif
  return original_pointer_location;
}
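
/* Concretely (an illustrative sketch, not additional layout rules),
   the storage set up by _dl_allocate_tls_storage below looks like

     TLS_TCB_AT_TP:  [ padding ] [ TLS blocks ] [ TCB ] [ start ptr ]
     TLS_DTV_AT_TP:  [ padding ] [ start ptr ] [ pre-TCB ] [ TCB ] [ TLS blocks ]

   where "start ptr" is the malloc'ed address that
   tcb_to_pointer_to_free_location above recovers for free.  */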

void *
_dl_allocate_tls_storage (void)
{
  void *result;
  size_t size = GL(dl_tls_static_size);

#if TLS_DTV_AT_TP
  /* Memory layout is:
     [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
                          ^ This should be returned.  */
  size += TLS_PRE_TCB_SIZE;
#endif

  /* Perform the allocation.  Reserve space for the required alignment
     and the pointer to the original allocation.  */
  size_t alignment = GL(dl_tls_static_align);
  void *allocated = malloc (size + alignment + sizeof (void *));
  if (__glibc_unlikely (allocated == NULL))
    return NULL;

  /* Perform alignment and allocate the DTV.  */
#if TLS_TCB_AT_TP
  /* The TCB follows the TLS blocks, which determine the alignment.
     (TCB alignment requirements have been taken into account when
     calculating GL(dl_tls_static_align).)  */
  void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
  result = aligned + size - TLS_TCB_SIZE;

  /* Clear the TCB data structure.  We can't ask the caller (i.e.
     libpthread) to do it, because we will initialize the DTV et al.  */
  memset (result, '\0', TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
  /* Pre-TCB and TCB come before the TLS blocks.  The layout computed
     in _dl_determine_tlsoffset assumes that the TCB is aligned to the
     TLS block alignment, and not just the TLS blocks after it.  This
     can leave an unused alignment gap between the TCB and the TLS
     blocks.  */
  result = (void *) roundup
    (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
     alignment);

  /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
     it.  We can't ask the caller (i.e. libpthread) to do it, because
     we will initialize the DTV et al.  */
  memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
#endif

  /* Record the value of the original pointer for later
     deallocation.  */
  *tcb_to_pointer_to_free_location (result) = allocated;

  result = allocate_dtv (result);
  if (result == NULL)
    free (allocated);
  return result;
}


#ifndef SHARED
extern dtv_t _dl_static_dtv[];
# define _dl_initial_dtv (&_dl_static_dtv[1])
#endif

static dtv_t *
_dl_resize_dtv (dtv_t *dtv)
{
  /* Resize the dtv.  */
  dtv_t *newp;
  /* Load GL(dl_tls_max_dtv_idx) atomically since it may be written to by
     other threads concurrently.  */
  size_t newsize
    = atomic_load_acquire (&GL(dl_tls_max_dtv_idx)) + DTV_SURPLUS;
  size_t oldsize = dtv[-1].counter;

  if (dtv == GL(dl_initial_dtv))
    {
      /* This is the initial dtv that was either statically allocated in
         __libc_setup_tls or allocated during rtld startup using the
         dl-minimal.c malloc instead of the real malloc.  We can't free
         it, we have to abandon the old storage.  */

      newp = malloc ((2 + newsize) * sizeof (dtv_t));
      if (newp == NULL)
        oom ();
      memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
    }
  else
    {
      newp = realloc (&dtv[-1],
                      (2 + newsize) * sizeof (dtv_t));
      if (newp == NULL)
        oom ();
    }

  newp[0].counter = newsize;

  /* Clear the newly allocated part.  */
  memset (newp + 2 + oldsize, '\0',
          (newsize - oldsize) * sizeof (dtv_t));

  /* Return the generation counter.  */
  return &newp[1];
}


void *
_dl_allocate_tls_init (void *result)
{
  if (result == NULL)
    /* The memory allocation failed.  */
    return NULL;

  dtv_t *dtv = GET_DTV (result);
  struct dtv_slotinfo_list *listp;
  size_t total = 0;
  size_t maxgen = 0;

  /* Check if the current dtv is big enough.  */
  if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
    {
      /* Resize the dtv.  */
      dtv = _dl_resize_dtv (dtv);

      /* Install this new dtv in the thread data structures.  */
      INSTALL_DTV (result, &dtv[-1]);
    }

  /* We have to prepare the dtv for all currently loaded modules using
     TLS.  For those which are dynamically loaded we add the values
     indicating deferred allocation.  */
  listp = GL(dl_tls_dtv_slotinfo_list);
  while (1)
    {
      size_t cnt;

      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
        {
          struct link_map *map;
          void *dest;

          /* Check for the total number of used slots.  */
          if (total + cnt > GL(dl_tls_max_dtv_idx))
            break;

          map = listp->slotinfo[cnt].map;
          if (map == NULL)
            /* Unused entry.  */
            continue;

          /* Keep track of the maximum generation number.  This might
             not be the generation counter.  */
          assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation));
          maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);

          dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
          dtv[map->l_tls_modid].pointer.to_free = NULL;

          if (map->l_tls_offset == NO_TLS_OFFSET
              || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
            continue;

          assert (map->l_tls_modid == total + cnt);
          assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
#if TLS_TCB_AT_TP
          assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
          dest = (char *) result - map->l_tls_offset;
#elif TLS_DTV_AT_TP
          dest = (char *) result + map->l_tls_offset;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

          /* Set up the DTV entry.  The simplified __tls_get_addr that
             some platforms use in static programs requires it.  */
          dtv[map->l_tls_modid].pointer.val = dest;

          /* Copy the initialization image and clear the BSS part.  */
          memset (__mempcpy (dest, map->l_tls_initimage,
                             map->l_tls_initimage_size), '\0',
                  map->l_tls_blocksize - map->l_tls_initimage_size);
        }

      total += cnt;
      if (total >= GL(dl_tls_max_dtv_idx))
        break;

      listp = listp->next;
      assert (listp != NULL);
    }

  /* The DTV version is up-to-date now.  */
  dtv[0].counter = maxgen;

  return result;
}
rtld_hidden_def (_dl_allocate_tls_init)

void *
_dl_allocate_tls (void *mem)
{
  return _dl_allocate_tls_init (mem == NULL
                                ? _dl_allocate_tls_storage ()
                                : allocate_dtv (mem));
}
rtld_hidden_def (_dl_allocate_tls)


void
_dl_deallocate_tls (void *tcb, bool dealloc_tcb)
{
  dtv_t *dtv = GET_DTV (tcb);

  /* We need to free the memory allocated for non-static TLS.  */
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    free (dtv[1 + cnt].pointer.to_free);

  /* The array starts with dtv[-1].  */
  if (dtv != GL(dl_initial_dtv))
    free (dtv - 1);

  if (dealloc_tcb)
    free (*tcb_to_pointer_to_free_location (tcb));
}
rtld_hidden_def (_dl_deallocate_tls)


#ifdef SHARED
/* The __tls_get_addr function has two basic forms which differ in the
   arguments.  The IA-64 form takes two parameters, the module ID and
   offset.  The form used, among others, on IA-32 takes a reference to
   a special structure which contains the same information.  The second
   form seems to be more often used (at the moment) so we default to
   it.  Users of the IA-64 form have to provide adequate definitions
   of the following macros.  */
# ifndef GET_ADDR_ARGS
#  define GET_ADDR_ARGS tls_index *ti
#  define GET_ADDR_PARAM ti
# endif
# ifndef GET_ADDR_MODULE
#  define GET_ADDR_MODULE ti->ti_module
# endif
# ifndef GET_ADDR_OFFSET
#  define GET_ADDR_OFFSET ti->ti_offset
# endif
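
/* For reference: in the structure form used here, the argument points
   to a tls_index defined by the target's dl-tls.h, pairing a module
   ID with an offset into that module's block, along the lines of

     typedef struct { size_t ti_module; size_t ti_offset; } tls_index;

   (the exact field types vary by target).  */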

/* Allocate one DTV entry.  */
static struct dtv_pointer
allocate_dtv_entry (size_t alignment, size_t size)
{
  if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
    {
      /* The alignment is supported by malloc.  */
      void *ptr = malloc (size);
      return (struct dtv_pointer) { ptr, ptr };
    }

  /* Emulate memalign by manually aligning a pointer returned by
     malloc.  First compute the size with an overflow check.  */
  size_t alloc_size = size + alignment;
  if (alloc_size < size)
    return (struct dtv_pointer) {};

  /* Perform the allocation.  This is the pointer we need to free
     later.  */
  void *start = malloc (alloc_size);
  if (start == NULL)
    return (struct dtv_pointer) {};

  /* Find the aligned position within the larger allocation.  */
  void *aligned = (void *) roundup ((uintptr_t) start, alignment);

  return (struct dtv_pointer) { .val = aligned, .to_free = start };
}
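
/* For instance (illustrative addresses): with alignment == 64, if
   malloc returns 0x1008, allocate_dtv_entry above rounds up to
   0x1040; .val == 0x1040 is used for the TLS block while
   .to_free == 0x1008 is what must eventually be passed to free.  */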

static struct dtv_pointer
allocate_and_init (struct link_map *map)
{
  struct dtv_pointer result = allocate_dtv_entry
    (map->l_tls_align, map->l_tls_blocksize);
  if (result.val == NULL)
    oom ();

  /* Initialize the memory.  */
  memset (__mempcpy (result.val, map->l_tls_initimage,
                     map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);

  return result;
}


struct link_map *
_dl_update_slotinfo (unsigned long int req_modid)
{
  struct link_map *the_map = NULL;
  dtv_t *dtv = THREAD_DTV ();

  /* The global dl_tls_dtv_slotinfo array contains for each module
     index the generation counter current when the entry was created.
     This array never shrinks so that all module indices which were
     valid at some time can be used to access it.  Before the first
     use of a new module index in this function the array was extended
     appropriately.  Access also does not have to be guarded against
     modifications of the array.  It is assumed that pointer-size
     values can be read atomically even in SMP environments.  It is
     possible that other threads at the same time dynamically load
     code and therefore add to the slotinfo list.  This is a problem
     since we must not pick up any information about incomplete work.
     The solution to this is to ignore all dtv slots which were
     created after the one we are currently interested in.  We know
     that dynamic loading for this module is completed and this is
     the last load operation we know finished.  */
  unsigned long int idx = req_modid;
  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

  while (idx >= listp->len)
    {
      idx -= listp->len;
      listp = listp->next;
    }

  if (dtv[0].counter < listp->slotinfo[idx].gen)
    {
      /* The generation counter for the slot is higher than what the
         current dtv implements.  We have to update the whole dtv but
         only those entries with a generation counter <= the one for
         the entry we need.  */
      size_t new_gen = listp->slotinfo[idx].gen;
      size_t total = 0;

      /* We have to look through the entire dtv slotinfo list.  */
      listp = GL(dl_tls_dtv_slotinfo_list);
      do
        {
          for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
            {
              size_t gen = listp->slotinfo[cnt].gen;

              if (gen > new_gen)
                /* This is a slot for a generation younger than the
                   one we are handling now.  It might be incompletely
                   set up so ignore it.  */
                continue;

              /* If the entry is older than the current dtv layout we
                 know we don't have to handle it.  */
              if (gen <= dtv[0].counter)
                continue;

              /* If there is no map this means the entry is empty.  */
              struct link_map *map = listp->slotinfo[cnt].map;
              if (map == NULL)
                {
                  if (dtv[-1].counter >= total + cnt)
                    {
                      /* If this modid was used at some point the memory
                         might still be allocated.  */
                      free (dtv[total + cnt].pointer.to_free);
                      dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
                      dtv[total + cnt].pointer.to_free = NULL;
                    }

                  continue;
                }

              /* Check whether the current dtv array is large enough.  */
              size_t modid = map->l_tls_modid;
              assert (total + cnt == modid);
              if (dtv[-1].counter < modid)
                {
                  /* Resize the dtv.  */
                  dtv = _dl_resize_dtv (dtv);

                  assert (modid <= dtv[-1].counter);

                  /* Install this new dtv in the thread data
                     structures.  */
                  INSTALL_NEW_DTV (dtv);
                }

              /* If there is currently memory allocated for this
                 dtv entry, free it.  */
              /* XXX Ideally we will at some point create a memory
                 pool.  */
              free (dtv[modid].pointer.to_free);
              dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
              dtv[modid].pointer.to_free = NULL;

              if (modid == req_modid)
                the_map = map;
            }

          total += listp->len;
        }
      while ((listp = listp->next) != NULL);

      /* This will be the new maximum generation counter.  */
      dtv[0].counter = new_gen;
    }

  return the_map;
}


static void *
__attribute_noinline__
tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
{
  /* The allocation was deferred.  Do it now.  */
  if (the_map == NULL)
    {
      /* Find the link map for this module.  */
      size_t idx = GET_ADDR_MODULE;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

      while (idx >= listp->len)
        {
          idx -= listp->len;
          listp = listp->next;
        }

      the_map = listp->slotinfo[idx].map;
    }

  /* Make sure that, if a dlopen running in parallel forces the
     variable into static storage, we'll wait until the address in the
     static TLS block is set up, and use that.  If we're undecided
     yet, make sure we make the decision holding the lock as well.  */
  if (__glibc_unlikely (the_map->l_tls_offset
                        != FORCED_DYNAMIC_TLS_OFFSET))
    {
      __rtld_lock_lock_recursive (GL(dl_load_lock));
      if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
        {
          the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
          __rtld_lock_unlock_recursive (GL(dl_load_lock));
        }
      else if (__glibc_likely (the_map->l_tls_offset
                               != FORCED_DYNAMIC_TLS_OFFSET))
        {
#if TLS_TCB_AT_TP
          void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
#elif TLS_DTV_AT_TP
          void *p = (char *) THREAD_SELF + the_map->l_tls_offset + TLS_PRE_TCB_SIZE;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
          __rtld_lock_unlock_recursive (GL(dl_load_lock));

          dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
          dtv[GET_ADDR_MODULE].pointer.val = p;

          return (char *) p + GET_ADDR_OFFSET;
        }
      else
        __rtld_lock_unlock_recursive (GL(dl_load_lock));
    }
  struct dtv_pointer result = allocate_and_init (the_map);
  dtv[GET_ADDR_MODULE].pointer = result;
  assert (result.to_free != NULL);

  return (char *) result.val + GET_ADDR_OFFSET;
}


static struct link_map *
__attribute_noinline__
update_get_addr (GET_ADDR_ARGS)
{
  struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
  dtv_t *dtv = THREAD_DTV ();

  void *p = dtv[GET_ADDR_MODULE].pointer.val;

  if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
    return tls_get_addr_tail (GET_ADDR_PARAM, dtv, the_map);

  return (void *) p + GET_ADDR_OFFSET;
}

/* For all machines that have a non-macro version of __tls_get_addr, we
   want to use rtld_hidden_proto/rtld_hidden_def in order to call the
   internal alias for __tls_get_addr from ld.so.  This avoids a PLT entry
   in ld.so for __tls_get_addr.  */

#ifndef __tls_get_addr
extern void * __tls_get_addr (GET_ADDR_ARGS);
rtld_hidden_proto (__tls_get_addr)
rtld_hidden_def (__tls_get_addr)
#endif

/* The generic dynamic and local dynamic model cannot be used in
   statically linked applications.  */
void *
__tls_get_addr (GET_ADDR_ARGS)
{
  dtv_t *dtv = THREAD_DTV ();

  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
    return update_get_addr (GET_ADDR_PARAM);

  void *p = dtv[GET_ADDR_MODULE].pointer.val;

  if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
    return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);

  return (char *) p + GET_ADDR_OFFSET;
}
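
/* In the common case __tls_get_addr above performs only a generation
   check and a single DTV load; update_get_addr and tls_get_addr_tail
   run only after a dlopen or dlclose has changed the TLS generation,
   or on the first access to a block whose allocation was deferred.  */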
#endif


/* Look up the module's TLS block as for __tls_get_addr,
   but never touch anything.  Return null if it's not allocated yet.  */
void *
_dl_tls_get_addr_soft (struct link_map *l)
{
  if (__glibc_unlikely (l->l_tls_modid == 0))
    /* This module has no TLS segment.  */
    return NULL;

  dtv_t *dtv = THREAD_DTV ();
  if (__glibc_unlikely (dtv[0].counter != GL(dl_tls_generation)))
    {
      /* This thread's DTV is not completely current,
         but it might already cover this module.  */

      if (l->l_tls_modid >= dtv[-1].counter)
        /* Nope.  */
        return NULL;

      size_t idx = l->l_tls_modid;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
      while (idx >= listp->len)
        {
          idx -= listp->len;
          listp = listp->next;
        }

      /* We've reached the slot for this module.
         If its generation counter is higher than the DTV's,
         this thread does not know about this module yet.  */
      if (dtv[0].counter < listp->slotinfo[idx].gen)
        return NULL;
    }

  void *data = dtv[l->l_tls_modid].pointer.val;
  if (__glibc_unlikely (data == TLS_DTV_UNALLOCATED))
    /* The DTV is current, but this thread has not yet needed
       to allocate this module's segment.  */
    data = NULL;

  return data;
}


void
_dl_add_to_slotinfo (struct link_map *l, bool do_add)
{
  /* Now that we know the object is loaded successfully add
     modules containing TLS data to the dtv info table.  We
     might have to increase its size.  */
  struct dtv_slotinfo_list *listp;
  struct dtv_slotinfo_list *prevp;
  size_t idx = l->l_tls_modid;

  /* Find the place in the dtv slotinfo list.  */
  listp = GL(dl_tls_dtv_slotinfo_list);
  prevp = NULL;		/* Needed to shut up gcc.  */
  do
    {
      /* Does it fit in the array of this list element?  */
      if (idx < listp->len)
        break;
      idx -= listp->len;
      prevp = listp;
      listp = listp->next;
    }
  while (listp != NULL);

  if (listp == NULL)
    {
      /* When we come here it means we have to add a new element
         to the slotinfo list.  And the new module must be in
         the first slot.  */
      assert (idx == 0);

      listp = prevp->next = (struct dtv_slotinfo_list *)
        malloc (sizeof (struct dtv_slotinfo_list)
                + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
      if (listp == NULL)
        {
          /* We ran out of memory.  We will simply fail this
             call but don't undo anything we did so far.  The
             application will crash or be terminated anyway very
             soon.  */

          /* We have to do this since some entries in the dtv
             slotinfo array might already point to this
             generation.  */
          ++GL(dl_tls_generation);

          _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
cannot create TLS data structures"));
        }

      listp->len = TLS_SLOTINFO_SURPLUS;
      listp->next = NULL;
      memset (listp->slotinfo, '\0',
              TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
    }

  /* Add the information into the slotinfo data structure.  */
  if (do_add)
    {
      listp->slotinfo[idx].map = l;
      listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1;
    }
}