/* Thread-local storage handling in the ELF dynamic linker.
   AArch64 version.
   Copyright (C) 2011-2016 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <tls.h>
#include "tlsdesc.h"

#define NSAVEDQREGPAIRS 16
#define SAVE_Q_REGISTERS                                \
        stp     q0, q1, [sp, #-32*NSAVEDQREGPAIRS]!;    \
        cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);     \
        stp     q2, q3, [sp, #32*1];                    \
        stp     q4, q5, [sp, #32*2];                    \
        stp     q6, q7, [sp, #32*3];                    \
        stp     q8, q9, [sp, #32*4];                    \
        stp     q10, q11, [sp, #32*5];                  \
        stp     q12, q13, [sp, #32*6];                  \
        stp     q14, q15, [sp, #32*7];                  \
        stp     q16, q17, [sp, #32*8];                  \
        stp     q18, q19, [sp, #32*9];                  \
        stp     q20, q21, [sp, #32*10];                 \
        stp     q22, q23, [sp, #32*11];                 \
        stp     q24, q25, [sp, #32*12];                 \
        stp     q26, q27, [sp, #32*13];                 \
        stp     q28, q29, [sp, #32*14];                 \
        stp     q30, q31, [sp, #32*15];

#define RESTORE_Q_REGISTERS                             \
        ldp     q2, q3, [sp, #32*1];                    \
        ldp     q4, q5, [sp, #32*2];                    \
        ldp     q6, q7, [sp, #32*3];                    \
        ldp     q8, q9, [sp, #32*4];                    \
        ldp     q10, q11, [sp, #32*5];                  \
        ldp     q12, q13, [sp, #32*6];                  \
        ldp     q14, q15, [sp, #32*7];                  \
        ldp     q16, q17, [sp, #32*8];                  \
        ldp     q18, q19, [sp, #32*9];                  \
        ldp     q20, q21, [sp, #32*10];                 \
        ldp     q22, q23, [sp, #32*11];                 \
        ldp     q24, q25, [sp, #32*12];                 \
        ldp     q26, q27, [sp, #32*13];                 \
        ldp     q28, q29, [sp, #32*14];                 \
        ldp     q30, q31, [sp, #32*15];                 \
        ldp     q0, q1, [sp], #32*NSAVEDQREGPAIRS;      \
        cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
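
/* Note: each q register is 16 bytes, so the pairs above occupy
   32 * NSAVEDQREGPAIRS = 512 bytes of stack.  All 32 vector registers
   are saved, not just the AAPCS64 callee-saved subset, because the
   standard call convention only preserves the low 64 bits of v8-v15,
   while a TLS descriptor resolver is expected to preserve every
   register it does not use for the result (x0).  */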

        .text

/* Compute the thread pointer offset for symbols in the static
   TLS block.  The offset is the same for all threads.
   Prototype:
   _dl_tlsdesc_return (tlsdesc *) ;
 */
        .hidden _dl_tlsdesc_return
        .global _dl_tlsdesc_return
        .type   _dl_tlsdesc_return,%function
        cfi_startproc
        .align 2
_dl_tlsdesc_return:
        DELOUSE (0)
        ldr     PTR_REG (0), [x0, #PTR_SIZE]
        RET
        cfi_endproc
        .size   _dl_tlsdesc_return, .-_dl_tlsdesc_return
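
/* Illustrative sketch (not part of the build; descriptor layout as
   described by the comments in this file): a TLSDESC access compiles
   to an indirect call through the first word of the descriptor, and
   the call returns an offset from the thread pointer, roughly

     struct tlsdesc { ptrdiff_t (*entry) (struct tlsdesc *); void *arg; };

     ptrdiff_t off = td->entry (td);   // here: _dl_tlsdesc_return
     void *addr = (char *) __builtin_thread_pointer () + off;

   so this function only has to load the precomputed static TLS offset
   from td->arg (the second word) into x0.  */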

/* Same as _dl_tlsdesc_return but with synchronization for
   lazy relocation.
   Prototype:
   _dl_tlsdesc_return_lazy (tlsdesc *) ;
 */
        .hidden _dl_tlsdesc_return_lazy
        .global _dl_tlsdesc_return_lazy
        .type   _dl_tlsdesc_return_lazy,%function
        cfi_startproc
        .align 2
_dl_tlsdesc_return_lazy:
        /* The ldar here happens after the load from [x0] at the call site
           (that is generated by the compiler as part of the TLS access ABI),
           so it reads the same value (this function is the final value of
           td->entry) and thus it synchronizes with the release store to
           td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
           from [x0,#PTR_SIZE] here happens after the initialization of td->arg.  */
        DELOUSE (0)
        ldar    PTR_REG (zr), [x0]
        ldr     PTR_REG (0), [x0, #PTR_SIZE]
        RET
        cfi_endproc
        .size   _dl_tlsdesc_return_lazy, .-_dl_tlsdesc_return_lazy
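
/* Illustrative sketch of the ordering argument above in C11 atomics
   terms (hypothetical; td->entry would have to be declared _Atomic for
   this to compile): the resolver publishes the finished descriptor with
   a release store to td->entry, and the final entry points re-read
   td->entry with acquire semantics before touching td->arg.

     // Writer, conceptually in _dl_tlsdesc_resolve_rela_fixup:
     td->arg = final_arg;                               // plain store
     atomic_store_explicit (&td->entry, final_entry,
                            memory_order_release);      // publish

     // Reader, i.e. the ldar above:
     (void) atomic_load_explicit (&td->entry,
                                  memory_order_acquire);
     return (ptrdiff_t) td->arg;                        // safe to read now
*/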

/* Handler for undefined weak TLS symbols.
   Prototype:
   _dl_tlsdesc_undefweak (tlsdesc *);

   The second word of the descriptor contains the addend.
   Return the addend minus the thread pointer.  This ensures
   that when the caller adds on the thread pointer it gets back
   the addend.  */

        .hidden _dl_tlsdesc_undefweak
        .global _dl_tlsdesc_undefweak
        .type   _dl_tlsdesc_undefweak,%function
        cfi_startproc
        .align 2
_dl_tlsdesc_undefweak:
        str     x1, [sp, #-16]!
        cfi_adjust_cfa_offset (16)
        /* The ldar here happens after the load from [x0] at the call site
           (that is generated by the compiler as part of the TLS access ABI),
           so it reads the same value (this function is the final value of
           td->entry) and thus it synchronizes with the release store to
           td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
           from [x0,#PTR_SIZE] here happens after the initialization of td->arg.  */
        DELOUSE (0)
        ldar    PTR_REG (zr), [x0]
        ldr     PTR_REG (0), [x0, #PTR_SIZE]
        mrs     x1, tpidr_el0
        sub     PTR_REG (0), PTR_REG (0), PTR_REG (1)
        ldr     x1, [sp], #16
        cfi_adjust_cfa_offset (-16)
        RET
        cfi_endproc
        .size   _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
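
/* Illustrative sketch: for an undefined weak symbol td->arg holds only
   the relocation addend, so the code above computes roughly

     return (ptrdiff_t) td->arg - (ptrdiff_t) __builtin_thread_pointer ();

   and the caller's usual "thread pointer + returned offset" then
   collapses back to the bare addend (typically 0, i.e. a null
   address).  */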

#ifdef SHARED
/* Handler for dynamic TLS symbols.
   Prototype:
   _dl_tlsdesc_dynamic (tlsdesc *) ;

   The second word of the descriptor points to a
   tlsdesc_dynamic_arg structure.

   Returns the offset between the thread pointer and the
   object referenced by the argument.

   ptrdiff_t
   __attribute__ ((__regparm__ (1)))
   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
   {
     struct tlsdesc_dynamic_arg *td = tdp->arg;
     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + TCBHEAD_DTV);
     if (__builtin_expect (td->gen_count <= dtv[0].counter
                           && (dtv[td->tlsinfo.ti_module].pointer.val
                               != TLS_DTV_UNALLOCATED),
                           1))
       return dtv[td->tlsinfo.ti_module].pointer.val
              + td->tlsinfo.ti_offset
              - __thread_pointer;

     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
   }
*/
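
/* In other words, the fast path below succeeds only when this thread's
   DTV is at least as new as the descriptor's generation count and the
   module's TLS block has already been allocated for this thread;
   otherwise we fall back to __tls_get_addr, which can allocate it.  */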

        .hidden _dl_tlsdesc_dynamic
        .global _dl_tlsdesc_dynamic
        .type   _dl_tlsdesc_dynamic,%function
        cfi_startproc
        .align 2
_dl_tlsdesc_dynamic:
# define NSAVEXREGPAIRS 2
        stp     x29, x30, [sp,#-(32+16*NSAVEXREGPAIRS)]!
        cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
        mov     x29, sp
        DELOUSE (0)

        /* Save just enough registers to support the fast path; if we
           fall into the slow path we will save additional registers.  */

        stp     x1, x2, [sp, #32+16*0]
        stp     x3, x4, [sp, #32+16*1]

        mrs     x4, tpidr_el0
        /* The ldar here happens after the load from [x0] at the call site
           (that is generated by the compiler as part of the TLS access ABI),
           so it reads the same value (this function is the final value of
           td->entry) and thus it synchronizes with the release store to
           td->entry in _dl_tlsdesc_resolve_rela_fixup ensuring that the load
           from [x0,#PTR_SIZE] here happens after the initialization of td->arg.  */
        ldar    PTR_REG (zr), [x0]
        ldr     PTR_REG (1), [x0,#TLSDESC_ARG]
        ldr     PTR_REG (0), [x4,#TCBHEAD_DTV]
        ldr     PTR_REG (3), [x1,#TLSDESC_GEN_COUNT]
        ldr     PTR_REG (2), [x0,#DTV_COUNTER]
        cmp     PTR_REG (3), PTR_REG (2)
        b.hi    2f
        ldr     PTR_REG (2), [x1,#TLSDESC_MODID]
        add     PTR_REG (0), PTR_REG (0), PTR_REG (2), lsl #(PTR_LOG_SIZE + 1)
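        /* Each DTV entry (dtv_t) is two pointers wide, so the module
           index is scaled by 2 * PTR_SIZE, i.e. the shift by
           PTR_LOG_SIZE + 1 above.  */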
        ldr     PTR_REG (0), [x0] /* Load val member of DTV entry.  */
        cmp     x0, #TLS_DTV_UNALLOCATED
        b.eq    2f
        ldr     PTR_REG (1), [x1,#TLSDESC_MODOFF]
        add     PTR_REG (0), PTR_REG (0), PTR_REG (1)
        sub     PTR_REG (0), PTR_REG (0), PTR_REG (4)
1:
        ldp     x1, x2, [sp, #32+16*0]
        ldp     x3, x4, [sp, #32+16*1]

        ldp     x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
        cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
# undef NSAVEXREGPAIRS
        RET
2:
        /* This is the slow path.  We need to call __tls_get_addr () which
           means we need to save and restore all the registers that the
           callee will trash.  */

        /* Save the remaining registers that we must treat as caller save.  */
# define NSAVEXREGPAIRS 7
        stp     x5, x6, [sp, #-16*NSAVEXREGPAIRS]!
        cfi_adjust_cfa_offset (16*NSAVEXREGPAIRS)
        stp     x7, x8, [sp, #16*1]
        stp     x9, x10, [sp, #16*2]
        stp     x11, x12, [sp, #16*3]
        stp     x13, x14, [sp, #16*4]
        stp     x15, x16, [sp, #16*5]
        stp     x17, x18, [sp, #16*6]

        SAVE_Q_REGISTERS

        mov     x0, x1
        bl      __tls_get_addr

        mrs     x1, tpidr_el0
        sub     PTR_REG (0), PTR_REG (0), PTR_REG (1)

        RESTORE_Q_REGISTERS

        ldp     x7, x8, [sp, #16*1]
        ldp     x9, x10, [sp, #16*2]
        ldp     x11, x12, [sp, #16*3]
        ldp     x13, x14, [sp, #16*4]
        ldp     x15, x16, [sp, #16*5]
        ldp     x17, x18, [sp, #16*6]
        ldp     x5, x6, [sp], #16*NSAVEXREGPAIRS
        cfi_adjust_cfa_offset (-16*NSAVEXREGPAIRS)
        b       1b
        cfi_endproc
        .size   _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
# undef NSAVEXREGPAIRS
#endif

/* This function is a wrapper for a lazy resolver for TLS_DESC
   RELA relocations.
   When the actual resolver returns, it will have adjusted the
   TLS descriptor such that we can tail-call it for it to return
   the TP offset of the symbol.  */

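/* Illustrative sketch (hypothetical C, argument list simplified):
   conceptually the wrapper saves the caller-visible register state,
   lets the C fixup routine finish the relocation, and then dispatches
   through the now-final descriptor:

     _dl_tlsdesc_resolve_rela_fixup (td, ...);   // updates td->entry/td->arg
     return td->entry (td);                      // call the real handler
*/
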
        .hidden _dl_tlsdesc_resolve_rela
        .global _dl_tlsdesc_resolve_rela
        .type   _dl_tlsdesc_resolve_rela,%function
        cfi_startproc
        .align 2
_dl_tlsdesc_resolve_rela:
#define NSAVEXREGPAIRS 9
        stp     x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
        cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
        mov     x29, sp
        stp     x1, x4, [sp, #32+16*0]
        stp     x5, x6, [sp, #32+16*1]
        stp     x7, x8, [sp, #32+16*2]
        stp     x9, x10, [sp, #32+16*3]
        stp     x11, x12, [sp, #32+16*4]
        stp     x13, x14, [sp, #32+16*5]
        stp     x15, x16, [sp, #32+16*6]
        stp     x17, x18, [sp, #32+16*7]
        str     x0, [sp, #32+16*8]

        SAVE_Q_REGISTERS

        DELOUSE (3)
        ldr     PTR_REG (1), [x3, #PTR_SIZE]
        bl      _dl_tlsdesc_resolve_rela_fixup

        RESTORE_Q_REGISTERS

        ldr     x0, [sp, #32+16*8]
        DELOUSE (0)
        ldr     PTR_REG (1), [x0]
        blr     x1

        ldp     x1, x4, [sp, #32+16*0]
        ldp     x5, x6, [sp, #32+16*1]
        ldp     x7, x8, [sp, #32+16*2]
        ldp     x9, x10, [sp, #32+16*3]
        ldp     x11, x12, [sp, #32+16*4]
        ldp     x13, x14, [sp, #32+16*5]
        ldp     x15, x16, [sp, #32+16*6]
        ldp     x17, x18, [sp, #32+16*7]
        ldp     x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
        cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
        ldp     x2, x3, [sp], #16
        cfi_adjust_cfa_offset (-16)
        RET
#undef NSAVEXREGPAIRS
        cfi_endproc
        .size   _dl_tlsdesc_resolve_rela, .-_dl_tlsdesc_resolve_rela

/* This function is a placeholder for lazy resolving of TLS
   relocations.  Once some thread starts resolving a TLS
   relocation, it sets up the TLS descriptor to use this
   resolver, such that other threads that would attempt to
   resolve it concurrently may skip the call to the original lazy
   resolver and go straight to a condition wait.

   When the actual resolver returns, it will have adjusted the
   TLS descriptor such that we can tail-call it for it to return
   the TP offset of the symbol.  */

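/* Illustrative sketch (hypothetical C, argument list simplified): a
   thread that lands here while another thread is mid-resolution waits
   for the descriptor to become final and then dispatches through it:

     _dl_tlsdesc_resolve_hold_fixup (td, ...);   // returns once td is final
     return td->entry (td);
*/
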
        .hidden _dl_tlsdesc_resolve_hold
        .global _dl_tlsdesc_resolve_hold
        .type   _dl_tlsdesc_resolve_hold,%function
        cfi_startproc
        .align 2
_dl_tlsdesc_resolve_hold:
#define NSAVEXREGPAIRS 10
1:
        stp     x29, x30, [sp, #-(32+16*NSAVEXREGPAIRS)]!
        cfi_adjust_cfa_offset (32+16*NSAVEXREGPAIRS)
        mov     x29, sp
        stp     x1, x2, [sp, #32+16*0]
        stp     x3, x4, [sp, #32+16*1]
        stp     x5, x6, [sp, #32+16*2]
        stp     x7, x8, [sp, #32+16*3]
        stp     x9, x10, [sp, #32+16*4]
        stp     x11, x12, [sp, #32+16*5]
        stp     x13, x14, [sp, #32+16*6]
        stp     x15, x16, [sp, #32+16*7]
        stp     x17, x18, [sp, #32+16*8]
        str     x0, [sp, #32+16*9]

        SAVE_Q_REGISTERS

        adr     x1, 1b
        bl      _dl_tlsdesc_resolve_hold_fixup

        RESTORE_Q_REGISTERS

        ldr     x0, [sp, #32+16*9]
        DELOUSE (0)
        ldr     PTR_REG (1), [x0]
        blr     x1

        ldp     x1, x2, [sp, #32+16*0]
        ldp     x3, x4, [sp, #32+16*1]
        ldp     x5, x6, [sp, #32+16*2]
        ldp     x7, x8, [sp, #32+16*3]
        ldp     x9, x10, [sp, #32+16*4]
        ldp     x11, x12, [sp, #32+16*5]
        ldp     x13, x14, [sp, #32+16*6]
        ldp     x15, x16, [sp, #32+16*7]
        ldp     x17, x18, [sp, #32+16*8]
        ldp     x29, x30, [sp], #(32+16*NSAVEXREGPAIRS)
        cfi_adjust_cfa_offset (-32-16*NSAVEXREGPAIRS)
        RET
        cfi_endproc
        .size   _dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
#undef NSAVEXREGPAIRS