/* Out-of-line LSE atomics for AArch64 architecture.
   Copyright (C) 2019-2024 Free Software Foundation, Inc.
   Contributed by Linaro Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/*
 * The problem that we are trying to solve is operating system deployment
 * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
 *
 * There are a number of potential solutions for this problem which have
 * been proposed and rejected for various reasons.  To recap:
 *
 * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/
 * if HWCAP_ATOMICS is set, allowing entire libraries to be overridden.
 * However, not all Linux distributions are happy with multiple builds,
 * and in any case it has no effect on the main application itself.
 *
 * (2) IFUNC.  We could put these functions into libgcc_s.so and have
 * a single copy of each function shared by all DSOs.  However, ARM is
 * concerned that the branch-to-indirect-branch implied by going through
 * a PLT, as required by IFUNC, is too much overhead for smaller CPUs.
 *
 * (3) Statically predicted direct branches.  This is the approach taken
 * here.  These functions are linked into every DSO that uses them.
 * All of the symbols are hidden, so the functions are reached via a
 * direct branch.  The choice of LSE vs. non-LSE is made via a one-byte
 * load followed by a well-predicted direct branch.  The functions are
 * compiled separately to minimize code size.
 *
 * Since these functions have hidden visibility and are never called
 * indirectly, they do not need to start with a BTI instruction.
 */
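
/* These entry points are intended to be called directly by compiler-generated
   code (GCC's -moutline-atomics).  As an illustration only, a hypothetical
   call site for a 4-byte acquire compare-and-swap might look like:

        mov     w0, w3          // expected value (register choices invented)
        mov     w1, w4          // desired value
        mov     x2, x5          // address of the atomic object
        bl      __aarch64_cas4_acq
        // on return, w0 holds the value that was observed in memory

   matching NAME(cas) below with SIZE == 4 and MODEL == 2.  */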

#include "auto-target.h"

/* Tell the assembler to accept LSE instructions.  */
#ifdef HAVE_AS_LSE
        .arch armv8-a+lse
#else
        .arch armv8-a
#endif

/* Declare the symbol gating the LSE implementations.  */
        .hidden __aarch64_have_lse_atomics

/* Turn size and memory model defines into mnemonic fragments.  */
#if SIZE == 1
# define S b
# define UXT uxtb
# define B 0x00000000
#elif SIZE == 2
# define S h
# define UXT uxth
# define B 0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
# define S
# define UXT mov
# if SIZE == 4
#  define B 0x80000000
# elif SIZE == 8
#  define B 0xc0000000
# endif
#else
# error
#endif
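
/* When the assembler cannot encode LSE instructions (no HAVE_AS_LSE), the
   raw ".inst" fallbacks below add B into the base opcode; B supplies the
   operand-size field of the A64 encoding (bits 31:30: 00 = byte, 01 = half,
   10 = word, 11 = doubleword).  */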

#if MODEL == 1
# define SUFF _relax
# define A
# define L
# define M 0x000000
# define N 0x000000
# define BARRIER
#elif MODEL == 2
# define SUFF _acq
# define A a
# define L
# define M 0x400000
# define N 0x800000
# define BARRIER
#elif MODEL == 3
# define SUFF _rel
# define A
# define L l
# define M 0x008000
# define N 0x400000
# define BARRIER
#elif MODEL == 4
# define SUFF _acq_rel
# define A a
# define L l
# define M 0x408000
# define N 0xc00000
# define BARRIER
#elif MODEL == 5
# define SUFF _sync
#ifdef L_swp
/* swp has _acq semantics.  */
# define A a
# define L
# define M 0x400000
# define N 0x800000
#else
/* All other _sync functions have _seq semantics.  */
# define A a
# define L l
# define M 0x408000
# define N 0xc00000
#endif
# define BARRIER dmb ish
#else
# error
#endif
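
/* M and N likewise feed the ".inst" fallbacks: M holds the acquire (bit 22)
   and release (bit 15) flags of the CAS-form encodings, N the acquire
   (bit 23) and release (bit 22) flags of the SWP/LD<op>-form encodings.
   A worked example, assuming SIZE == 2 and MODEL == 2: the CAS fallback is
   .inst 0x08a07c41 + 0x40000000 + 0x00400000 = 0x48e07c41, which should be
   the encoding of "casah w0, w1, [x2]".  */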

/* Concatenate symbols.  */
#define glue2_(A, B) A ## B
#define glue2(A, B) glue2_(A, B)
#define glue3_(A, B, C) A ## B ## C
#define glue3(A, B, C) glue3_(A, B, C)
#define glue4_(A, B, C, D) A ## B ## C ## D
#define glue4(A, B, C, D) glue4_(A, B, C, D)

/* Select the size of a register, given a regno.  */
#define x(N) glue2(x, N)
#define w(N) glue2(w, N)
#if SIZE < 8
# define s(N) w(N)
#else
# define s(N) x(N)
#endif

#define NAME(BASE) glue4(__aarch64_, BASE, SIZE, SUFF)
#if MODEL == 5
/* Drop A for _sync functions.  */
# define LDXR glue3(ld, xr, S)
#else
# define LDXR glue4(ld, A, xr, S)
#endif
#define STXR glue4(st, L, xr, S)
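
/* For example, with SIZE == 1 and MODEL == 2 the macros above expand to
   NAME(cas) = __aarch64_cas1_acq, LDXR = ldaxrb and STXR = stxrb (no
   release flag, so the plain store-exclusive is used).  */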

/* Temporary registers used.  Other than these, only the return value
   register (x0) and the flags are modified.  */
#define tmp0 16
#define tmp1 17
#define tmp2 15
#define tmp3 14
#define tmp4 13

/* Start and end a function.  */
        .macro STARTFN name
        .text
        .balign 16
        .globl \name
        .hidden \name
        .type \name, %function
        .cfi_startproc
\name:
        .endm

        .macro ENDFN name
        .cfi_endproc
        .size \name, . - \name
        .endm

/* Branch to LABEL if LSE is disabled.  */
        .macro JUMP_IF_NOT_LSE label
        adrp x(tmp0), __aarch64_have_lse_atomics
        ldrb w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
        cbz w(tmp0), \label
        .endm
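
/* __aarch64_have_lse_atomics is a one-byte flag defined in lse-init.c in
   this directory; a constructor sets it at startup when the runtime reports
   LSE support (on Linux, from HWCAP_ATOMICS).  */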

#ifdef L_cas

STARTFN NAME(cas)
        JUMP_IF_NOT_LSE 8f

#if SIZE < 16
#ifdef HAVE_AS_LSE
# define CAS glue4(cas, A, L, S) s(0), s(1), [x2]
#else
# define CAS .inst 0x08a07c41 + B + M
#endif

        CAS /* s(0), s(1), [x2] */
        ret

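        /* LL/SC fallback: keep the expected value in tmp0 (zero-extended so
           the comparison is well-defined for sub-word sizes), then loop on
           load-exclusive/compare/store-exclusive until the store succeeds
           or the comparison fails.  */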
8:      UXT s(tmp0), s(0)
0:      LDXR s(0), [x2]
        cmp s(0), s(tmp0)
        bne 1f
        STXR w(tmp1), s(1), [x2]
        cbnz w(tmp1), 0b
1:      BARRIER
        ret

#else
#if MODEL == 5
/* Drop A for _sync functions.  */
# define LDXP glue2(ld, xp)
#else
# define LDXP glue3(ld, A, xp)
#endif
#define STXP glue3(st, L, xp)
#ifdef HAVE_AS_LSE
# define CASP glue3(casp, A, L) x0, x1, x2, x3, [x4]
#else
# define CASP .inst 0x48207c82 + M
#endif

        CASP /* x0, x1, x2, x3, [x4] */
        ret

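        /* 128-bit LL/SC fallback.  The expected pair is saved in tmp0/tmp1;
           the ccmp/csel sequence stores back either the new pair or the pair
           just loaded, so the stxp executes on every iteration (presumably
           because ldxp alone does not give a single-copy atomic 128-bit read
           unless the paired store-exclusive succeeds).  */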
8:      mov x(tmp0), x0
        mov x(tmp1), x1
0:      LDXP x0, x1, [x4]
        cmp x0, x(tmp0)
        ccmp x1, x(tmp1), #0, eq
        csel x(tmp2), x2, x0, eq
        csel x(tmp3), x3, x1, eq
        STXP w(tmp4), x(tmp2), x(tmp3), [x4]
        cbnz w(tmp4), 0b
        BARRIER
        ret

#endif

ENDFN NAME(cas)
#endif

#ifdef L_swp
#ifdef HAVE_AS_LSE
# define SWP glue4(swp, A, L, S) s(0), s(0), [x1]
#else
# define SWP .inst 0x38208020 + B + N
#endif

STARTFN NAME(swp)
        JUMP_IF_NOT_LSE 8f

        SWP /* s(0), s(0), [x1] */
        ret

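        /* LL/SC fallback: save the value to store in tmp0, then exchange it
           with memory under a load-exclusive/store-exclusive loop; the old
           value is returned in s(0).  */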
8:      mov s(tmp0), s(0)
0:      LDXR s(0), [x1]
        STXR w(tmp1), s(tmp0), [x1]
        cbnz w(tmp1), 0b
        BARRIER
        ret

ENDFN NAME(swp)
#endif

#if defined(L_ldadd) || defined(L_ldclr) \
    || defined(L_ldeor) || defined(L_ldset)

#ifdef L_ldadd
#define LDNM ldadd
#define OP add
#define OPN 0x0000
#elif defined(L_ldclr)
#define LDNM ldclr
#define OP bic
#define OPN 0x1000
#elif defined(L_ldeor)
#define LDNM ldeor
#define OP eor
#define OPN 0x2000
#elif defined(L_ldset)
#define LDNM ldset
#define OP orr
#define OPN 0x3000
#else
#error
#endif
#ifdef HAVE_AS_LSE
# define LDOP glue4(LDNM, A, L, S) s(0), s(0), [x1]
#else
# define LDOP .inst 0x38200020 + OPN + B + N
#endif

STARTFN NAME(LDNM)
        JUMP_IF_NOT_LSE 8f

        LDOP /* s(0), s(0), [x1] */
        ret

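        /* LL/SC fallback: recompute OP into a scratch register on each
           iteration and retry until the store-exclusive succeeds; the old
           value loaded from memory is left in the return register.  */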
8:      mov s(tmp0), s(0)
0:      LDXR s(0), [x1]
        OP s(tmp1), s(0), s(tmp0)
        STXR w(tmp2), s(tmp1), [x1]
        cbnz w(tmp2), 0b
        BARRIER
        ret

ENDFN NAME(LDNM)
#endif

/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
#define FEATURE_1_AND 0xc0000000
#define FEATURE_1_BTI 1
#define FEATURE_1_PAC 2

/* Supported features based on the code generation options.  */
#if defined(__ARM_FEATURE_BTI_DEFAULT)
# define BTI_FLAG FEATURE_1_BTI
#else
# define BTI_FLAG 0
#endif

#if __ARM_FEATURE_PAC_DEFAULT & 3
# define PAC_FLAG FEATURE_1_PAC
#else
# define PAC_FLAG 0
#endif

/* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
#define GNU_PROPERTY(type, value) \
  .section .note.gnu.property, "a"; \
  .p2align 3; \
  .word 4; \
  .word 16; \
  .word 5; \
  .asciz "GNU"; \
  .word type; \
  .word 4; \
  .word value; \
  .word 0;
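
/* Note layout emitted above, for reference: namesz = 4 ("GNU\0"),
   descsz = 16, type = 5 (NT_GNU_PROPERTY_TYPE_0); the descriptor is one
   property: pr_type, pr_datasz = 4, pr_data = value, then 4 bytes of
   padding to the 8-byte alignment required for 64-bit ELF objects.  */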

#if defined(__linux__) || defined(__FreeBSD__)
.section .note.GNU-stack, "", %progbits

/* Add GNU property note if built with branch protection.  */
# if (BTI_FLAG|PAC_FLAG) != 0
GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
# endif
#endif