1 /* Out-of-line LSE atomics for AArch64 architecture.
2 Copyright (C) 2019-2024 Free Software Foundation, Inc.
3 Contributed by Linaro Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 Under Section 7 of GPL version 3, you are granted additional
18 permissions described in the GCC Runtime Library Exception, version
19 3.1, as published by the Free Software Foundation.
21 You should have received a copy of the GNU General Public License and
22 a copy of the GCC Runtime Library Exception along with this program;
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24 <http://www.gnu.org/licenses/>. */
27 * The problem that we are trying to solve is operating system deployment
28 * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
30 * There are a number of potential solutions for this problem which have
31 * been proposed and rejected for various reasons. To recap:
33 * (1) Multiple builds. The dynamic linker will examine /lib64/atomics/
34 * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
35 * However, not all Linux distributions are happy with multiple builds,
36 * and anyway it has no effect on main applications.
38 * (2) IFUNC. We could put these functions into libgcc_s.so, and have
39 * a single copy of each function for all DSOs. However, ARM is concerned
40 * that the branch-to-indirect-branch that is implied by using a PLT,
41 * as required by IFUNC, is too much overhead for smaller cpus.
43 * (3) Statically predicted direct branches. This is the approach that
44 * is taken here. These functions are linked into every DSO that uses them.
45 * All of the symbols are hidden, so that the functions are called via a
46 * direct branch. The choice of LSE vs non-LSE is done via one byte load
47 * followed by a well-predicted direct branch. The functions are compiled
48 * separately to minimize code size.
50 * Since these functions have hidden visibility and are never called
51 * indirectly, they do not need to start with a BTI instruction.
54 #include "auto-target.h"
56 /* Tell the assembler to accept LSE instructions. */
63 /* Declare the symbol gating the LSE implementations.  The symbol is
   given hidden visibility; JUMP_IF_NOT_LSE reads it as a single byte
   (adrp + ldrb) to choose between the LSE and non-LSE code paths.  */
64 .hidden __aarch64_have_lse_atomics
66 /* Turn size and memory model defines into mnemonic fragments. */
75 #elif SIZE == 4 || SIZE == 8 || SIZE == 16
109 # define SUFF _acq_rel
118 /* swp has _acq semantics. */
124 /* All other _sync functions have _seq semantics. */
130 # define BARRIER dmb ish
135 /* Concatenate symbols.  Each glueN forwards to a glueN_ helper so that
   its arguments are macro-expanded before the ## operator pastes them;
   using ## directly on the arguments would paste the unexpanded names
   (for example the literal token SIZE rather than its value).  */
136 #define glue2_(A, B) A ## B
137 #define glue2(A, B) glue2_(A, B)
138 #define glue3_(A, B, C) A ## B ## C
139 #define glue3(A, B, C) glue3_(A, B, C)
140 #define glue4_(A, B, C, D) A ## B ## C ## D
141 #define glue4(A, B, C, D) glue4_(A, B, C, D)
143 /* Select the size of a register, given a regno.  x(N) yields the token
   xN and w(N) the token wN.  Because the paste goes through glue2, N
   may itself be a macro (as in x(tmp0)) and is expanded first.  */
144 #define x(N) glue2(x, N)
145 #define w(N) glue2(w, N)
/* Compose a function symbol name: the prefix __aarch64_, then BASE, then
   the expansions of SIZE and SUFF.  For instance, BASE = cas with SIZE
   defined as 4 and SUFF as _acq_rel gives __aarch64_cas4_acq_rel.  */
152 #define NAME(BASE) glue4(__aarch64_, BASE, SIZE, SUFF)
154 /* Drop A for _sync functions. */
155 # define LDXR glue3(ld, xr, S)
157 # define LDXR glue4(ld, A, xr, S)
159 #define STXR glue4(st, L, xr, S)
161 /* Temporary registers used. Other than these, only the return value
162 register (x0) and the flags are modified. */
169 /* Start and end a function. */
175 .type \name, %function
182 .size \name, . - \name
185 /* Branch to LABEL if LSE is disabled. */
186 .macro JUMP_IF_NOT_LSE label
187 adrp x(tmp0), __aarch64_have_lse_atomics
188 ldrb w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
199 # define CAS glue4(cas, A, L, S) s(0), s(1), [x2]
201 # define CAS .inst 0x08a07c41 + B + M
204 CAS /* s(0), s(1), [x2] */
211 STXR w(tmp1), s(1), [x2]
218 /* Drop A for _sync functions. */
219 # define LDXP glue2(ld, xp)
221 # define LDXP glue3(ld, A, xp)
223 #define STXP glue3(st, L, xp)
225 # define CASP glue3(casp, A, L) x0, x1, x2, x3, [x4]
227 # define CASP .inst 0x48207c82 + M
230 CASP /* x0, x1, x2, x3, [x4] */
237 ccmp x1, x(tmp1), #0, eq
238 csel x(tmp2), x2, x0, eq
239 csel x(tmp3), x3, x1, eq
240 STXP w(tmp4), x(tmp2), x(tmp3), [x4]
252 # define SWP glue4(swp, A, L, S) s(0), s(0), [x1]
254 # define SWP .inst 0x38208020 + B + N
260 SWP /* s(0), s(0), [x1] */
265 STXR w(tmp1), s(tmp0), [x1]
273 #if defined(L_ldadd) || defined(L_ldclr) \
274 || defined(L_ldeor) || defined(L_ldset)
280 #elif defined(L_ldclr)
284 #elif defined(L_ldeor)
288 #elif defined(L_ldset)
296 # define LDOP glue4(LDNM, A, L, S) s(0), s(0), [x1]
298 # define LDOP .inst 0x38200020 + OPN + B + N
304 LDOP /* s(0), s(0), [x1] */
309 OP s(tmp1), s(0), s(tmp0)
310 STXR w(tmp2), s(tmp1), [x1]
318 /* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.
   FEATURE_1_AND is used as the property type in the GNU_PROPERTY
   invocation at the end of this file; FEATURE_1_BTI and FEATURE_1_PAC
   are distinct bits that are OR-ed (through BTI_FLAG and PAC_FLAG) to
   form that property's value.  */
319 #define FEATURE_1_AND 0xc0000000
320 #define FEATURE_1_BTI 1
321 #define FEATURE_1_PAC 2
323 /* Supported features based on the code generation options. */
324 #if defined(__ARM_FEATURE_BTI_DEFAULT)
325 # define BTI_FLAG FEATURE_1_BTI
330 #if __ARM_FEATURE_PAC_DEFAULT & 3
331 # define PAC_FLAG FEATURE_1_PAC
336 /* Add a NT_GNU_PROPERTY_TYPE_0 note. */
337 #define GNU_PROPERTY(type, value) \
338 .section .note.gnu.property, "a"; \
349 #if defined(__linux__) || defined(__FreeBSD__)
350 .section .note.GNU-stack, "", %progbits
352 /* Add GNU property note if built with branch protection. */
353 # if (BTI_FLAG|PAC_FLAG) != 0
354 GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)