/* libgcc/config/aarch64/lse.S -- out-of-line LSE atomic helpers for AArch64.  */
/* Out-of-line LSE atomics for AArch64 architecture.
   Copyright (C) 2019-2020 Free Software Foundation, Inc.
   Contributed by Linaro Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */
25
/*
 * The problem that we are trying to solve is operating system deployment
 * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
 *
 * There are a number of potential solutions for this problem which have
 * been proposed and rejected for various reasons.  To recap:
 *
 * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/
 * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
 * However, not all Linux distributions are happy with multiple builds,
 * and anyway it has no effect on main applications.
 *
 * (2) IFUNC.  We could put these functions into libgcc_s.so, and have
 * a single copy of each function for all DSOs.  However, ARM is concerned
 * that the branch-to-indirect-branch that is implied by using a PLT,
 * as required by IFUNC, is too much overhead for smaller cpus.
 *
 * (3) Statically predicted direct branches.  This is the approach that
 * is taken here.  These functions are linked into every DSO that uses them.
 * All of the symbols are hidden, so that the functions are called via a
 * direct branch.  The choice of LSE vs non-LSE is done via one byte load
 * followed by a well-predicted direct branch.  The functions are compiled
 * separately to minimize code size.
 */
50
/* Tell the assembler to accept LSE instructions.  */
	.arch	armv8-a+lse

/* Declare the symbol gating the LSE implementations.  A one-byte flag;
   nonzero selects the LSE fast paths below.  NOTE(review): defined and
   initialized elsewhere (presumably from HWCAP_ATOMICS at startup) --
   not visible in this file.  */
	.hidden	__aarch64_have_lse_atomics
56
/* Turn size and memory model defines into mnemonic fragments.

   S   is the size suffix appended to load/store mnemonics (b = byte,
       h = halfword, empty = word/doubleword/pair).
   UXT is the instruction used to widen the expected CAS value so the
       comparison is done on the full register (plain mov when the
       operand already fills the register).  */
#if SIZE == 1
# define S     b
# define UXT   uxtb
#elif SIZE == 2
# define S     h
# define UXT   uxth
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
# define S
# define UXT   mov
#else
# error
#endif

/* A and L are the acquire/release letters spliced into mnemonics:
   ld{A}xr / st{L}xr / cas{A}{L} etc.  MODEL: 1 = relaxed, 2 = acquire,
   3 = release, 4 = acquire-release.  SUFF names the helper symbol.  */
#if MODEL == 1
# define SUFF  _relax
# define A
# define L
#elif MODEL == 2
# define SUFF  _acq
# define A     a
# define L
#elif MODEL == 3
# define SUFF  _rel
# define A
# define L     l
#elif MODEL == 4
# define SUFF  _acq_rel
# define A     a
# define L     l
#else
# error
#endif

/* Concatenate symbols.  */
#define glue2_(A, B)		A ## B
#define glue2(A, B)		glue2_(A, B)
#define glue3_(A, B, C)		A ## B ## C
#define glue3(A, B, C)		glue3_(A, B, C)
#define glue4_(A, B, C, D)	A ## B ## C ## D
#define glue4(A, B, C, D)	glue4_(A, B, C, D)

/* Select the size of a register, given a regno: w-view for sub-word
   and word operands, x-view for 8/16-byte operands.  */
#define x(N)			glue2(x, N)
#define w(N)			glue2(w, N)
#if SIZE < 8
# define s(N)			w(N)
#else
# define s(N)			x(N)
#endif

/* Helper symbol name, e.g. __aarch64_cas4_acq.  */
#define NAME(BASE)		glue4(__aarch64_, BASE, SIZE, SUFF)
/* Load/store-exclusive mnemonics with the right ordering and size.  */
#define LDXR			glue4(ld, A, xr, S)
#define STXR			glue4(st, L, xr, S)

/* Temporary registers used.  Other than these, only the return value
   register (x0) and the flags are modified.  x16/x17 (IP0/IP1) and x15
   are caller-clobbered under AAPCS64, so no save/restore is needed.  */
#define tmp0			16
#define tmp1			17
#define tmp2			15
117
/* Start and end a function.  Every helper is hidden so callers reach it
   with a direct branch, never through a PLT (see rationale above).  */
.macro	STARTFN name
	.text
	.balign	16
	.globl	\name
	.hidden	\name
	.type	\name, %function
	.cfi_startproc
\name:
.endm

.macro	ENDFN name
	.cfi_endproc
	.size	\name, . - \name
.endm

/* Branch to LABEL if LSE is disabled.  Loads the one-byte gate flag
   via adrp + :lo12: page addressing.  Clobbers only x(tmp0); cbz does
   not affect the flags.  */
.macro	JUMP_IF_NOT_LSE label
	adrp	x(tmp0), __aarch64_have_lse_atomics
	ldrb	w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
	cbz	w(tmp0), \label
.endm
140
#ifdef L_cas

/* Compare-and-swap helper:
     TYPE __aarch64_casN_MODEL (TYPE expected, TYPE desired, TYPE *ptr)
   SIZE < 16:  x0/s0 = expected, x1/s1 = desired, x2 = ptr.
   SIZE == 16: x0:x1 = expected, x2:x3 = desired, x4 = ptr.
   Returns the previous contents of *ptr in the value register(s);
   callers compare that against EXPECTED to detect success.  */

STARTFN	NAME(cas)
	JUMP_IF_NOT_LSE	8f

#if SIZE < 16
#define CAS	glue4(cas, A, L, S)

	CAS		s(0), s(1), [x2]	/* Single-instruction LSE CAS.  */
	ret

	/* LL/SC fallback.  Zero-extend the expected value so the full-width
	   compare below matches what LDXR loads.  */
8:	UXT		s(tmp0), s(0)
0:	LDXR		s(0), [x2]		/* s0 = old value (= result).  */
	cmp		s(0), s(tmp0)
	bne		1f			/* Mismatch: return old value.  */
	STXR		w(tmp1), s(1), [x2]	/* Try to store DESIRED.  */
	cbnz		w(tmp1), 0b		/* Exclusive failed: retry.  */
1:	ret

#else
#define LDXP	glue3(ld, A, xp)
#define STXP	glue3(st, L, xp)
#define CASP	glue3(casp, A, L)

	CASP		x0, x1, x2, x3, [x4]	/* 16-byte LSE CAS pair.  */
	ret

	/* LL/SC fallback for the 16-byte case.  Save the expected value
	   so x0:x1 can receive the loaded (returned) contents.  */
8:	mov		x(tmp0), x0
	mov		x(tmp1), x1
0:	LDXP		x0, x1, [x4]		/* x0:x1 = old value (= result).  */
	cmp		x0, x(tmp0)
	ccmp		x1, x(tmp1), #0, eq
	bne		1f			/* Mismatch: return old value.  */
	/* BUGFIX: store the DESIRED value x2:x3, matching the CASP operand
	   order above.  The previous code stored x(tmp0):x(tmp1), i.e. the
	   expected OLD value, so a successful 16-byte CAS wrote back the
	   old contents instead of the new ones.  */
	STXP		w(tmp2), x2, x3, [x4]
	cbnz		w(tmp2), 0b		/* Exclusive failed: retry.  */
1:	ret

#endif

ENDFN	NAME(cas)
#endif
182
#ifdef L_swp
#define SWP	glue4(swp, A, L, S)

/* Atomic exchange helper:
     TYPE __aarch64_swpN_MODEL (TYPE value, TYPE *ptr)
   x0/s0 = value, x1 = ptr.  Stores VALUE into *ptr and returns the
   previous contents in s0.  */

STARTFN	NAME(swp)
	JUMP_IF_NOT_LSE	8f

	SWP		s(0), s(0), [x1]	/* Single-instruction LSE swap.  */
	ret

	/* LL/SC fallback.  */
8:	mov		s(tmp0), s(0)		/* Save the value to store.  */
0:	LDXR		s(0), [x1]		/* s0 = old value (= result).  */
	STXR		w(tmp1), s(tmp0), [x1]
	cbnz		w(tmp1), 0b		/* Exclusive failed: retry.  */
	ret

ENDFN	NAME(swp)
#endif
200
#if defined(L_ldadd) || defined(L_ldclr) \
    || defined(L_ldeor) || defined(L_ldset)

/* Atomic load-and-operate helpers:
     TYPE __aarch64_ldNMn_MODEL (TYPE value, TYPE *ptr)
   x0/s0 = operand, x1 = ptr.  Atomically applies OP to *ptr with the
   operand and returns the OLD contents in s0.  LDNM is the LSE mnemonic
   stem; OP is the equivalent ALU instruction for the LL/SC fallback.  */

#ifdef L_ldadd
#define LDNM	ldadd
#define OP	add
#elif defined(L_ldclr)
#define LDNM	ldclr
#define OP	bic		/* ldclr clears the bits set in the operand.  */
#elif defined(L_ldeor)
#define LDNM	ldeor
#define OP	eor
#elif defined(L_ldset)
#define LDNM	ldset
#define OP	orr
#else
#error
#endif
#define LDOP	glue4(LDNM, A, L, S)

STARTFN	NAME(LDNM)
	JUMP_IF_NOT_LSE	8f

	LDOP		s(0), s(0), [x1]	/* Single-instruction LSE form.  */
	ret

	/* LL/SC fallback.  tmp2, not tmp1, receives the STXR status since
	   tmp1 holds the value being stored.  */
8:	mov		s(tmp0), s(0)		/* Save the operand.  */
0:	LDXR		s(0), [x1]		/* s0 = old value (= result).  */
	OP		s(tmp1), s(0), s(tmp0)	/* Compute the new value.  */
	STXR		w(tmp2), s(tmp1), [x1]
	cbnz		w(tmp2), 0b		/* Exclusive failed: retry.  */
	ret

ENDFN	NAME(LDNM)
#endif