]>
Commit | Line | Data |
---|---|---|
33befddc | 1 | /* Out-of-line LSE atomics for AArch64 architecture. |
8d9254fc | 2 | Copyright (C) 2019-2020 Free Software Foundation, Inc. |
33befddc RH |
3 | Contributed by Linaro Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it under | |
8 | the terms of the GNU General Public License as published by the Free | |
9 | Software Foundation; either version 3, or (at your option) any later | |
10 | version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 | for more details. | |
16 | ||
17 | Under Section 7 of GPL version 3, you are granted additional | |
18 | permissions described in the GCC Runtime Library Exception, version | |
19 | 3.1, as published by the Free Software Foundation. | |
20 | ||
21 | You should have received a copy of the GNU General Public License and | |
22 | a copy of the GCC Runtime Library Exception along with this program; | |
23 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
24 | <http://www.gnu.org/licenses/>. */ | |
25 | ||
26 | /* | |
27 | * The problem that we are trying to solve is operating system deployment | |
28 | * of ARMv8.1-Atomics, also known as Large System Extensions (LSE). |
29 | * | |
30 | * There are a number of potential solutions for this problem which have | |
31 | * been proposed and rejected for various reasons. To recap: | |
32 | * | |
33 | * (1) Multiple builds. The dynamic linker will examine /lib64/atomics/ | |
34 | * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten. | |
35 | * However, not all Linux distributions are happy with multiple builds, | |
36 | * and anyway it has no effect on main applications. | |
37 | * | |
38 | * (2) IFUNC. We could put these functions into libgcc_s.so, and have | |
39 | * a single copy of each function for all DSOs. However, ARM is concerned | |
40 | * that the branch-to-indirect-branch that is implied by using a PLT, | |
41 | * as required by IFUNC, is too much overhead for smaller cpus. | |
42 | * | |
43 | * (3) Statically predicted direct branches. This is the approach that | |
44 | * is taken here. These functions are linked into every DSO that uses them. | |
45 | * All of the symbols are hidden, so that the functions are called via a | |
46 | * direct branch. The choice of LSE vs non-LSE is done via one byte load | |
47 | * followed by a well-predicted direct branch. The functions are compiled | |
48 | * separately to minimize code size. | |
49 | */ | |
50 | ||
/* Tell the assembler to accept LSE instructions.  */
	.arch	armv8-a+lse

/* Declare the symbol gating the LSE implementations.  A nonzero byte
   selects the LSE fast path in JUMP_IF_NOT_LSE below.  */
	.hidden	__aarch64_have_lse_atomics

/* Turn the SIZE define into operand-size mnemonic fragments:
   S   = size suffix for the exclusive load/store (b, h, or none),
   UXT = zero-extend instruction matching the access size
         (a plain mov suffices for full-register sizes).  */
#if SIZE == 1
# define S	b
# define UXT	uxtb
#elif SIZE == 2
# define S	h
# define UXT	uxth
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
# define S
# define UXT	mov
#else
# error
#endif

/* Turn the MODEL define into memory-ordering mnemonic fragments and a
   function-name suffix:
   A = "a" when the model includes acquire semantics,
   L = "l" when the model includes release semantics.  */
#if MODEL == 1
# define SUFF	_relax
# define A
# define L
#elif MODEL == 2
# define SUFF	_acq
# define A	a
# define L
#elif MODEL == 3
# define SUFF	_rel
# define A
# define L	l
#elif MODEL == 4
# define SUFF	_acq_rel
# define A	a
# define L	l
#else
# error
#endif

/* Concatenate tokens after macro expansion.  */
#define glue2_(A, B)		A ## B
#define glue2(A, B)		glue2_(A, B)
#define glue3_(A, B, C)		A ## B ## C
#define glue3(A, B, C)		glue3_(A, B, C)
#define glue4_(A, B, C, D)	A ## B ## C ## D
#define glue4(A, B, C, D)	glue4_(A, B, C, D)

/* Select the register view matching the access size, given a regno:
   w-register for sub-64-bit accesses, x-register otherwise.  */
#define x(N)	glue2(x, N)
#define w(N)	glue2(w, N)
#if SIZE < 8
# define s(N)	w(N)
#else
# define s(N)	x(N)
#endif

/* Build the public symbol name and the size/model-specific exclusive
   load/store mnemonics from the fragments above.  */
#define NAME(BASE)	glue4(__aarch64_, BASE, SIZE, SUFF)
#define LDXR		glue4(ld, A, xr, S)
#define STXR		glue4(st, L, xr, S)

/* Temporary registers used.  Other than these, only the return value
   register (x0) and the flags are modified.  */
#define tmp0	16
#define tmp1	17
#define tmp2	15
117 | ||
/* Open a function: align the entry point, export the hidden symbol,
   mark it as code, and begin its CFI region.  */
	.macro	STARTFN name
	.text
	.balign	16
	.globl	\name
	.hidden	\name
	.type	\name, %function
	.cfi_startproc
\name:
	.endm
128 | ||
/* Close a function: end its CFI region and record the symbol size.  */
	.macro	ENDFN name
	.cfi_endproc
	.size	\name, . - \name
	.endm
133 | ||
/* Branch to LABEL if LSE is disabled: load the gating byte and branch
   on zero.  Clobbers only w(tmp0).  */
	.macro	JUMP_IF_NOT_LSE label
	adrp	x(tmp0), __aarch64_have_lse_atomics
	ldrb	w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
	cbz	w(tmp0), \label
	.endm
140 | ||
#ifdef L_cas

/* Atomic compare-and-swap.
   SIZE < 16:  s(0) = expected value, s(1) = desired value, x2 = pointer.
   SIZE == 16: x0:x1 = expected pair, x2:x3 = desired pair, x4 = pointer.
   Returns the value previously in memory in s(0) (or x0:x1).
   Fast path is a single CAS/CASP; fallback is an exclusive-loop.  */
STARTFN	NAME(cas)
	JUMP_IF_NOT_LSE	8f

#if SIZE < 16
#define CAS	glue4(cas, A, L, S)

	CAS		s(0), s(1), [x2]
	ret

	/* Fallback: zero-extend the expected value so the comparison
	   matches the (zero-extended) exclusive load.  */
8:	UXT		s(tmp0), s(0)
0:	LDXR		s(0), [x2]
	cmp		s(0), s(tmp0)
	bne		1f
	STXR		w(tmp1), s(1), [x2]
	cbnz		w(tmp1), 0b		/* Retry on lost reservation.  */
1:	ret

#else
#define LDXP	glue3(ld, A, xp)
#define STXP	glue3(st, L, xp)
#define CASP	glue3(casp, A, L)

	CASP		x0, x1, x2, x3, [x4]
	ret

	/* Fallback: save the expected pair (x0:x1 is overwritten by the
	   exclusive load, which also produces the return value).  */
8:	mov		x(tmp0), x0
	mov		x(tmp1), x1
0:	LDXP		x0, x1, [x4]
	cmp		x0, x(tmp0)
	ccmp		x1, x(tmp1), #0, eq
	bne		1f
	/* BUGFIX: store the desired NEW pair x2:x3.  The previous code
	   stored x(tmp0):x(tmp1) — the saved EXPECTED pair — so a
	   successful compare wrote the old value back and the CAS never
	   took effect.  Matches upstream GCC lse.S.  */
	STXP		w(tmp2), x2, x3, [x4]
	cbnz		w(tmp2), 0b		/* Retry on lost reservation.  */
1:	ret

#endif

ENDFN	NAME(cas)
#endif
182 | ||
#ifdef L_swp
#define SWP	glue4(swp, A, L, S)

/* Atomic exchange.
   In:  s(0) = new value, x1 = pointer.
   Out: s(0) = value previously in memory.
   Fast path is a single SWP; fallback is an exclusive-loop.  */
STARTFN	NAME(swp)
	JUMP_IF_NOT_LSE	8f

	SWP		s(0), s(0), [x1]
	ret

	/* Fallback: stash the new value, since the exclusive load
	   overwrites s(0) with the old contents.  */
8:	mov		s(tmp0), s(0)
0:	LDXR		s(0), [x1]
	STXR		w(tmp1), s(tmp0), [x1]
	cbnz		w(tmp1), 0b		/* Retry on lost reservation.  */
	ret

ENDFN	NAME(swp)
#endif
200 | ||
#if defined(L_ldadd) || defined(L_ldclr) \
    || defined(L_ldeor) || defined(L_ldset)

/* Map the requested operation onto an LSE mnemonic (LDNM) and the ALU
   instruction (OP) used by the exclusive-loop fallback.  */
#ifdef L_ldadd
#define LDNM	ldadd
#define OP	add
#elif defined(L_ldclr)
#define LDNM	ldclr
#define OP	bic
#elif defined(L_ldeor)
#define LDNM	ldeor
#define OP	eor
#elif defined(L_ldset)
#define LDNM	ldset
#define OP	orr
#else
#error
#endif
#define LDOP	glue4(LDNM, A, L, S)

/* Atomic load-and-operate.
   In:  s(0) = operand, x1 = pointer.
   Out: s(0) = value previously in memory; memory receives OP(old, operand).  */
STARTFN	NAME(LDNM)
	JUMP_IF_NOT_LSE	8f

	LDOP		s(0), s(0), [x1]
	ret

	/* Fallback: tmp0 holds the operand, tmp1 the computed result, and
	   tmp2 the store-exclusive status, so neither the operand nor the
	   returned old value is clobbered if the loop retries.  */
8:	mov		s(tmp0), s(0)
0:	LDXR		s(0), [x1]
	OP		s(tmp1), s(0), s(tmp0)
	STXR		w(tmp2), s(tmp1), [x1]
	cbnz		w(tmp2), 0b		/* Retry on lost reservation.  */
	ret

ENDFN	NAME(LDNM)
#endif