/* Macro library used to help during conversion of scalar math functions to
   vectorized SIMD equivalents on AMD GCN.

   Copyright (C) 2023-2024 Free Software Foundation, Inc.
   Contributed by Siemens.

   This file is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 3, or (at your option) any
   later version.

   This file is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

typedef union {
  v2sf t_v2sf;
  v4sf t_v4sf;
  v8sf t_v8sf;
  v16sf t_v16sf;
  v32sf t_v32sf;
  v64sf t_v64sf;

  v2df t_v2df;
  v4df t_v4df;
  v8df t_v8df;
  v16df t_v16df;
  v32df t_v32df;
  v64df t_v64df;

  v64qi t_v64qi;
  v64hi t_v64hi;

  v2si t_v2si;
  v4si t_v4si;
  v8si t_v8si;
  v16si t_v16si;
  v32si t_v32si;
  v64si t_v64si;

  v64usi t_v64usi;

  v2di t_v2di;
  v4di t_v4di;
  v8di t_v8di;
  v16di t_v16di;
  v32di t_v32di;
  v64di t_v64di;
} vector_union;

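/* Illustrative sketch (not part of the original interface): vector_union
   can be used to reinterpret the bits of one full-width vector as another,
   assuming the v64df/v64di typedefs supplied by the including file and a
   hypothetical v64df value __x:

     vector_union __u;
     __u.t_v64df = __x;
     v64di __bits = __u.t_v64di;   (same bits, viewed as 64-bit integers)  */
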
/* Cast between vectors with a different number of elements, or type.  */

#define VGPR_CAST(to_t, from) \
({ \
  to_t __res; \
  __asm__ ("" : "=v"(__res) : "0"(from)); \
  __res; \
})

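/* Illustrative sketch: reinterpreting a narrow vector as a full 64-lane
   vector without moving any data.  The name __narrow is hypothetical and
   the v4sf/v64sf typedefs are assumed to come from the including file;
   lanes 4..63 of the result are left undefined:

     v64sf __wide = VGPR_CAST (v64sf, __narrow);  */
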
#define PACK_SI_PAIR(low, high) \
({ \
  v64udi __res; \
  asm ("v_mov_b32\t%L0, %1\n\t" \
       "v_mov_b32\t%H0, %2" \
       : "=&v"(__res) : "v0"(low), "v"(high), "e"(-1L)); \
  __res; \
})

#define UNPACK_SI_LOW(to_t, pair) VGPR_CAST(to_t, pair)
#define UNPACK_SI_HIGH(to_t, pair) \
({ \
  to_t __res; \
  asm ("v_mov_b32\t%0, %H1" : "=v"(__res) : "v"(pair), "e"(-1L)); \
  __res; \
})

#define PACK_DI_PAIR(low, high) \
({ \
  v64uti __res; \
  asm ("v_mov_b32\t%L0, %L1\n\t" \
       "v_mov_b32\t%H0, %H1\n\t" \
       "v_mov_b32\t%J0, %L2\n\t" \
       "v_mov_b32\t%K0, %H2" \
       : "=&v"(__res) : "v0"(low), "v"(high), "e"(-1L)); \
  __res; \
})

#define UNPACK_DI_LOW(to_t, pair) VGPR_CAST(to_t, pair)
#define UNPACK_DI_HIGH(to_t, pair) \
({ \
  to_t __res; \
  asm ("v_mov_b32\t%L0, %J1\n\t" \
       "v_mov_b32\t%H0, %K1" : "=v"(__res) : "v"(pair), "e"(-1L)); \
  __res; \
})

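/* Illustrative sketch: the PACK/UNPACK helpers move between a pair of
   32-bit lane vectors and a single 64-bit lane vector (and likewise the
   DI variants for 64/128-bit lanes).  The names __lo and __hi are
   hypothetical v64si values; the v64si/v64udi typedefs are assumed from
   the including file:

     v64udi __pair = PACK_SI_PAIR (__lo, __hi);
     v64si __lo2 = UNPACK_SI_LOW (v64si, __pair);
     v64si __hi2 = UNPACK_SI_HIGH (v64si, __pair);  */
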
#define NO_COND __mask

/* Note - __mask is _not_ accounted for in VECTOR_MERGE!  */
#define VECTOR_MERGE(vec1, vec2, cond) \
({ \
  _Static_assert (__builtin_types_compatible_p (typeof (vec1), typeof (vec2))); \
  union { \
    typeof (vec1) val; \
    v64qi t_v64qi; \
    v64hi t_v64hi; \
    v64si t_v64si; \
    v64di t_v64di; \
  } __vec1, __vec2, __res; \
  __vec1.val = (vec1); \
  __vec2.val = (vec2); \
  __builtin_choose_expr ( \
    sizeof (vec1) == sizeof (v64si), \
    ({ \
      v64si __bitmask = __builtin_convertvector ((cond), v64si); \
      __res.t_v64si = (__vec1.t_v64si & __bitmask) \
                      | (__vec2.t_v64si & ~__bitmask); \
    }), \
    __builtin_choose_expr ( \
      sizeof (vec1) == sizeof (v64hi), \
      ({ \
        v64hi __bitmask = __builtin_convertvector ((cond), v64hi); \
        __res.t_v64hi = (__vec1.t_v64hi & __bitmask) \
                        | (__vec2.t_v64hi & ~__bitmask); \
      }), \
      __builtin_choose_expr ( \
        sizeof (vec1) == sizeof (v64qi), \
        ({ \
          v64qi __bitmask = __builtin_convertvector ((cond), v64qi); \
          __res.t_v64qi = (__vec1.t_v64qi & __bitmask) \
                          | (__vec2.t_v64qi & ~__bitmask); \
        }), \
        ({ \
          v64di __bitmask = __builtin_convertvector ((cond), v64di); \
          __res.t_v64di = (__vec1.t_v64di & __bitmask) \
                          | (__vec2.t_v64di & ~__bitmask); \
        })))); \
  __res.val; \
})

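/* Illustrative sketch: lane-wise select, taking vec1 where the condition
   lanes are all-ones and vec2 elsewhere.  The names __x and __limit are
   hypothetical v64sf values; note again that __mask is not applied here,
   so use VECTOR_COND_MOVE when the active-lane mask must be respected:

     v64sf __clamped = VECTOR_MERGE (__limit, __x, __x > __limit);  */
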
#define VECTOR_COND_MOVE(var, val, cond) \
do { \
  _Static_assert (__builtin_types_compatible_p (typeof (var), typeof (val))); \
  __auto_type __cond = __builtin_convertvector ((cond), typeof (__mask)); \
  var = VECTOR_MERGE ((val), var, __cond & __mask); \
} while (0)

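/* Illustrative sketch: conditionally update only the lanes that are both
   active in __mask and satisfy the condition.  Assumes a hypothetical
   v64df variable __res and a live __mask, as inside the *_aux worker
   functions:

     VECTOR_COND_MOVE (__res, __res * 2.0, __res < 1.0);  */
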
#define VECTOR_IF(cond, cond_var) \
{ \
  __auto_type cond_var = (cond); \
  __auto_type __inv_cond __attribute__((unused)) = ~cond_var; \
  if (!ALL_ZEROES_P (cond_var)) \
    {

#define VECTOR_ELSEIF(cond, cond_var) \
    } \
  cond_var = __inv_cond & (cond); \
  __inv_cond &= ~(cond); \
  if (!ALL_ZEROES_P (cond_var)) \
    {

#define VECTOR_ELSE(cond_var) \
    } \
  cond_var = __inv_cond; \
  if (!ALL_ZEROES_P (cond_var)) \
    {

#define VECTOR_IF2(cond, cond_var, prev_cond_var) \
{ \
  __auto_type cond_var = (cond) & __builtin_convertvector (prev_cond_var, typeof (cond)); \
  __auto_type __inv_cond __attribute__((unused)) = ~cond_var; \
  if (!ALL_ZEROES_P (cond_var)) \
    {

#define VECTOR_ELSEIF2(cond, cond_var, prev_cond_var) \
    } \
  cond_var = (cond) & __inv_cond & __builtin_convertvector (prev_cond_var, typeof (cond)); \
  __inv_cond &= ~(cond); \
  if (!ALL_ZEROES_P (cond_var)) \
    {

#define VECTOR_ELSE2(cond_var, prev_cond_var) \
    } \
  cond_var = __inv_cond & __builtin_convertvector (prev_cond_var, typeof (__inv_cond)); \
  if (!ALL_ZEROES_P (cond_var)) \
    {


#define VECTOR_ENDIF \
    } \
}

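/* Illustrative sketch of the masked control-flow idiom (the names __x and
   __res are hypothetical v64df values; __cond is declared by VECTOR_IF
   itself, and a live __mask is assumed to be in scope):

     VECTOR_IF (__x < 0.0, __cond)
       VECTOR_COND_MOVE (__res, -__x, __cond);
     VECTOR_ELSE (__cond)
       VECTOR_COND_MOVE (__res, __x, __cond);
     VECTOR_ENDIF

   The *2 variants nest a second level of conditions under an outer
   condition variable.  */
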
#define VECTOR_INIT_AUX(x, type) \
({ \
  typeof (x) __e = (x); \
  type __tmp = { \
    __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e, \
    __e, __e, __e, __e, __e, __e, __e, __e }; \
  __tmp; \
})

#define VECTOR_INIT(x) \
  (_Generic ((x), int: VECTOR_INIT_AUX ((x), v64si), \
              unsigned: VECTOR_INIT_AUX ((x), v64usi), \
              char: VECTOR_INIT_AUX ((x), v64qi), \
              unsigned char: VECTOR_INIT_AUX ((x), v64uqi), \
              short: VECTOR_INIT_AUX ((x), v64hi), \
              unsigned short: VECTOR_INIT_AUX ((x), v64uhi), \
              long: VECTOR_INIT_AUX ((x), v64di), \
              unsigned long: VECTOR_INIT_AUX ((x), v64udi), \
              float: VECTOR_INIT_AUX ((x), v64sf), \
              double: VECTOR_INIT_AUX ((x), v64df)))


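/* Illustrative sketch: broadcasting a scalar constant into every lane.
   The result type is selected by the scalar's type, so the literal's
   suffix matters; the v64sf/v64si typedefs are assumed to come from the
   including file:

     v64sf __ones = VECTOR_INIT (1.0f);
     v64si __zeroes = VECTOR_INIT (0);  */
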
#if defined (__GCN3__) || defined (__GCN5__) \
    || defined (__CDNA1__) || defined (__CDNA2__) \
    || defined (__RDNA2__)
#define CDNA3_PLUS 0
#else
#define CDNA3_PLUS 1
#endif

#define VECTOR_INIT_MASK(COUNT) \
({ \
  MASKMODE __mask; \
  int count = (COUNT); \
  if (count == 64) \
    { \
      if (sizeof (MASKMODE) < 512 || CDNA3_PLUS) \
        asm ("v_mov%B0\t%0, -1" : "=v"(__mask) : "e"(-1L)); \
      else \
        asm ("v_mov_b32\t%L0, -1\n\t" \
             "v_mov_b32\t%H0, -1" : "=v"(__mask) : "e"(-1L)); \
    } \
  else \
    { \
      long bitmask = (count == 64 ? -1 : (1<<count)-1); \
      if (sizeof (MASKMODE) < 512 || CDNA3_PLUS) \
        { \
          asm ("v_mov%B0\t%0, 0" : "=v"(__mask) : "e"(-1L)); \
          asm ("v_mov%B0\t%0, -1" : "+v"(__mask) : "e"(bitmask)); \
        } \
      else \
        { \
          asm ("v_mov_b32\t%L0, 0\n\t" \
               "v_mov_b32\t%H0, 0" : "=v"(__mask) : "e"(-1L)); \
          asm ("v_mov_b32\t%L0, -1\n\t" \
               "v_mov_b32\t%H0, -1" : "+v"(__mask) : "e"(bitmask)); \
        } \
    } \
  __mask; \
})

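/* Illustrative sketch: build the active-lane mask for a narrower vector
   width.  MASKMODE is expected to be defined by the including file
   (presumably v64si for 32-bit element workers and v64di for 64-bit
   ones); assuming MASKMODE is v64si here:

     v64si __mask = VECTOR_INIT_MASK (4);   (lanes 0..3 all-ones, rest 0)  */
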
#define ALL_ZEROES_P(x) (COND_TO_BITMASK(x) == 0)

#define COND_TO_BITMASK(x) \
({ \
  long __tmp = 0; \
  __auto_type __x = __builtin_convertvector((x), typeof (__mask)) & __mask; \
  __builtin_choose_expr (sizeof (__mask) != 512, \
                         ({ asm ("v_cmp_ne_u32_e64 %0, %1, 0" \
                                 : "=Sg" (__tmp) \
                                 : "v" (__x)); }), \
                         ({ asm ("v_cmp_ne_u64_e64 %0, %1, 0" \
                                 : "=Sg" (__tmp) \
                                 : "v" (__x)); })); \
  __tmp; \
})

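/* Illustrative sketch: collapse a vector condition to a scalar so ordinary
   control flow can skip work when no active lane needs it (assumes a live
   __mask plus hypothetical v64df values __x and __res):

     if (ALL_ZEROES_P (__x > 0.0))
       return __res;  */
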
#define VECTOR_WHILE(cond, cond_var, prev_cond_var) \
{ \
  __auto_type cond_var = prev_cond_var; \
  for (;;) { \
    cond_var &= (cond); \
    if (ALL_ZEROES_P (cond_var)) \
      break;

#define VECTOR_ENDWHILE \
  } \
}

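/* Illustrative sketch: keep iterating while any active lane still meets
   the condition; NO_COND passes the full __mask as the initial condition.
   The names __err, __tol and __refine_step are hypothetical; __err and
   __tol are assumed to be v64df with __mask of matching lane width
   (v64di), so the &= in the macro type-checks:

     VECTOR_WHILE (__err > __tol, __cond, NO_COND)
       VECTOR_COND_MOVE (__err, __refine_step (__err), __cond);
     VECTOR_ENDWHILE  */
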
#define DEF_VARIANT(FUN, SUFFIX, OTYPE, TYPE, COUNT) \
v##COUNT##OTYPE \
FUN##v##COUNT##SUFFIX (v##COUNT##TYPE __arg1, v##COUNT##TYPE __arg2) \
{ \
  __auto_type __upsized_arg1 = VGPR_CAST (v64##TYPE, __arg1); \
  __auto_type __upsized_arg2 = VGPR_CAST (v64##TYPE, __arg2); \
  __auto_type __mask = VECTOR_INIT_MASK (COUNT); \
  __auto_type __result = FUN##v64##SUFFIX##_aux (__upsized_arg1, __upsized_arg2, __mask); \
  return VGPR_CAST (v##COUNT##OTYPE, __result); \
}

#define DEF_VARIANTS(FUN, SUFFIX, TYPE) \
  DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 2) \
  DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 4) \
  DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 8) \
  DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 16) \
  DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 32) \
  DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 64)

#define DEF_VARIANTS_B(FUN, SUFFIX, OTYPE, TYPE) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 2) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 4) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 8) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 16) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 32) \
  DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 64)
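
/* Illustrative sketch of how these macros tie together when converting a
   scalar routine (all names below are hypothetical; MASKMODE and the
   vector typedefs are assumed to be provided by the including file):

     static v64df
     __example_v64df_aux (v64df __arg1, v64df __arg2, v64di __mask)
     {
       v64df __res = VECTOR_INIT (0.0);
       VECTOR_COND_MOVE (__res, __arg1 + __arg2, __arg2 > 0.0);
       return __res;
     }

     DEF_VARIANTS (__example_, df, df)

   This expands to __example_v2df ... __example_v64df wrappers, each of
   which widens its arguments with VGPR_CAST, builds the lane mask with
   VECTOR_INIT_MASK and calls __example_v64df_aux.  */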