1 /* Builtins' description for AArch64 SIMD architecture.
2 Copyright (C) 2011-2023 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #define IN_TARGET_CODE 1
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "function.h"
28 #include "basic-block.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "ssa.h"
33 #include "memmodel.h"
34 #include "tm_p.h"
35 #include "expmed.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "explow.h"
42 #include "expr.h"
43 #include "langhooks.h"
44 #include "gimple-iterator.h"
45 #include "case-cfn-macros.h"
46 #include "emit-rtl.h"
47 #include "stringpool.h"
48 #include "attribs.h"
49 #include "gimple-fold.h"
50 #include "builtins.h"
51
52 #define v8qi_UP E_V8QImode
53 #define v8di_UP E_V8DImode
54 #define v4hi_UP E_V4HImode
55 #define v4hf_UP E_V4HFmode
56 #define v2si_UP E_V2SImode
57 #define v2sf_UP E_V2SFmode
58 #define v1df_UP E_V1DFmode
59 #define v1di_UP E_V1DImode
60 #define di_UP E_DImode
61 #define df_UP E_DFmode
62 #define v16qi_UP E_V16QImode
63 #define v8hi_UP E_V8HImode
64 #define v8hf_UP E_V8HFmode
65 #define v4si_UP E_V4SImode
66 #define v4sf_UP E_V4SFmode
67 #define v2di_UP E_V2DImode
68 #define v2df_UP E_V2DFmode
69 #define ti_UP E_TImode
70 #define oi_UP E_OImode
71 #define ci_UP E_CImode
72 #define xi_UP E_XImode
73 #define si_UP E_SImode
74 #define sf_UP E_SFmode
75 #define hi_UP E_HImode
76 #define hf_UP E_HFmode
77 #define qi_UP E_QImode
78 #define bf_UP E_BFmode
79 #define v4bf_UP E_V4BFmode
80 #define v8bf_UP E_V8BFmode
81 #define v2x8qi_UP E_V2x8QImode
82 #define v2x4hi_UP E_V2x4HImode
83 #define v2x4hf_UP E_V2x4HFmode
84 #define v2x4bf_UP E_V2x4BFmode
85 #define v2x2si_UP E_V2x2SImode
86 #define v2x2sf_UP E_V2x2SFmode
87 #define v2x1di_UP E_V2x1DImode
88 #define v2x1df_UP E_V2x1DFmode
89 #define v2x16qi_UP E_V2x16QImode
90 #define v2x8hi_UP E_V2x8HImode
91 #define v2x8hf_UP E_V2x8HFmode
92 #define v2x8bf_UP E_V2x8BFmode
93 #define v2x4si_UP E_V2x4SImode
94 #define v2x4sf_UP E_V2x4SFmode
95 #define v2x2di_UP E_V2x2DImode
96 #define v2x2df_UP E_V2x2DFmode
97 #define v3x8qi_UP E_V3x8QImode
98 #define v3x4hi_UP E_V3x4HImode
99 #define v3x4hf_UP E_V3x4HFmode
100 #define v3x4bf_UP E_V3x4BFmode
101 #define v3x2si_UP E_V3x2SImode
102 #define v3x2sf_UP E_V3x2SFmode
103 #define v3x1di_UP E_V3x1DImode
104 #define v3x1df_UP E_V3x1DFmode
105 #define v3x16qi_UP E_V3x16QImode
106 #define v3x8hi_UP E_V3x8HImode
107 #define v3x8hf_UP E_V3x8HFmode
108 #define v3x8bf_UP E_V3x8BFmode
109 #define v3x4si_UP E_V3x4SImode
110 #define v3x4sf_UP E_V3x4SFmode
111 #define v3x2di_UP E_V3x2DImode
112 #define v3x2df_UP E_V3x2DFmode
113 #define v4x8qi_UP E_V4x8QImode
114 #define v4x4hi_UP E_V4x4HImode
115 #define v4x4hf_UP E_V4x4HFmode
116 #define v4x4bf_UP E_V4x4BFmode
117 #define v4x2si_UP E_V4x2SImode
118 #define v4x2sf_UP E_V4x2SFmode
119 #define v4x1di_UP E_V4x1DImode
120 #define v4x1df_UP E_V4x1DFmode
121 #define v4x16qi_UP E_V4x16QImode
122 #define v4x8hi_UP E_V4x8HImode
123 #define v4x8hf_UP E_V4x8HFmode
124 #define v4x8bf_UP E_V4x8BFmode
125 #define v4x4si_UP E_V4x4SImode
126 #define v4x4sf_UP E_V4x4SFmode
127 #define v4x2di_UP E_V4x2DImode
128 #define v4x2df_UP E_V4x2DFmode
129 #define UP(X) X##_UP
130
131 #define MODE_d_bf16 E_V4BFmode
132 #define MODE_d_f16 E_V4HFmode
133 #define MODE_d_f32 E_V2SFmode
134 #define MODE_d_f64 E_V1DFmode
135 #define MODE_d_s8 E_V8QImode
136 #define MODE_d_s16 E_V4HImode
137 #define MODE_d_s32 E_V2SImode
138 #define MODE_d_s64 E_V1DImode
139 #define MODE_d_u8 E_V8QImode
140 #define MODE_d_u16 E_V4HImode
141 #define MODE_d_u32 E_V2SImode
142 #define MODE_d_u64 E_V1DImode
143 #define MODE_d_p8 E_V8QImode
144 #define MODE_d_p16 E_V4HImode
145 #define MODE_d_p64 E_V1DImode
146 #define MODE_q_bf16 E_V8BFmode
147 #define MODE_q_f16 E_V8HFmode
148 #define MODE_q_f32 E_V4SFmode
149 #define MODE_q_f64 E_V2DFmode
150 #define MODE_q_s8 E_V16QImode
151 #define MODE_q_s16 E_V8HImode
152 #define MODE_q_s32 E_V4SImode
153 #define MODE_q_s64 E_V2DImode
154 #define MODE_q_u8 E_V16QImode
155 #define MODE_q_u16 E_V8HImode
156 #define MODE_q_u32 E_V4SImode
157 #define MODE_q_u64 E_V2DImode
158 #define MODE_q_p8 E_V16QImode
159 #define MODE_q_p16 E_V8HImode
160 #define MODE_q_p64 E_V2DImode
161 #define MODE_q_p128 E_TImode
162
163 #define QUAL_bf16 qualifier_none
164 #define QUAL_f16 qualifier_none
165 #define QUAL_f32 qualifier_none
166 #define QUAL_f64 qualifier_none
167 #define QUAL_s8 qualifier_none
168 #define QUAL_s16 qualifier_none
169 #define QUAL_s32 qualifier_none
170 #define QUAL_s64 qualifier_none
171 #define QUAL_u8 qualifier_unsigned
172 #define QUAL_u16 qualifier_unsigned
173 #define QUAL_u32 qualifier_unsigned
174 #define QUAL_u64 qualifier_unsigned
175 #define QUAL_p8 qualifier_poly
176 #define QUAL_p16 qualifier_poly
177 #define QUAL_p64 qualifier_poly
178 #define QUAL_p128 qualifier_poly
179
180 #define LENGTH_d ""
181 #define LENGTH_q "q"
182
183 #define SIMD_INTR_MODE(suffix, length) MODE_##length##_##suffix
184 #define SIMD_INTR_QUAL(suffix) QUAL_##suffix
185 #define SIMD_INTR_LENGTH_CHAR(length) LENGTH_##length
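/* For illustration (not part of the original source): with the helpers
   above, SIMD_INTR_MODE (f32, q) expands to MODE_q_f32, i.e. E_V4SFmode,
   SIMD_INTR_QUAL (u8) expands to QUAL_u8, i.e. qualifier_unsigned, and
   SIMD_INTR_LENGTH_CHAR (q) expands to LENGTH_q, i.e. "q".  */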
186
187
188 #define SIMD_MAX_BUILTIN_ARGS 5
189
190 enum aarch64_type_qualifiers
191 {
192 /* T foo. */
193 qualifier_none = 0x0,
194 /* unsigned T foo. */
195 qualifier_unsigned = 0x1, /* 1 << 0 */
196 /* const T foo. */
197 qualifier_const = 0x2, /* 1 << 1 */
198 /* T *foo. */
199 qualifier_pointer = 0x4, /* 1 << 2 */
200 /* Used when expanding arguments if an operand could
201 be an immediate. */
202 qualifier_immediate = 0x8, /* 1 << 3 */
203 qualifier_maybe_immediate = 0x10, /* 1 << 4 */
204 /* void foo (...). */
205 qualifier_void = 0x20, /* 1 << 5 */
206 /* 1 << 6 is now unused */
207 /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
208 rather than using the type of the operand. */
209 qualifier_map_mode = 0x80, /* 1 << 7 */
210 /* qualifier_pointer | qualifier_map_mode */
211 qualifier_pointer_map_mode = 0x84,
212 /* qualifier_const | qualifier_pointer | qualifier_map_mode */
213 qualifier_const_pointer_map_mode = 0x86,
214 /* Polynomial types. */
215 qualifier_poly = 0x100,
216 /* Lane indices - must be in range, and flipped for big-endian. */
217 qualifier_lane_index = 0x200,
218 /* Lane indices for single lane structure loads and stores. */
219 qualifier_struct_load_store_lane_index = 0x400,
220 /* Lane indices selected in pairs - must be in range, and flipped for
221 big-endian. */
222 qualifier_lane_pair_index = 0x800,
223 /* Lane indices selected in quadtuplets - must be in range, and flipped for
224 big-endian. */
225 qualifier_lane_quadtup_index = 0x1000,
226 };
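/* Illustrative note (not part of the original source): entry 0 of a
   qualifiers list describes the return value and the remaining entries
   describe the arguments, so a list such as
     { qualifier_void, qualifier_pointer_map_mode, qualifier_none }
   (used by TYPES_STORE1 below) denotes a builtin of the form
     void f (T *addr, T value).  */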
227
228 /* Flags that describe what a function might do. */
229 const unsigned int FLAG_NONE = 0U;
230 const unsigned int FLAG_READ_FPCR = 1U << 0;
231 const unsigned int FLAG_RAISE_FP_EXCEPTIONS = 1U << 1;
232 const unsigned int FLAG_READ_MEMORY = 1U << 2;
233 const unsigned int FLAG_PREFETCH_MEMORY = 1U << 3;
234 const unsigned int FLAG_WRITE_MEMORY = 1U << 4;
235
236 /* Not all FP intrinsics raise FP exceptions or read the FPCR register;
237 use this flag to suppress that behaviour. */
238 const unsigned int FLAG_AUTO_FP = 1U << 5;
239
240 const unsigned int FLAG_FP = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS;
241 const unsigned int FLAG_ALL = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS
242 | FLAG_READ_MEMORY | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY;
243 const unsigned int FLAG_STORE = FLAG_WRITE_MEMORY | FLAG_AUTO_FP;
244 const unsigned int FLAG_LOAD = FLAG_READ_MEMORY | FLAG_AUTO_FP;
245
246 typedef struct
247 {
248 const char *name;
249 machine_mode mode;
250 const enum insn_code code;
251 unsigned int fcode;
252 enum aarch64_type_qualifiers *qualifiers;
253 unsigned int flags;
254 } aarch64_simd_builtin_datum;
255
256 static enum aarch64_type_qualifiers
257 aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
258 = { qualifier_none, qualifier_none };
259 #define TYPES_UNOP (aarch64_types_unop_qualifiers)
260 static enum aarch64_type_qualifiers
261 aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
262 = { qualifier_unsigned, qualifier_unsigned };
263 #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
264 static enum aarch64_type_qualifiers
265 aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
266 = { qualifier_unsigned, qualifier_none };
267 #define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
268 static enum aarch64_type_qualifiers
269 aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
270 = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
271 #define TYPES_BINOP (aarch64_types_binop_qualifiers)
272 static enum aarch64_type_qualifiers
273 aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
274 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
275 #define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
276 static enum aarch64_type_qualifiers
277 aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
278 = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
279 #define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
280 static enum aarch64_type_qualifiers
281 aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
282 = { qualifier_none, qualifier_none, qualifier_unsigned };
283 #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
284 static enum aarch64_type_qualifiers
285 aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
286 = { qualifier_unsigned, qualifier_none, qualifier_none };
287 #define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
288 static enum aarch64_type_qualifiers
289 aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
290 = { qualifier_poly, qualifier_poly, qualifier_poly };
291 #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
292 static enum aarch64_type_qualifiers
293 aarch64_types_binop_ppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
294 = { qualifier_poly, qualifier_poly, qualifier_unsigned };
295 #define TYPES_BINOP_PPU (aarch64_types_binop_ppu_qualifiers)
296
297 static enum aarch64_type_qualifiers
298 aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
299 = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
300 #define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
301 static enum aarch64_type_qualifiers
302 aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
303 = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
304 #define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
305 static enum aarch64_type_qualifiers
306 aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
307 = { qualifier_unsigned, qualifier_unsigned,
308 qualifier_unsigned, qualifier_unsigned };
309 #define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
310 static enum aarch64_type_qualifiers
311 aarch64_types_ternopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
312 = { qualifier_unsigned, qualifier_unsigned,
313 qualifier_unsigned, qualifier_lane_index };
314 #define TYPES_TERNOPU_LANE (aarch64_types_ternopu_lane_qualifiers)
315 static enum aarch64_type_qualifiers
316 aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
317 = { qualifier_unsigned, qualifier_unsigned,
318 qualifier_unsigned, qualifier_immediate };
319 #define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
320 static enum aarch64_type_qualifiers
321 aarch64_types_ternop_sssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
322 = { qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned };
323 #define TYPES_TERNOP_SSSU (aarch64_types_ternop_sssu_qualifiers)
324 static enum aarch64_type_qualifiers
325 aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
326 = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
327 #define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)
328 static enum aarch64_type_qualifiers
329 aarch64_types_ternop_suss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
330 = { qualifier_none, qualifier_unsigned, qualifier_none, qualifier_none };
331 #define TYPES_TERNOP_SUSS (aarch64_types_ternop_suss_qualifiers)
332 static enum aarch64_type_qualifiers
333 aarch64_types_binop_pppu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
334 = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_unsigned };
335 #define TYPES_TERNOP_PPPU (aarch64_types_binop_pppu_qualifiers)
336
337 static enum aarch64_type_qualifiers
338 aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
339 = { qualifier_none, qualifier_none, qualifier_none,
340 qualifier_none, qualifier_lane_pair_index };
341 #define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
342 static enum aarch64_type_qualifiers
343 aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
344 = { qualifier_none, qualifier_none, qualifier_none,
345 qualifier_none, qualifier_lane_index };
346 #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
347 static enum aarch64_type_qualifiers
348 aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
349 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
350 qualifier_unsigned, qualifier_lane_index };
351 #define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)
352
353 static enum aarch64_type_qualifiers
354 aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
355 = { qualifier_none, qualifier_none, qualifier_unsigned,
356 qualifier_none, qualifier_lane_quadtup_index };
357 #define TYPES_QUADOPSSUS_LANE_QUADTUP \
358 (aarch64_types_quadopssus_lane_quadtup_qualifiers)
359 static enum aarch64_type_qualifiers
360 aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
361 = { qualifier_none, qualifier_none, qualifier_none,
362 qualifier_unsigned, qualifier_lane_quadtup_index };
363 #define TYPES_QUADOPSSSU_LANE_QUADTUP \
364 (aarch64_types_quadopsssu_lane_quadtup_qualifiers)
365
366 static enum aarch64_type_qualifiers
367 aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
368 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
369 qualifier_unsigned, qualifier_immediate };
370 #define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)
371
372 static enum aarch64_type_qualifiers
373 aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
374 = { qualifier_none, qualifier_none, qualifier_immediate };
375 #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
376 #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
377 static enum aarch64_type_qualifiers
378 aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
379 = { qualifier_unsigned, qualifier_none, qualifier_immediate };
380 #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
381 static enum aarch64_type_qualifiers
382 aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
383 = { qualifier_none, qualifier_unsigned, qualifier_immediate };
384 #define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
385 static enum aarch64_type_qualifiers
386 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
387 = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
388 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
389 #define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
390 static enum aarch64_type_qualifiers
391 aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
392 = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate };
393 #define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)
394
395 static enum aarch64_type_qualifiers
396 aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
397 = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
398 #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
399 #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
400 #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
401 #define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)
402
403 static enum aarch64_type_qualifiers
404 aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
405 = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate};
406 #define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)
407
408 static enum aarch64_type_qualifiers
409 aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
410 = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
411 qualifier_immediate };
412 #define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)
413
414 static enum aarch64_type_qualifiers
415 aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
416 = { qualifier_none, qualifier_const_pointer_map_mode };
417 #define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
418 #define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
419 static enum aarch64_type_qualifiers
420 aarch64_types_load1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
421 = { qualifier_unsigned, qualifier_const_pointer_map_mode };
422 #define TYPES_LOAD1_U (aarch64_types_load1_u_qualifiers)
423 #define TYPES_LOADSTRUCT_U (aarch64_types_load1_u_qualifiers)
424 static enum aarch64_type_qualifiers
425 aarch64_types_load1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
426 = { qualifier_poly, qualifier_const_pointer_map_mode };
427 #define TYPES_LOAD1_P (aarch64_types_load1_p_qualifiers)
428 #define TYPES_LOADSTRUCT_P (aarch64_types_load1_p_qualifiers)
429
430 static enum aarch64_type_qualifiers
431 aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
432 = { qualifier_none, qualifier_const_pointer_map_mode,
433 qualifier_none, qualifier_struct_load_store_lane_index };
434 #define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)
435 static enum aarch64_type_qualifiers
436 aarch64_types_loadstruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
437 = { qualifier_unsigned, qualifier_const_pointer_map_mode,
438 qualifier_unsigned, qualifier_struct_load_store_lane_index };
439 #define TYPES_LOADSTRUCT_LANE_U (aarch64_types_loadstruct_lane_u_qualifiers)
440 static enum aarch64_type_qualifiers
441 aarch64_types_loadstruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
442 = { qualifier_poly, qualifier_const_pointer_map_mode,
443 qualifier_poly, qualifier_struct_load_store_lane_index };
444 #define TYPES_LOADSTRUCT_LANE_P (aarch64_types_loadstruct_lane_p_qualifiers)
445
446 static enum aarch64_type_qualifiers
447 aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
448 = { qualifier_poly, qualifier_unsigned,
449 qualifier_poly, qualifier_poly };
450 #define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
451 static enum aarch64_type_qualifiers
452 aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
453 = { qualifier_none, qualifier_unsigned,
454 qualifier_none, qualifier_none };
455 #define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
456 static enum aarch64_type_qualifiers
457 aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
458 = { qualifier_unsigned, qualifier_unsigned,
459 qualifier_unsigned, qualifier_unsigned };
460 #define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)
461
462 /* The first argument (return type) of a store should be of void type,
463 which we represent with qualifier_void. The first operand will be
464 a DImode pointer to the location to store to, so we must use
465 qualifier_map_mode | qualifier_pointer to build a pointer to the
466 element type of the vector. */
467 static enum aarch64_type_qualifiers
468 aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
469 = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
470 #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
471 #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
472 static enum aarch64_type_qualifiers
473 aarch64_types_store1_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
474 = { qualifier_void, qualifier_pointer_map_mode, qualifier_unsigned };
475 #define TYPES_STORE1_U (aarch64_types_store1_u_qualifiers)
476 #define TYPES_STORESTRUCT_U (aarch64_types_store1_u_qualifiers)
477 static enum aarch64_type_qualifiers
478 aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
479 = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
480 #define TYPES_STORE1_P (aarch64_types_store1_p_qualifiers)
481 #define TYPES_STORESTRUCT_P (aarch64_types_store1_p_qualifiers)
482
483 static enum aarch64_type_qualifiers
484 aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
485 = { qualifier_void, qualifier_pointer_map_mode,
486 qualifier_none, qualifier_struct_load_store_lane_index };
487 #define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)
488 static enum aarch64_type_qualifiers
489 aarch64_types_storestruct_lane_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
490 = { qualifier_void, qualifier_pointer_map_mode,
491 qualifier_unsigned, qualifier_struct_load_store_lane_index };
492 #define TYPES_STORESTRUCT_LANE_U (aarch64_types_storestruct_lane_u_qualifiers)
493 static enum aarch64_type_qualifiers
494 aarch64_types_storestruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
495 = { qualifier_void, qualifier_pointer_map_mode,
496 qualifier_poly, qualifier_struct_load_store_lane_index };
497 #define TYPES_STORESTRUCT_LANE_P (aarch64_types_storestruct_lane_p_qualifiers)
498
499 #define CF0(N, X) CODE_FOR_aarch64_##N##X
500 #define CF1(N, X) CODE_FOR_##N##X##1
501 #define CF2(N, X) CODE_FOR_##N##X##2
502 #define CF3(N, X) CODE_FOR_##N##X##3
503 #define CF4(N, X) CODE_FOR_##N##X##4
504 #define CF10(N, X) CODE_FOR_##N##X
505
506 /* Define cascading VAR<N> macros that are used from
507 aarch64-builtin-iterators.h to iterate over modes. These definitions
508 will end up generating a number of VAR1 expansions, and code later on in
509 the file should redefine VAR1 to whatever it needs to process on a
510 per-mode basis. */
511 #define VAR2(T, N, MAP, FLAG, A, B) \
512 VAR1 (T, N, MAP, FLAG, A) \
513 VAR1 (T, N, MAP, FLAG, B)
514 #define VAR3(T, N, MAP, FLAG, A, B, C) \
515 VAR2 (T, N, MAP, FLAG, A, B) \
516 VAR1 (T, N, MAP, FLAG, C)
517 #define VAR4(T, N, MAP, FLAG, A, B, C, D) \
518 VAR3 (T, N, MAP, FLAG, A, B, C) \
519 VAR1 (T, N, MAP, FLAG, D)
520 #define VAR5(T, N, MAP, FLAG, A, B, C, D, E) \
521 VAR4 (T, N, MAP, FLAG, A, B, C, D) \
522 VAR1 (T, N, MAP, FLAG, E)
523 #define VAR6(T, N, MAP, FLAG, A, B, C, D, E, F) \
524 VAR5 (T, N, MAP, FLAG, A, B, C, D, E) \
525 VAR1 (T, N, MAP, FLAG, F)
526 #define VAR7(T, N, MAP, FLAG, A, B, C, D, E, F, G) \
527 VAR6 (T, N, MAP, FLAG, A, B, C, D, E, F) \
528 VAR1 (T, N, MAP, FLAG, G)
529 #define VAR8(T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
530 VAR7 (T, N, MAP, FLAG, A, B, C, D, E, F, G) \
531 VAR1 (T, N, MAP, FLAG, H)
532 #define VAR9(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
533 VAR8 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H) \
534 VAR1 (T, N, MAP, FLAG, I)
535 #define VAR10(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
536 VAR9 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I) \
537 VAR1 (T, N, MAP, FLAG, J)
538 #define VAR11(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
539 VAR10 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J) \
540 VAR1 (T, N, MAP, FLAG, K)
541 #define VAR12(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
542 VAR11 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K) \
543 VAR1 (T, N, MAP, FLAG, L)
544 #define VAR13(T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
545 VAR12 (T, N, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L) \
546 VAR1 (T, N, MAP, FLAG, M)
547 #define VAR14(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
548 VAR13 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M) \
549 VAR1 (T, X, MAP, FLAG, N)
550 #define VAR15(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
551 VAR14 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
552 VAR1 (T, X, MAP, FLAG, O)
553 #define VAR16(T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \
554 VAR15 (T, X, MAP, FLAG, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O) \
555 VAR1 (T, X, MAP, FLAG, P)
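/* For illustration (not part of the original source): a hypothetical use
   such as
     VAR3 (BINOP, add, 0, NONE, v8qi, v4hi, v2si)
   cascades down to
     VAR1 (BINOP, add, 0, NONE, v8qi)
     VAR1 (BINOP, add, 0, NONE, v4hi)
     VAR1 (BINOP, add, 0, NONE, v2si)
   and each VAR1 is then expanded according to whatever definition of VAR1
   is in force at the point of use.  */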
556
557 #include "aarch64-builtin-iterators.h"
558
559 /* The builtins below should be expanded through the standard optabs
560 CODE_FOR_[u]avg<mode>3_[floor,ceil]. However the mapping scheme in
561 aarch64-simd-builtins.def does not easily allow us to have a pre-mode
562 ("uavg") and post-mode string ("_ceil") in the CODE_FOR_* construction.
563 So the builtins use a name that is natural for AArch64 instructions
564 e.g. "aarch64_srhadd<mode>" and we re-map these to the optab-related
565 CODE_FOR_ here. */
566 #undef VAR1
567 #define VAR1(F,T1,T2,I,M) \
568 constexpr insn_code CODE_FOR_aarch64_##F##M = CODE_FOR_##T1##M##3##T2;
569
570 BUILTIN_VDQ_BHSI (srhadd, avg, _ceil, 0)
571 BUILTIN_VDQ_BHSI (urhadd, uavg, _ceil, 0)
572 BUILTIN_VDQ_BHSI (shadd, avg, _floor, 0)
573 BUILTIN_VDQ_BHSI (uhadd, uavg, _floor, 0)
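/* For illustration (not part of the original source): with the VAR1
   definition above, BUILTIN_VDQ_BHSI (srhadd, avg, _ceil, 0) produces,
   for the V8QI mode among others,
     constexpr insn_code CODE_FOR_aarch64_srhaddv8qi
       = CODE_FOR_avgv8qi3_ceil;
   so the "aarch64_srhadd<mode>" builtins are routed to the standard
   avg<mode>3_ceil optab patterns.  */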
574
575 #undef VAR1
576 #define VAR1(T, N, MAP, FLAG, A) \
577 {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T, FLAG_##FLAG},
578
579 static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
580 #include "aarch64-simd-builtins.def"
581 };
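/* For illustration (not part of the original source): with the VAR1
   definition above, a hypothetical entry VAR1 (BINOP, add, 0, NONE, v8qi)
   would expand to the initializer
     {"addv8qi", E_V8QImode, CODE_FOR_aarch64_addv8qi, 0,
      TYPES_BINOP, FLAG_NONE},
   i.e. the base name with a mode suffix, the machine mode, the insn
   pattern to expand, a function code filled in later, the qualifiers and
   the call-property flags.  */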
582
583 /* There are only 8 CRC32 builtins. Probably not worth their own .def file. */
584 #define AARCH64_CRC32_BUILTINS \
585 CRC32_BUILTIN (crc32b, QI) \
586 CRC32_BUILTIN (crc32h, HI) \
587 CRC32_BUILTIN (crc32w, SI) \
588 CRC32_BUILTIN (crc32x, DI) \
589 CRC32_BUILTIN (crc32cb, QI) \
590 CRC32_BUILTIN (crc32ch, HI) \
591 CRC32_BUILTIN (crc32cw, SI) \
592 CRC32_BUILTIN (crc32cx, DI)
593
594 /* The next 8 FCMLA intrinsics require some special handling compared to the
595 normal SIMD intrinsics. */
596 #define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
597 FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
598 FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
599 FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
600 FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
601 FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
602 FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
603 FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
604 FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \
605
606
607 /* vreinterpret intrinsics are defined for any pair of element types.
608 { _bf16 } { _bf16 }
609 { _f16 _f32 _f64 } { _f16 _f32 _f64 }
610 { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
611 { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }
612 { _p8 _p16 _p64 } { _p8 _p16 _p64 }. */
613 #define VREINTERPRET_BUILTIN2(A, B) \
614 VREINTERPRET_BUILTIN (A, B, d)
615
616 #define VREINTERPRET_BUILTINS1(A) \
617 VREINTERPRET_BUILTIN2 (A, bf16) \
618 VREINTERPRET_BUILTIN2 (A, f16) \
619 VREINTERPRET_BUILTIN2 (A, f32) \
620 VREINTERPRET_BUILTIN2 (A, f64) \
621 VREINTERPRET_BUILTIN2 (A, s8) \
622 VREINTERPRET_BUILTIN2 (A, s16) \
623 VREINTERPRET_BUILTIN2 (A, s32) \
624 VREINTERPRET_BUILTIN2 (A, s64) \
625 VREINTERPRET_BUILTIN2 (A, u8) \
626 VREINTERPRET_BUILTIN2 (A, u16) \
627 VREINTERPRET_BUILTIN2 (A, u32) \
628 VREINTERPRET_BUILTIN2 (A, u64) \
629 VREINTERPRET_BUILTIN2 (A, p8) \
630 VREINTERPRET_BUILTIN2 (A, p16) \
631 VREINTERPRET_BUILTIN2 (A, p64)
632
633 #define VREINTERPRET_BUILTINS \
634 VREINTERPRET_BUILTINS1 (bf16) \
635 VREINTERPRET_BUILTINS1 (f16) \
636 VREINTERPRET_BUILTINS1 (f32) \
637 VREINTERPRET_BUILTINS1 (f64) \
638 VREINTERPRET_BUILTINS1 (s8) \
639 VREINTERPRET_BUILTINS1 (s16) \
640 VREINTERPRET_BUILTINS1 (s32) \
641 VREINTERPRET_BUILTINS1 (s64) \
642 VREINTERPRET_BUILTINS1 (u8) \
643 VREINTERPRET_BUILTINS1 (u16) \
644 VREINTERPRET_BUILTINS1 (u32) \
645 VREINTERPRET_BUILTINS1 (u64) \
646 VREINTERPRET_BUILTINS1 (p8) \
647 VREINTERPRET_BUILTINS1 (p16) \
648 VREINTERPRET_BUILTINS1 (p64)
649
650 /* vreinterpretq intrinsics are additionally defined for p128.
651 { _bf16 } { _bf16 }
652 { _f16 _f32 _f64 } { _f16 _f32 _f64 }
653 { _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
654 { _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }
655 { _p8 _p16 _p64 _p128 } { _p8 _p16 _p64 _p128 }. */
656 #define VREINTERPRETQ_BUILTIN2(A, B) \
657 VREINTERPRET_BUILTIN (A, B, q)
658
659 #define VREINTERPRETQ_BUILTINS1(A) \
660 VREINTERPRETQ_BUILTIN2 (A, bf16) \
661 VREINTERPRETQ_BUILTIN2 (A, f16) \
662 VREINTERPRETQ_BUILTIN2 (A, f32) \
663 VREINTERPRETQ_BUILTIN2 (A, f64) \
664 VREINTERPRETQ_BUILTIN2 (A, s8) \
665 VREINTERPRETQ_BUILTIN2 (A, s16) \
666 VREINTERPRETQ_BUILTIN2 (A, s32) \
667 VREINTERPRETQ_BUILTIN2 (A, s64) \
668 VREINTERPRETQ_BUILTIN2 (A, u8) \
669 VREINTERPRETQ_BUILTIN2 (A, u16) \
670 VREINTERPRETQ_BUILTIN2 (A, u32) \
671 VREINTERPRETQ_BUILTIN2 (A, u64) \
672 VREINTERPRETQ_BUILTIN2 (A, p8) \
673 VREINTERPRETQ_BUILTIN2 (A, p16) \
674 VREINTERPRETQ_BUILTIN2 (A, p64) \
675 VREINTERPRETQ_BUILTIN2 (A, p128)
676
677 #define VREINTERPRETQ_BUILTINS \
678 VREINTERPRETQ_BUILTINS1 (bf16) \
679 VREINTERPRETQ_BUILTINS1 (f16) \
680 VREINTERPRETQ_BUILTINS1 (f32) \
681 VREINTERPRETQ_BUILTINS1 (f64) \
682 VREINTERPRETQ_BUILTINS1 (s8) \
683 VREINTERPRETQ_BUILTINS1 (s16) \
684 VREINTERPRETQ_BUILTINS1 (s32) \
685 VREINTERPRETQ_BUILTINS1 (s64) \
686 VREINTERPRETQ_BUILTINS1 (u8) \
687 VREINTERPRETQ_BUILTINS1 (u16) \
688 VREINTERPRETQ_BUILTINS1 (u32) \
689 VREINTERPRETQ_BUILTINS1 (u64) \
690 VREINTERPRETQ_BUILTINS1 (p8) \
691 VREINTERPRETQ_BUILTINS1 (p16) \
692 VREINTERPRETQ_BUILTINS1 (p64) \
693 VREINTERPRETQ_BUILTINS1 (p128)
694
695 #define AARCH64_SIMD_VREINTERPRET_BUILTINS \
696 VREINTERPRET_BUILTINS \
697 VREINTERPRETQ_BUILTINS
698
699 typedef struct
700 {
701 const char *name;
702 machine_mode mode;
703 const enum insn_code icode;
704 unsigned int fcode;
705 } aarch64_crc_builtin_datum;
706
707 /* Hold information about how to expand the FCMLA_LANEQ builtins. */
708 typedef struct
709 {
710 const char *name;
711 machine_mode mode;
712 const enum insn_code icode;
713 unsigned int fcode;
714 bool lane;
715 } aarch64_fcmla_laneq_builtin_datum;
716
717 /* Hold information about how to declare SIMD intrinsics. */
718 typedef struct
719 {
720 const char *name;
721 unsigned int fcode;
722 unsigned int op_count;
723 machine_mode op_modes[SIMD_MAX_BUILTIN_ARGS];
724 enum aarch64_type_qualifiers qualifiers[SIMD_MAX_BUILTIN_ARGS];
725 unsigned int flags;
726 bool skip;
727 } aarch64_simd_intrinsic_datum;
728
729 #define CRC32_BUILTIN(N, M) \
730 AARCH64_BUILTIN_##N,
731
732 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
733 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,
734
735 #define VREINTERPRET_BUILTIN(A, B, L) \
736 AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B,
737
738 #undef VAR1
739 #define VAR1(T, N, MAP, FLAG, A) \
740 AARCH64_SIMD_BUILTIN_##T##_##N##A,
741
742 enum aarch64_builtins
743 {
744 AARCH64_BUILTIN_MIN,
745
746 AARCH64_BUILTIN_GET_FPCR,
747 AARCH64_BUILTIN_SET_FPCR,
748 AARCH64_BUILTIN_GET_FPSR,
749 AARCH64_BUILTIN_SET_FPSR,
750
751 AARCH64_BUILTIN_GET_FPCR64,
752 AARCH64_BUILTIN_SET_FPCR64,
753 AARCH64_BUILTIN_GET_FPSR64,
754 AARCH64_BUILTIN_SET_FPSR64,
755
756 AARCH64_BUILTIN_RSQRT_DF,
757 AARCH64_BUILTIN_RSQRT_SF,
758 AARCH64_BUILTIN_RSQRT_V2DF,
759 AARCH64_BUILTIN_RSQRT_V2SF,
760 AARCH64_BUILTIN_RSQRT_V4SF,
761 AARCH64_SIMD_BUILTIN_BASE,
762 AARCH64_SIMD_BUILTIN_LANE_CHECK,
763 #include "aarch64-simd-builtins.def"
764 /* The first enum element which is based on an insn_data pattern. */
765 AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
766 AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
767 + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
768 AARCH64_CRC32_BUILTIN_BASE,
769 AARCH64_CRC32_BUILTINS
770 AARCH64_CRC32_BUILTIN_MAX,
771 /* SIMD intrinsic builtins. */
772 AARCH64_SIMD_VREINTERPRET_BUILTINS
773 /* ARMv8.3-A Pointer Authentication Builtins. */
774 AARCH64_PAUTH_BUILTIN_AUTIA1716,
775 AARCH64_PAUTH_BUILTIN_PACIA1716,
776 AARCH64_PAUTH_BUILTIN_AUTIB1716,
777 AARCH64_PAUTH_BUILTIN_PACIB1716,
778 AARCH64_PAUTH_BUILTIN_XPACLRI,
779 /* Special-cased Armv8.3-A complex FMA by lane quad builtins. */
780 AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
781 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
782 /* Builtin for the Armv8.3-A JavaScript conversion instruction. */
783 AARCH64_JSCVT,
784 /* TME builtins. */
785 AARCH64_TME_BUILTIN_TSTART,
786 AARCH64_TME_BUILTIN_TCOMMIT,
787 AARCH64_TME_BUILTIN_TTEST,
788 AARCH64_TME_BUILTIN_TCANCEL,
789 /* Armv8.5-A RNG instruction builtins. */
790 AARCH64_BUILTIN_RNG_RNDR,
791 AARCH64_BUILTIN_RNG_RNDRRS,
792 /* MEMTAG builtins. */
793 AARCH64_MEMTAG_BUILTIN_START,
794 AARCH64_MEMTAG_BUILTIN_IRG,
795 AARCH64_MEMTAG_BUILTIN_GMI,
796 AARCH64_MEMTAG_BUILTIN_SUBP,
797 AARCH64_MEMTAG_BUILTIN_INC_TAG,
798 AARCH64_MEMTAG_BUILTIN_SET_TAG,
799 AARCH64_MEMTAG_BUILTIN_GET_TAG,
800 AARCH64_MEMTAG_BUILTIN_END,
801 /* LS64 builtins. */
802 AARCH64_LS64_BUILTIN_LD64B,
803 AARCH64_LS64_BUILTIN_ST64B,
804 AARCH64_LS64_BUILTIN_ST64BV,
805 AARCH64_LS64_BUILTIN_ST64BV0,
806 AARCH64_REV16,
807 AARCH64_REV16L,
808 AARCH64_REV16LL,
809 AARCH64_RBIT,
810 AARCH64_RBITL,
811 AARCH64_RBITLL,
812 /* System register builtins. */
813 AARCH64_RSR,
814 AARCH64_RSRP,
815 AARCH64_RSR64,
816 AARCH64_RSRF,
817 AARCH64_RSRF64,
818 AARCH64_RSR128,
819 AARCH64_WSR,
820 AARCH64_WSRP,
821 AARCH64_WSR64,
822 AARCH64_WSRF,
823 AARCH64_WSRF64,
824 AARCH64_WSR128,
825 AARCH64_PLD,
826 AARCH64_PLDX,
827 AARCH64_PLI,
828 AARCH64_PLIX,
829 AARCH64_BUILTIN_MAX
830 };
831
832 #undef CRC32_BUILTIN
833 #define CRC32_BUILTIN(N, M) \
834 {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},
835
836 static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
837 AARCH64_CRC32_BUILTINS
838 };
839
840
841 #undef FCMLA_LANEQ_BUILTIN
842 #define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
843 {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
844 AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},
845
846 /* This structure describes how to map the builtin to the instruction to
847 generate in the backend and how to invoke that instruction. */
848 static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
849 AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
850 };
851
852 #undef VREINTERPRET_BUILTIN
853 #define VREINTERPRET_BUILTIN(A, B, L) \
854 {"vreinterpret" SIMD_INTR_LENGTH_CHAR(L) "_" #A "_" #B, \
855 AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B, \
856 2, \
857 { SIMD_INTR_MODE(A, L), SIMD_INTR_MODE(B, L) }, \
858 { SIMD_INTR_QUAL(A), SIMD_INTR_QUAL(B) }, \
859 FLAG_AUTO_FP, \
860 SIMD_INTR_MODE(A, L) == SIMD_INTR_MODE(B, L) \
861 && SIMD_INTR_QUAL(A) == SIMD_INTR_QUAL(B) \
862 },
863
864 static const aarch64_simd_intrinsic_datum aarch64_simd_intrinsic_data[] = {
865 AARCH64_SIMD_VREINTERPRET_BUILTINS
866 };
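/* For illustration (not part of the original source): the entry generated
   by VREINTERPRET_BUILTIN (f32, u8, d) is
     {"vreinterpret_f32_u8",
      AARCH64_SIMD_BUILTIN_VREINTERPRETd_f32_u8,
      2, { E_V2SFmode, E_V8QImode },
      { qualifier_none, qualifier_unsigned }, FLAG_AUTO_FP, false},
   where the final "skip" field (shown here already evaluated, to false) is
   true only for entries whose source and destination share the same mode
   and qualifiers, and such intrinsics are not registered.  */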
867
868
869 #undef CRC32_BUILTIN
870
871 static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];
872
873 #define NUM_DREG_TYPES 6
874 #define NUM_QREG_TYPES 6
875
876 /* Internal scalar builtin types. These types are used to support
877 Neon intrinsic builtins. They are _not_ user-visible types. Therefore
878 the mangling for these types is implementation-defined. */
879 const char *aarch64_scalar_builtin_types[] = {
880 "__builtin_aarch64_simd_qi",
881 "__builtin_aarch64_simd_hi",
882 "__builtin_aarch64_simd_si",
883 "__builtin_aarch64_simd_hf",
884 "__builtin_aarch64_simd_sf",
885 "__builtin_aarch64_simd_di",
886 "__builtin_aarch64_simd_df",
887 "__builtin_aarch64_simd_poly8",
888 "__builtin_aarch64_simd_poly16",
889 "__builtin_aarch64_simd_poly64",
890 "__builtin_aarch64_simd_poly128",
891 "__builtin_aarch64_simd_ti",
892 "__builtin_aarch64_simd_uqi",
893 "__builtin_aarch64_simd_uhi",
894 "__builtin_aarch64_simd_usi",
895 "__builtin_aarch64_simd_udi",
896 "__builtin_aarch64_simd_ei",
897 "__builtin_aarch64_simd_oi",
898 "__builtin_aarch64_simd_ci",
899 "__builtin_aarch64_simd_xi",
900 "__builtin_aarch64_simd_bf",
901 NULL
902 };
903
904 #define ENTRY(E, M, Q, G) E,
905 enum aarch64_simd_type
906 {
907 #include "aarch64-simd-builtin-types.def"
908 ARM_NEON_H_TYPES_LAST
909 };
910 #undef ENTRY
911
912 struct GTY(()) aarch64_simd_type_info
913 {
914 enum aarch64_simd_type type;
915
916 /* Internal type name. */
917 const char *name;
918
919 /* Internal type name (mangled). The mangled names conform to the
920 AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
921 Appendix A). To qualify for emission with the mangled names defined in
922 that document, a vector type must not only be of the correct mode but also
923 be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t); these
924 types are registered by aarch64_init_simd_builtin_types (). In other
925 words, vector types defined in other ways, e.g. via the vector_size
926 attribute, will get default mangled names. */
927 const char *mangle;
928
929 /* Internal type. */
930 tree itype;
931
932 /* Element type. */
933 tree eltype;
934
935 /* Machine mode the internal type maps to. */
936 enum machine_mode mode;
937
938 /* Qualifiers. */
939 enum aarch64_type_qualifiers q;
940 };
941
942 #define ENTRY(E, M, Q, G) \
943 {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
944 static GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = {
945 #include "aarch64-simd-builtin-types.def"
946 };
947 #undef ENTRY
948
949 static machine_mode aarch64_simd_tuple_modes[ARM_NEON_H_TYPES_LAST][3];
950 static GTY(()) tree aarch64_simd_tuple_types[ARM_NEON_H_TYPES_LAST][3];
951
952 static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
953 static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
954 static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;
955
956 /* The user-visible __fp16 type, and a pointer to that type. Used
957 across the back-end. */
958 tree aarch64_fp16_type_node = NULL_TREE;
959 tree aarch64_fp16_ptr_type_node = NULL_TREE;
960
961 /* Back-end type node for a pointer to the brain float (bfloat) type. */
962 tree aarch64_bf16_ptr_type_node = NULL_TREE;
963
964 /* Wrapper around add_builtin_function. NAME is the name of the built-in
965 function, TYPE is the function type, CODE is the function subcode
966 (relative to AARCH64_BUILTIN_GENERAL), and ATTRS is the function
967 attributes. */
968 static tree
969 aarch64_general_add_builtin (const char *name, tree type, unsigned int code,
970 tree attrs = NULL_TREE)
971 {
972 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
973 return add_builtin_function (name, type, code, BUILT_IN_MD,
974 NULL, attrs);
975 }
976
977 static tree
978 aarch64_general_simulate_builtin (const char *name, tree fntype,
979 unsigned int code,
980 tree attrs = NULL_TREE)
981 {
982 code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
983 return simulate_builtin_function_decl (input_location, name, fntype,
984 code, NULL, attrs);
985 }
986
987 static const char *
988 aarch64_mangle_builtin_scalar_type (const_tree type)
989 {
990 int i = 0;
991
992 while (aarch64_scalar_builtin_types[i] != NULL)
993 {
994 const char *name = aarch64_scalar_builtin_types[i];
995
996 if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
997 && DECL_NAME (TYPE_NAME (type))
998 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
999 return aarch64_scalar_builtin_types[i];
1000 i++;
1001 }
1002 return NULL;
1003 }
1004
1005 static const char *
1006 aarch64_mangle_builtin_vector_type (const_tree type)
1007 {
1008 tree attrs = TYPE_ATTRIBUTES (type);
1009 if (tree attr = lookup_attribute ("Advanced SIMD type", attrs))
1010 {
1011 tree mangled_name = TREE_VALUE (TREE_VALUE (attr));
1012 return IDENTIFIER_POINTER (mangled_name);
1013 }
1014
1015 return NULL;
1016 }
1017
1018 const char *
1019 aarch64_general_mangle_builtin_type (const_tree type)
1020 {
1021 const char *mangle;
1022 /* Walk through all the AArch64 builtin type tables to look up the
1023 incoming type. */
1024 if ((mangle = aarch64_mangle_builtin_vector_type (type))
1025 || (mangle = aarch64_mangle_builtin_scalar_type (type)))
1026 return mangle;
1027
1028 return NULL;
1029 }
1030
1031 /* Helper function for aarch64_simd_builtin_type. */
1032 static tree
1033 aarch64_int_or_fp_type (machine_mode mode,
1034 enum aarch64_type_qualifiers qualifiers)
1035 {
1036 #define QUAL_TYPE(M) ((qualifiers & qualifier_unsigned) \
1037 ? unsigned_int##M##_type_node : int##M##_type_node);
1038 switch (mode)
1039 {
1040 case E_QImode:
1041 return QUAL_TYPE (QI);
1042 case E_HImode:
1043 return QUAL_TYPE (HI);
1044 case E_SImode:
1045 return QUAL_TYPE (SI);
1046 case E_DImode:
1047 return QUAL_TYPE (DI);
1048 case E_TImode:
1049 return QUAL_TYPE (TI);
1050 case E_OImode:
1051 return aarch64_simd_intOI_type_node;
1052 case E_CImode:
1053 return aarch64_simd_intCI_type_node;
1054 case E_XImode:
1055 return aarch64_simd_intXI_type_node;
1056 case E_HFmode:
1057 return aarch64_fp16_type_node;
1058 case E_SFmode:
1059 return float_type_node;
1060 case E_DFmode:
1061 return double_type_node;
1062 case E_BFmode:
1063 return bfloat16_type_node;
1064 default:
1065 gcc_unreachable ();
1066 }
1067 #undef QUAL_TYPE
1068 }
1069
1070 /* Helper function for aarch64_simd_builtin_type. */
1071 static tree
1072 aarch64_lookup_simd_type_in_table (machine_mode mode,
1073 enum aarch64_type_qualifiers qualifiers)
1074 {
1075 int i;
1076 int nelts = ARRAY_SIZE (aarch64_simd_types);
1077 int q = qualifiers & (qualifier_poly | qualifier_unsigned);
1078
1079 for (i = 0; i < nelts; i++)
1080 {
1081 if (aarch64_simd_types[i].mode == mode
1082 && aarch64_simd_types[i].q == q)
1083 return aarch64_simd_types[i].itype;
1084 if (aarch64_simd_tuple_types[i][0] != NULL_TREE)
1085 for (int j = 0; j < 3; j++)
1086 if (aarch64_simd_tuple_modes[i][j] == mode
1087 && aarch64_simd_types[i].q == q)
1088 return aarch64_simd_tuple_types[i][j];
1089 }
1090
1091 return NULL_TREE;
1092 }
1093
1094 /* Return a type for an operand with specified mode and qualifiers. */
1095 static tree
1096 aarch64_simd_builtin_type (machine_mode mode,
1097 enum aarch64_type_qualifiers qualifiers)
1098 {
1099 tree type = NULL_TREE;
1100
1101 /* For pointers, we want a pointer to the basic type of the vector. */
1102 if ((qualifiers & qualifier_pointer) && VECTOR_MODE_P (mode))
1103 mode = GET_MODE_INNER (mode);
1104
1105 /* Non-poly scalar modes map to standard types not in the table. */
1106 if ((qualifiers & qualifier_poly) || VECTOR_MODE_P (mode))
1107 type = aarch64_lookup_simd_type_in_table (mode, qualifiers);
1108 else
1109 type = aarch64_int_or_fp_type (mode, qualifiers);
1110
1111 gcc_assert (type != NULL_TREE);
1112
1113 /* Add qualifiers. */
1114 if (qualifiers & qualifier_const)
1115 type = build_qualified_type (type, TYPE_QUAL_CONST);
1116 if (qualifiers & qualifier_pointer)
1117 type = build_pointer_type (type);
1118
1119 return type;
1120 }
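/* For illustration (not part of the original source):
   aarch64_simd_builtin_type (E_V4SImode, qualifier_unsigned) returns the
   internal Uint32x4_t vector type from the table above, whereas
   aarch64_simd_builtin_type (E_V4SImode, qualifier_const_pointer_map_mode)
   first drops to the inner SImode element and then yields a pointer to a
   const 32-bit integer, as used by the load intrinsics.  */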
1121
1122 static void
1123 aarch64_init_simd_builtin_types (void)
1124 {
1125 int i;
1126 int nelts = ARRAY_SIZE (aarch64_simd_types);
1127 tree tdecl;
1128
1129 /* Init all the element types built by the front-end. */
1130 aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
1131 aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
1132 aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
1133 aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
1134 aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
1135 aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
1136 aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
1137 aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
1138 aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
1139 aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
1140 aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
1141 aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
1142 aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
1143 aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
1144 aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
1145 aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;
1146
1147 /* Poly types are a world of their own. */
1148 aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype =
1149 build_distinct_type_copy (unsigned_intQI_type_node);
1150 /* Prevent front-ends from transforming Poly8_t arrays into string
1151 literals. */
1152 TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false;
1153
1154 aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype =
1155 build_distinct_type_copy (unsigned_intHI_type_node);
1156 aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype =
1157 build_distinct_type_copy (unsigned_intDI_type_node);
1158 aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype =
1159 build_distinct_type_copy (unsigned_intTI_type_node);
1160 /* Init poly vector element types with scalar poly types. */
1161 aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
1162 aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
1163 aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
1164 aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
1165 aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
1166 aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;
1167
1168 /* Continue with standard types. */
1169 aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
1170 aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
1171 aarch64_simd_types[Float32x2_t].eltype = float_type_node;
1172 aarch64_simd_types[Float32x4_t].eltype = float_type_node;
1173 aarch64_simd_types[Float64x1_t].eltype = double_type_node;
1174 aarch64_simd_types[Float64x2_t].eltype = double_type_node;
1175
1176 /* Init Bfloat vector types with underlying __bf16 type. */
1177 aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
1178 aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
1179
1180 for (i = 0; i < nelts; i++)
1181 {
1182 tree eltype = aarch64_simd_types[i].eltype;
1183 machine_mode mode = aarch64_simd_types[i].mode;
1184
1185 if (aarch64_simd_types[i].itype == NULL)
1186 {
1187 tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
1188 type = build_distinct_type_copy (type);
1189 SET_TYPE_STRUCTURAL_EQUALITY (type);
1190
1191 tree mangled_name = get_identifier (aarch64_simd_types[i].mangle);
1192 tree value = tree_cons (NULL_TREE, mangled_name, NULL_TREE);
1193 TYPE_ATTRIBUTES (type)
1194 = tree_cons (get_identifier ("Advanced SIMD type"), value,
1195 TYPE_ATTRIBUTES (type));
1196 aarch64_simd_types[i].itype = type;
1197 }
1198
1199 tdecl = add_builtin_type (aarch64_simd_types[i].name,
1200 aarch64_simd_types[i].itype);
1201 TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
1202 }
1203
1204 #define AARCH64_BUILD_SIGNED_TYPE(mode) \
1205 make_signed_type (GET_MODE_PRECISION (mode));
1206 aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
1207 aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
1208 aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
1209 #undef AARCH64_BUILD_SIGNED_TYPE
1210
1211 tdecl = add_builtin_type
1212 ("__builtin_aarch64_simd_oi" , aarch64_simd_intOI_type_node);
1213 TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
1214 tdecl = add_builtin_type
1215 ("__builtin_aarch64_simd_ci" , aarch64_simd_intCI_type_node);
1216 TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
1217 tdecl = add_builtin_type
1218 ("__builtin_aarch64_simd_xi" , aarch64_simd_intXI_type_node);
1219 TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
1220 }
1221
1222 static void
1223 aarch64_init_simd_builtin_scalar_types (void)
1224 {
1225 /* Define typedefs for all the standard scalar types. */
1226 (*lang_hooks.types.register_builtin_type) (intQI_type_node,
1227 "__builtin_aarch64_simd_qi");
1228 (*lang_hooks.types.register_builtin_type) (intHI_type_node,
1229 "__builtin_aarch64_simd_hi");
1230 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
1231 "__builtin_aarch64_simd_hf");
1232 (*lang_hooks.types.register_builtin_type) (intSI_type_node,
1233 "__builtin_aarch64_simd_si");
1234 (*lang_hooks.types.register_builtin_type) (float_type_node,
1235 "__builtin_aarch64_simd_sf");
1236 (*lang_hooks.types.register_builtin_type) (intDI_type_node,
1237 "__builtin_aarch64_simd_di");
1238 (*lang_hooks.types.register_builtin_type) (double_type_node,
1239 "__builtin_aarch64_simd_df");
1240 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
1241 "__builtin_aarch64_simd_poly8");
1242 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
1243 "__builtin_aarch64_simd_poly16");
1244 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
1245 "__builtin_aarch64_simd_poly64");
1246 (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
1247 "__builtin_aarch64_simd_poly128");
1248 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
1249 "__builtin_aarch64_simd_ti");
1250 (*lang_hooks.types.register_builtin_type) (bfloat16_type_node,
1251 "__builtin_aarch64_simd_bf");
1252 /* Unsigned integer types for various mode sizes. */
1253 (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
1254 "__builtin_aarch64_simd_uqi");
1255 (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
1256 "__builtin_aarch64_simd_uhi");
1257 (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
1258 "__builtin_aarch64_simd_usi");
1259 (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
1260 "__builtin_aarch64_simd_udi");
1261 }
1262
1263 /* Return a set of FLAG_* flags derived from FLAGS
1264 that describe what a function with result MODE could do,
1265 taking the command-line flags into account. */
1266 static unsigned int
1267 aarch64_call_properties (unsigned int flags, machine_mode mode)
1268 {
1269 if (!(flags & FLAG_AUTO_FP) && FLOAT_MODE_P (mode))
1270 flags |= FLAG_FP;
1271
1272 /* -fno-trapping-math means that we can assume any FP exceptions
1273 are not user-visible. */
1274 if (!flag_trapping_math)
1275 flags &= ~FLAG_RAISE_FP_EXCEPTIONS;
1276
1277 return flags;
1278 }
1279
1280 /* Return true if calls to a function with flags F and mode MODE
1281 could modify some form of global state. */
1282 static bool
1283 aarch64_modifies_global_state_p (unsigned int f, machine_mode mode)
1284 {
1285 unsigned int flags = aarch64_call_properties (f, mode);
1286
1287 if (flags & FLAG_RAISE_FP_EXCEPTIONS)
1288 return true;
1289
1290 if (flags & FLAG_PREFETCH_MEMORY)
1291 return true;
1292
1293 return flags & FLAG_WRITE_MEMORY;
1294 }
1295
1296 /* Return true if calls to a function with flags F and mode MODE
1297 could read some form of global state. */
1298 static bool
1299 aarch64_reads_global_state_p (unsigned int f, machine_mode mode)
1300 {
1301 unsigned int flags = aarch64_call_properties (f, mode);
1302
1303 if (flags & FLAG_READ_FPCR)
1304 return true;
1305
1306 return flags & FLAG_READ_MEMORY;
1307 }
1308
1309 /* Return true if calls to a function with flags F and mode MODE
1310 could raise a signal. */
1311 static bool
1312 aarch64_could_trap_p (unsigned int f, machine_mode mode)
1313 {
1314 unsigned int flags = aarch64_call_properties (f, mode);
1315
1316 if (flags & FLAG_RAISE_FP_EXCEPTIONS)
1317 return true;
1318
1319 if (flags & (FLAG_READ_MEMORY | FLAG_WRITE_MEMORY))
1320 return true;
1321
1322 return false;
1323 }
1324
1325 /* Add attribute NAME to ATTRS. */
1326 static tree
1327 aarch64_add_attribute (const char *name, tree attrs)
1328 {
1329 return tree_cons (get_identifier (name), NULL_TREE, attrs);
1330 }
1331
1332 /* Return the appropriate attributes for a function that has
1333 flags F and mode MODE. */
1334 static tree
1335 aarch64_get_attributes (unsigned int f, machine_mode mode)
1336 {
1337 tree attrs = NULL_TREE;
1338
1339 if (!aarch64_modifies_global_state_p (f, mode))
1340 {
1341 if (aarch64_reads_global_state_p (f, mode))
1342 attrs = aarch64_add_attribute ("pure", attrs);
1343 else
1344 attrs = aarch64_add_attribute ("const", attrs);
1345 }
1346
1347 if (!flag_non_call_exceptions || !aarch64_could_trap_p (f, mode))
1348 attrs = aarch64_add_attribute ("nothrow", attrs);
1349
1350 return aarch64_add_attribute ("leaf", attrs);
1351 }
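/* For illustration (not part of the original source): an integer builtin
   registered with FLAG_NONE neither reads nor modifies global state, so it
   receives "const", "nothrow" and "leaf".  A floating-point builtin
   registered with FLAG_FP is assumed to read the FPCR and raise FP
   exceptions, so by default it is neither "const" nor "pure"; with
   -fno-trapping-math the exception flag is dropped and it becomes "pure",
   and it only loses "nothrow" when -fnon-call-exceptions is in effect and
   the call could still trap.  */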
1352
1353 /* Due to the architecture not providing a lane variant of the lane
1354 instructions for fcmla, we can't use the standard SIMD builtin expansion
1355 code, but we still want the majority of the validation normally done. */
1356
1357 void
1358 aarch64_init_fcmla_laneq_builtins (void)
1359 {
1360 unsigned int i = 0;
1361
1362 for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
1363 {
1364 aarch64_fcmla_laneq_builtin_datum* d
1365 = &aarch64_fcmla_lane_builtin_data[i];
1366 tree argtype = aarch64_simd_builtin_type (d->mode, qualifier_none);
1367 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
1368 tree quadtype = aarch64_simd_builtin_type (quadmode, qualifier_none);
1369 tree lanetype
1370 = aarch64_simd_builtin_type (SImode, qualifier_lane_pair_index);
1371 tree ftype = build_function_type_list (argtype, argtype, argtype,
1372 quadtype, lanetype, NULL_TREE);
1373 tree attrs = aarch64_get_attributes (FLAG_FP, d->mode);
1374 tree fndecl
1375 = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);
1376
1377 aarch64_builtin_decls[d->fcode] = fndecl;
1378 }
1379 }
1380
1381 void
1382 aarch64_init_simd_intrinsics (void)
1383 {
1384 unsigned int i = 0;
1385
1386 for (i = 0; i < ARRAY_SIZE (aarch64_simd_intrinsic_data); ++i)
1387 {
1388 auto d = &aarch64_simd_intrinsic_data[i];
1389
1390 if (d->skip)
1391 continue;
1392
1393 tree return_type = void_type_node;
1394 tree args = void_list_node;
1395
1396 for (int op_num = d->op_count - 1; op_num >= 0; op_num--)
1397 {
1398 machine_mode op_mode = d->op_modes[op_num];
1399 enum aarch64_type_qualifiers qualifiers = d->qualifiers[op_num];
1400
1401 tree eltype = aarch64_simd_builtin_type (op_mode, qualifiers);
1402
1403 if (op_num == 0)
1404 return_type = eltype;
1405 else
1406 args = tree_cons (NULL_TREE, eltype, args);
1407 }
1408
1409 tree ftype = build_function_type (return_type, args);
1410 tree attrs = aarch64_get_attributes (d->flags, d->op_modes[0]);
1411 unsigned int code
1412 = (d->fcode << AARCH64_BUILTIN_SHIFT | AARCH64_BUILTIN_GENERAL);
1413 tree fndecl = simulate_builtin_function_decl (input_location, d->name,
1414 ftype, code, NULL, attrs);
1415 aarch64_builtin_decls[d->fcode] = fndecl;
1416 }
1417 }
1418
1419 void
1420 aarch64_init_simd_builtin_functions (bool called_from_pragma)
1421 {
1422 unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;
1423
1424 if (!called_from_pragma)
1425 {
1426 tree lane_check_fpr = build_function_type_list (void_type_node,
1427 size_type_node,
1428 size_type_node,
1429 intSI_type_node,
1430 NULL);
1431 aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
1432 = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
1433 lane_check_fpr,
1434 AARCH64_SIMD_BUILTIN_LANE_CHECK);
1435 }
1436
1437 for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
1438 {
1439 bool print_type_signature_p = false;
1440 char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
1441 aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
1442 char namebuf[60];
1443 tree ftype = NULL;
1444 tree fndecl = NULL;
1445
1446 d->fcode = fcode;
1447
1448 /* We must track two variables here. op_num is
1449 the operand number as in the RTL pattern. This is
1450 required to access the mode (e.g. V4SF mode) of the
1451 argument, from which the base type can be derived.
1452 arg_num is an index into the qualifiers data, which
1453 gives qualifiers to the type (e.g. const unsigned).
1454 The reason these two variables may differ by one is the
1455 void return type. While all return types take the 0th entry
1456 in the qualifiers array, there is no operand for them in the
1457 RTL pattern. */
1458 int op_num = insn_data[d->code].n_operands - 1;
1459 int arg_num = d->qualifiers[0] & qualifier_void
1460 ? op_num + 1
1461 : op_num;
1462 tree return_type = void_type_node, args = void_list_node;
1463 tree eltype;
1464
1465 int struct_mode_args = 0;
1466 for (int j = op_num; j >= 0; j--)
1467 {
1468 machine_mode op_mode = insn_data[d->code].operand[j].mode;
1469 if (aarch64_advsimd_struct_mode_p (op_mode))
1470 struct_mode_args++;
1471 }
1472
1473 if ((called_from_pragma && struct_mode_args == 0)
1474 || (!called_from_pragma && struct_mode_args > 0))
1475 continue;
1476
1477 /* Build a function type directly from the insn_data for this
1478 builtin. The build_function_type () function takes care of
1479 removing duplicates for us. */
1480 for (; op_num >= 0; arg_num--, op_num--)
1481 {
1482 machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
1483 enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];
1484
1485 if (qualifiers & qualifier_unsigned)
1486 {
1487 type_signature[op_num] = 'u';
1488 print_type_signature_p = true;
1489 }
1490 else if (qualifiers & qualifier_poly)
1491 {
1492 type_signature[op_num] = 'p';
1493 print_type_signature_p = true;
1494 }
1495 else
1496 type_signature[op_num] = 's';
1497
1498 /* Some builtins have different user-facing types
1499 for certain arguments, encoded in d->mode. */
1500 if (qualifiers & qualifier_map_mode)
1501 op_mode = d->mode;
1502
1503 eltype = aarch64_simd_builtin_type (op_mode, qualifiers);
1504
1505 /* If we have reached arg_num == 0, we are at a non-void
1506 return type. Otherwise, we are still processing
1507 arguments. */
1508 if (arg_num == 0)
1509 return_type = eltype;
1510 else
1511 args = tree_cons (NULL_TREE, eltype, args);
1512 }
1513
1514 ftype = build_function_type (return_type, args);
1515
1516 gcc_assert (ftype != NULL);
1517
1518 if (print_type_signature_p)
1519 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
1520 d->name, type_signature);
1521 else
1522 snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
1523 d->name);
1524
1525 tree attrs = aarch64_get_attributes (d->flags, d->mode);
1526
1527 if (called_from_pragma)
1528 {
1529 unsigned int raw_code
1530 = (fcode << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
1531 fndecl = simulate_builtin_function_decl (input_location, namebuf,
1532 ftype, raw_code, NULL,
1533 attrs);
1534 }
1535 else
1536 fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode, attrs);
1537
1538 aarch64_builtin_decls[fcode] = fndecl;
1539 }
1540 }
1541
1542 /* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
1543 indexed by TYPE_INDEX. */
1544 static void
1545 register_tuple_type (unsigned int num_vectors, unsigned int type_index)
1546 {
1547 aarch64_simd_type_info *type = &aarch64_simd_types[type_index];
1548
1549 /* Synthesize the name of the user-visible vector tuple type. */
1550 const char *vector_type_name = type->name;
1551 char tuple_type_name[sizeof ("bfloat16x4x2_t")];
1552 snprintf (tuple_type_name, sizeof (tuple_type_name), "%.*sx%d_t",
1553 (int) strlen (vector_type_name) - 4, vector_type_name + 2,
1554 num_vectors);
1555 tuple_type_name[0] = TOLOWER (tuple_type_name[0]);
1556
1557 tree vector_type = type->itype;
1558 tree array_type = build_array_type_nelts (vector_type, num_vectors);
1559 if (type->mode == DImode)
1560 {
1561 if (num_vectors == 2)
1562 SET_TYPE_MODE (array_type, V2x1DImode);
1563 else if (num_vectors == 3)
1564 SET_TYPE_MODE (array_type, V3x1DImode);
1565 else if (num_vectors == 4)
1566 SET_TYPE_MODE (array_type, V4x1DImode);
1567 }
1568
1569 unsigned int alignment
1570 = known_eq (GET_MODE_SIZE (type->mode), 16) ? 128 : 64;
1571 machine_mode tuple_mode = TYPE_MODE_RAW (array_type);
1572 gcc_assert (VECTOR_MODE_P (tuple_mode)
1573 && TYPE_MODE (array_type) == tuple_mode
1574 && TYPE_ALIGN (array_type) == alignment);
1575
1576 tree field = build_decl (input_location, FIELD_DECL,
1577 get_identifier ("val"), array_type);
1578
1579 tree t = lang_hooks.types.simulate_record_decl (input_location,
1580 tuple_type_name,
1581 make_array_slice (&field,
1582 1));
1583 gcc_assert (TYPE_MODE_RAW (t) == TYPE_MODE (t)
1584 && (flag_pack_struct
1585 || maximum_field_alignment
1586 || (TYPE_MODE_RAW (t) == tuple_mode
1587 && TYPE_ALIGN (t) == alignment)));
1588
1589 aarch64_simd_tuple_modes[type_index][num_vectors - 2] = tuple_mode;
1590 aarch64_simd_tuple_types[type_index][num_vectors - 2] = t;
1591 }
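
/* A worked illustration (editorial, not part of the original source): for a
   vector type whose recorded name is, e.g., "__Int32x4_t" and NUM_VECTORS == 2,
   the snprintf above copies strlen ("__Int32x4_t") - 4 == 7 characters
   starting at name + 2 and appends "x2_t":

     "__Int32x4_t" -> "Int32x4" -> "Int32x4x2_t" -> (TOLOWER) "int32x4x2_t"

   i.e. the user-visible arm_neon.h tuple type, whose only member is the
   "val" array FIELD_DECL built above:

     typedef struct int32x4x2_t { int32x4_t val[2]; } int32x4x2_t;

   The typedef form is a sketch; the real type is created through
   simulate_record_decl rather than parsed from source.  */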
1592
1593 static bool
1594 aarch64_scalar_builtin_type_p (aarch64_simd_type t)
1595 {
1596 return (t == Poly8_t || t == Poly16_t || t == Poly64_t || t == Poly128_t);
1597 }
1598
1599 /* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
1600 set. */
1601 aarch64_simd_switcher::aarch64_simd_switcher (aarch64_feature_flags extra_flags)
1602 : m_old_asm_isa_flags (aarch64_asm_isa_flags),
1603 m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
1604 {
1605 /* Changing the ISA flags should be enough here. We shouldn't need to
1606 pay the compile-time cost of a full target switch. */
1607 global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
1608 aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags);
1609 }
1610
1611 aarch64_simd_switcher::~aarch64_simd_switcher ()
1612 {
1613 if (m_old_general_regs_only)
1614 global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
1615 aarch64_set_asm_isa_flags (m_old_asm_isa_flags);
1616 }
1617
1618 /* Implement #pragma GCC aarch64 "arm_neon.h".
1619
1620 The types and functions defined here need to be available internally
1621 during LTO as well. */
1622 void
1623 handle_arm_neon_h (void)
1624 {
1625 aarch64_simd_switcher simd;
1626
1627 /* Register the AdvSIMD vector tuple types. */
1628 for (unsigned int i = 0; i < ARM_NEON_H_TYPES_LAST; i++)
1629 for (unsigned int count = 2; count <= 4; ++count)
1630 if (!aarch64_scalar_builtin_type_p (aarch64_simd_types[i].type))
1631 register_tuple_type (count, i);
1632
1633 aarch64_init_simd_builtin_functions (true);
1634 aarch64_init_simd_intrinsics ();
1635 }
1636
1637 static void
1638 aarch64_init_simd_builtins (void)
1639 {
1640 aarch64_init_simd_builtin_types ();
1641
1642 /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
1643 Therefore we need to preserve the old __builtin scalar types. They can be
1644 removed once all the intrinsics become strongly typed using the qualifier
1645 system. */
1646 aarch64_init_simd_builtin_scalar_types ();
1647
1648 aarch64_init_simd_builtin_functions (false);
1649 if (in_lto_p)
1650 handle_arm_neon_h ();
1651
1652 /* Initialize the remaining fcmla_laneq intrinsics. */
1653 aarch64_init_fcmla_laneq_builtins ();
1654 }
1655
1656 static void
1657 aarch64_init_crc32_builtins ()
1658 {
1659 tree usi_type = aarch64_simd_builtin_type (SImode, qualifier_unsigned);
1660 unsigned int i = 0;
1661
1662 for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
1663 {
1664 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
1665 tree argtype = aarch64_simd_builtin_type (d->mode, qualifier_unsigned);
1666 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
1667 tree attrs = aarch64_get_attributes (FLAG_NONE, d->mode);
1668 tree fndecl
1669 = aarch64_general_add_builtin (d->name, ftype, d->fcode, attrs);
1670
1671 aarch64_builtin_decls[d->fcode] = fndecl;
1672 }
1673 }
1674
1675 /* Add builtins for reciprocal square root. */
1676
1677 void
1678 aarch64_init_builtin_rsqrt (void)
1679 {
1680 tree fndecl = NULL;
1681 tree ftype = NULL;
1682
1683 tree V2SF_type_node = build_vector_type (float_type_node, 2);
1684 tree V2DF_type_node = build_vector_type (double_type_node, 2);
1685 tree V4SF_type_node = build_vector_type (float_type_node, 4);
1686
1687 struct builtin_decls_data
1688 {
1689 tree type_node;
1690 const char *builtin_name;
1691 int function_code;
1692 };
1693
1694 builtin_decls_data bdda[] =
1695 {
1696 { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
1697 { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
1698 { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
1699 { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
1700 { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
1701 };
1702
1703 builtin_decls_data *bdd = bdda;
1704 builtin_decls_data *bdd_end = bdd + (ARRAY_SIZE (bdda));
1705
1706 for (; bdd < bdd_end; bdd++)
1707 {
1708 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
1709 tree attrs = aarch64_get_attributes (FLAG_FP, TYPE_MODE (bdd->type_node));
1710 fndecl = aarch64_general_add_builtin (bdd->builtin_name,
1711 ftype, bdd->function_code, attrs);
1712 aarch64_builtin_decls[bdd->function_code] = fndecl;
1713 }
1714 }
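
/* Illustrative usage only (not from the original source): each entry in BDDA
   becomes a unary builtin whose argument and result share a type, e.g. for
   the scalar double variant registered above:

     double x = 42.0;
     double r = __builtin_aarch64_rsqrt_df (x);
       (an estimate of 1 / sqrt (x))

   In practice these builtins are reached through the reciprocal square root
   expansion path (see aarch64_general_builtin_rsqrt near the end of this
   file) rather than written by hand.  */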
1715
1716 /* Initialize the backend types that support the user-visible __fp16
1717 type, and initialize a pointer to that type, to be used when
1718 forming HFAs. */
1719
1720 static void
1721 aarch64_init_fp16_types (void)
1722 {
1723 aarch64_fp16_type_node = make_node (REAL_TYPE);
1724 TYPE_PRECISION (aarch64_fp16_type_node) = 16;
1725 layout_type (aarch64_fp16_type_node);
1726
1727 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
1728 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
1729 }
1730
1731 /* Initialize the backend REAL_TYPE type supporting bfloat types. */
1732 static void
1733 aarch64_init_bf16_types (void)
1734 {
1735 lang_hooks.types.register_builtin_type (bfloat16_type_node, "__bf16");
1736 aarch64_bf16_ptr_type_node = build_pointer_type (bfloat16_type_node);
1737 }
1738
1739 /* Pointer authentication builtins that will become NOPs on legacy platforms.
1740 Currently, these builtins are for internal use only (libgcc EH unwinder). */
1741
1742 void
1743 aarch64_init_pauth_hint_builtins (void)
1744 {
1745 /* Pointer Authentication builtins. */
1746 tree ftype_pointer_auth
1747 = build_function_type_list (ptr_type_node, ptr_type_node,
1748 unsigned_intDI_type_node, NULL_TREE);
1749 tree ftype_pointer_strip
1750 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);
1751
1752 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
1753 = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
1754 ftype_pointer_auth,
1755 AARCH64_PAUTH_BUILTIN_AUTIA1716);
1756 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
1757 = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
1758 ftype_pointer_auth,
1759 AARCH64_PAUTH_BUILTIN_PACIA1716);
1760 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
1761 = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
1762 ftype_pointer_auth,
1763 AARCH64_PAUTH_BUILTIN_AUTIB1716);
1764 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
1765 = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
1766 ftype_pointer_auth,
1767 AARCH64_PAUTH_BUILTIN_PACIB1716);
1768 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
1769 = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
1770 ftype_pointer_strip,
1771 AARCH64_PAUTH_BUILTIN_XPACLRI);
1772 }
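
/* Usage sketch (editorial; PTR and MODIFIER are placeholder variables): the
   signatures built above give, for example:

     void *signed_ptr = __builtin_aarch64_pacia1716 (ptr, modifier);
     void *auth_ptr   = __builtin_aarch64_autia1716 (signed_ptr, modifier);
     void *plain_ptr  = __builtin_aarch64_xpaclri (signed_ptr);

   where MODIFIER has the unsigned 64-bit type used in ftype_pointer_auth.
   As noted above, these builtins are meant for the libgcc EH unwinder,
   not for general application code.  */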
1773
1774 /* Initialize the transactional memory extension (TME) builtins. */
1775 static void
1776 aarch64_init_tme_builtins (void)
1777 {
1778 tree ftype_uint64_void
1779 = build_function_type_list (uint64_type_node, NULL);
1780 tree ftype_void_void
1781 = build_function_type_list (void_type_node, NULL);
1782 tree ftype_void_uint64
1783 = build_function_type_list (void_type_node, uint64_type_node, NULL);
1784
1785 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
1786 = aarch64_general_add_builtin ("__builtin_aarch64_tstart",
1787 ftype_uint64_void,
1788 AARCH64_TME_BUILTIN_TSTART);
1789 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
1790 = aarch64_general_add_builtin ("__builtin_aarch64_ttest",
1791 ftype_uint64_void,
1792 AARCH64_TME_BUILTIN_TTEST);
1793 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
1794 = aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
1795 ftype_void_void,
1796 AARCH64_TME_BUILTIN_TCOMMIT);
1797 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
1798 = aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
1799 ftype_void_uint64,
1800 AARCH64_TME_BUILTIN_TCANCEL);
1801 }
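
/* Usage sketch (editorial, not part of the original source), following the
   function types built above:

     uint64_t status = __builtin_aarch64_tstart ();
     if (status == 0)
       {
         ... transactional region, ended by __builtin_aarch64_tcommit () ...
       }
     else
       {
         ... the transaction did not start; STATUS carries failure bits ...
       }

   __builtin_aarch64_tcancel takes a 16-bit constant reason code and is
   intended to be executed inside a transaction.  The supported user-facing
   interface is the arm_acle.h wrappers, whose names are not defined in this
   file.  */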
1802
1803 /* Add builtins for Random Number instructions. */
1804
1805 static void
1806 aarch64_init_rng_builtins (void)
1807 {
1808 tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
1809 tree ftype
1810 = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
1811 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
1812 = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
1813 AARCH64_BUILTIN_RNG_RNDR);
1814 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
1815 = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
1816 AARCH64_BUILTIN_RNG_RNDRRS);
1817 }
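
/* Usage sketch (editorial, not part of the original source): both builtins
   take a pointer to a 64-bit result and return an int status, e.g.:

     unsigned long long rnd;
     int failed = __builtin_aarch64_rndr (&rnd);
     if (!failed)
       ... use rnd ...

   The status comes from the Z flag set by RNDR/RNDRRS, via the CSET emitted
   in aarch64_expand_rng_builtin below, so zero means a value was produced.  */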
1818
1819 /* Add builtins for reading and writing system registers. */
1820 static void
1821 aarch64_init_rwsr_builtins (void)
1822 {
1823 tree fntype = NULL;
1824 tree const_char_ptr_type
1825 = build_pointer_type (build_type_variant (char_type_node, true, false));
1826
1827 #define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
1828 aarch64_builtin_decls[AARCH64_##F] \
1829 = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
1830
1831 fntype
1832 = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
1833 AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
1834
1835 fntype
1836 = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
1837 AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
1838
1839 fntype
1840 = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
1841 AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
1842
1843 fntype
1844 = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
1845 AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
1846
1847 fntype
1848 = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
1849 AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
1850
1851 fntype
1852 = build_function_type_list (uint128_type_node, const_char_ptr_type, NULL);
1853 AARCH64_INIT_RWSR_BUILTINS_DECL (RSR128, rsr128, fntype);
1854
1855 fntype
1856 = build_function_type_list (void_type_node, const_char_ptr_type,
1857 uint32_type_node, NULL);
1858
1859 AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
1860
1861 fntype
1862 = build_function_type_list (void_type_node, const_char_ptr_type,
1863 const_ptr_type_node, NULL);
1864 AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
1865
1866 fntype
1867 = build_function_type_list (void_type_node, const_char_ptr_type,
1868 uint64_type_node, NULL);
1869 AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
1870
1871 fntype
1872 = build_function_type_list (void_type_node, const_char_ptr_type,
1873 float_type_node, NULL);
1874 AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
1875
1876 fntype
1877 = build_function_type_list (void_type_node, const_char_ptr_type,
1878 double_type_node, NULL);
1879 AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
1880
1881 fntype
1882 = build_function_type_list (void_type_node, const_char_ptr_type,
1883 uint128_type_node, NULL);
1884 AARCH64_INIT_RWSR_BUILTINS_DECL (WSR128, wsr128, fntype);
1885 }
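
/* Usage sketch (editorial, not part of the original source): the first
   argument of each read (rsr...) and write (wsr...) builtin is the system
   register name as a string literal, which is enforced in
   aarch64_general_check_builtin_call below:

     uint64_t old = __builtin_aarch64_rsr64 ("tpidr_el0");
     __builtin_aarch64_wsr64 ("tpidr_el0", old + 1);

   "tpidr_el0" is assumed here to be among the names recognised by
   aarch64_retrieve_sysreg in the expander.  */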
1886
1887 /* Add builtins for data and instruction prefetch. */
1888 static void
1889 aarch64_init_prefetch_builtin (void)
1890 {
1891 #define AARCH64_INIT_PREFETCH_BUILTIN(INDEX, N) \
1892 aarch64_builtin_decls[INDEX] = \
1893 aarch64_general_add_builtin ("__builtin_aarch64_" N, ftype, INDEX)
1894
1895 tree ftype;
1896 tree cv_argtype;
1897 cv_argtype = build_qualified_type (void_type_node, TYPE_QUAL_CONST
1898 | TYPE_QUAL_VOLATILE);
1899 cv_argtype = build_pointer_type (cv_argtype);
1900
1901 ftype = build_function_type_list (void_type_node, cv_argtype, NULL);
1902 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLD, "pld");
1903 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLI, "pli");
1904
1905 ftype = build_function_type_list (void_type_node, unsigned_type_node,
1906 unsigned_type_node, unsigned_type_node,
1907 cv_argtype, NULL);
1908 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLDX, "pldx");
1909
1910 ftype = build_function_type_list (void_type_node, unsigned_type_node,
1911 unsigned_type_node, cv_argtype, NULL);
1912 AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLIX, "plix");
1913 }
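
/* Usage sketch (editorial, not part of the original source): the address-only
   forms take just a pointer, while the extended forms take the prefetch
   operation fields followed by the address, matching the types built above:

     __builtin_aarch64_pld (addr);              (emits PRFM PLDL1KEEP)
     __builtin_aarch64_pldx (0, 1, 0, addr);    (kind, cache level, retention)

   How the three integer arguments select the PLD/PST, L1-SLC and KEEP/STRM
   fields is spelled out in aarch64_expand_prefetch_builtin further down.  */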
1914
1915 /* Initialize the memory tagging extension (MTE) builtins. */
1916 struct
1917 {
1918 tree ftype;
1919 enum insn_code icode;
1920 } aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
1921 AARCH64_MEMTAG_BUILTIN_START - 1];
1922
1923 static void
1924 aarch64_init_memtag_builtins (void)
1925 {
1926 tree fntype = NULL;
1927
1928 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
1929 aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
1930 = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
1931 T, AARCH64_MEMTAG_BUILTIN_##F); \
1932 aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
1933 AARCH64_MEMTAG_BUILTIN_START - 1] = \
1934 {T, CODE_FOR_##I};
1935
1936 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
1937 uint64_type_node, NULL);
1938 AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);
1939
1940 fntype = build_function_type_list (uint64_type_node, ptr_type_node,
1941 uint64_type_node, NULL);
1942 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);
1943
1944 fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
1945 ptr_type_node, NULL);
1946 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);
1947
1948 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
1949 unsigned_type_node, NULL);
1950 AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);
1951
1952 fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
1953 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);
1954
1955 fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
1956 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);
1957
1958 #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
1959 }
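
/* Usage sketch (editorial, not part of the original source), following the
   function types registered above; P and EXCLUDE_MASK are placeholders:

     void *tagged = __builtin_aarch64_memtag_irg (p, exclude_mask);
     void *bumped = __builtin_aarch64_memtag_inc_tag (tagged, 1);
     __builtin_aarch64_memtag_set_tag (bumped);
     void *loaded = __builtin_aarch64_memtag_get_tag (bumped);

   The second argument of inc_tag must be a constant in [0,15], as diagnosed
   in aarch64_expand_builtin_memtag below.  */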
1960
1961 /* Add builtins for Load/Store 64 Byte instructions. */
1962
1963 typedef struct
1964 {
1965 const char *name;
1966 unsigned int code;
1967 tree type;
1968 } ls64_builtins_data;
1969
1970 static GTY(()) tree ls64_arm_data_t = NULL_TREE;
1971
1972 static void
1973 aarch64_init_ls64_builtins_types (void)
1974 {
1975 /* Synthesize:
1976
1977 typedef struct {
1978 uint64_t val[8];
1979 } __arm_data512_t; */
1980 const char *tuple_type_name = "__arm_data512_t";
1981 tree node_type = get_typenode_from_name (UINT64_TYPE);
1982 tree array_type = build_array_type_nelts (node_type, 8);
1983 SET_TYPE_MODE (array_type, V8DImode);
1984
1985 gcc_assert (TYPE_MODE_RAW (array_type) == TYPE_MODE (array_type));
1986 gcc_assert (TYPE_ALIGN (array_type) == 64);
1987
1988 tree field = build_decl (input_location, FIELD_DECL,
1989 get_identifier ("val"), array_type);
1990
1991 ls64_arm_data_t = lang_hooks.types.simulate_record_decl (input_location,
1992 tuple_type_name,
1993 make_array_slice (&field, 1));
1994
1995 gcc_assert (TYPE_MODE (ls64_arm_data_t) == V8DImode);
1996 gcc_assert (TYPE_MODE_RAW (ls64_arm_data_t) == TYPE_MODE (ls64_arm_data_t));
1997 gcc_assert (TYPE_ALIGN (ls64_arm_data_t) == 64);
1998 }
1999
2000 static void
2001 aarch64_init_ls64_builtins (void)
2002 {
2003 aarch64_init_ls64_builtins_types ();
2004
2005 ls64_builtins_data data[4] = {
2006 {"__arm_ld64b", AARCH64_LS64_BUILTIN_LD64B,
2007 build_function_type_list (ls64_arm_data_t,
2008 const_ptr_type_node, NULL_TREE)},
2009 {"__arm_st64b", AARCH64_LS64_BUILTIN_ST64B,
2010 build_function_type_list (void_type_node, ptr_type_node,
2011 ls64_arm_data_t, NULL_TREE)},
2012 {"__arm_st64bv", AARCH64_LS64_BUILTIN_ST64BV,
2013 build_function_type_list (uint64_type_node, ptr_type_node,
2014 ls64_arm_data_t, NULL_TREE)},
2015 {"__arm_st64bv0", AARCH64_LS64_BUILTIN_ST64BV0,
2016 build_function_type_list (uint64_type_node, ptr_type_node,
2017 ls64_arm_data_t, NULL_TREE)},
2018 };
2019
2020 for (size_t i = 0; i < ARRAY_SIZE (data); ++i)
2021 aarch64_builtin_decls[data[i].code]
2022 = aarch64_general_simulate_builtin (data[i].name, data[i].type,
2023 data[i].code);
2024 }
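
/* Usage sketch (editorial, not part of the original source): the four
   functions registered above move a full 64-byte __arm_data512_t between
   memory and registers, e.g. with SRC/DST pointing at suitable (typically
   device) memory:

     __arm_data512_t data = __arm_ld64b (src);
     __arm_st64b (dst, data);
     uint64_t response = __arm_st64bv (dst, data);

   Only the function types come from this file; alignment and memory-type
   requirements are architectural.  */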
2025
2026 static void
2027 aarch64_init_data_intrinsics (void)
2028 {
2029 tree uint32_fntype = build_function_type_list (uint32_type_node,
2030 uint32_type_node, NULL_TREE);
2031 tree ulong_fntype = build_function_type_list (long_unsigned_type_node,
2032 long_unsigned_type_node,
2033 NULL_TREE);
2034 tree uint64_fntype = build_function_type_list (uint64_type_node,
2035 uint64_type_node, NULL_TREE);
2036 aarch64_builtin_decls[AARCH64_REV16]
2037 = aarch64_general_add_builtin ("__builtin_aarch64_rev16", uint32_fntype,
2038 AARCH64_REV16);
2039 aarch64_builtin_decls[AARCH64_REV16L]
2040 = aarch64_general_add_builtin ("__builtin_aarch64_rev16l", ulong_fntype,
2041 AARCH64_REV16L);
2042 aarch64_builtin_decls[AARCH64_REV16LL]
2043 = aarch64_general_add_builtin ("__builtin_aarch64_rev16ll", uint64_fntype,
2044 AARCH64_REV16LL);
2045 aarch64_builtin_decls[AARCH64_RBIT]
2046 = aarch64_general_add_builtin ("__builtin_aarch64_rbit", uint32_fntype,
2047 AARCH64_RBIT);
2048 aarch64_builtin_decls[AARCH64_RBITL]
2049 = aarch64_general_add_builtin ("__builtin_aarch64_rbitl", ulong_fntype,
2050 AARCH64_RBITL);
2051 aarch64_builtin_decls[AARCH64_RBITLL]
2052 = aarch64_general_add_builtin ("__builtin_aarch64_rbitll", uint64_fntype,
2053 AARCH64_RBITLL);
2054 }
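
/* Usage sketch (editorial, not part of the original source): these are plain
   unary builtins on integer values, e.g.:

     uint32_t halfword_swapped = __builtin_aarch64_rev16 (0x11223344u);
       (yields 0x22114433: bytes are swapped within each 16-bit halfword)
     uint64_t bit_reversed = __builtin_aarch64_rbitll (1ULL);
       (yields a value with only bit 63 set)

   The l/ll variants use long_unsigned_type_node and uint64_type_node,
   matching the function types built above.  */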
2055
2056 /* Implement #pragma GCC aarch64 "arm_acle.h". */
2057 void
2058 handle_arm_acle_h (void)
2059 {
2060 if (TARGET_LS64)
2061 aarch64_init_ls64_builtins ();
2062 }
2063
2064 /* Initialize fpsr fpcr getters and setters. */
2065
2066 static void
2067 aarch64_init_fpsr_fpcr_builtins (void)
2068 {
2069 tree ftype_set
2070 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
2071 tree ftype_get
2072 = build_function_type_list (unsigned_type_node, NULL);
2073
2074 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
2075 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
2076 ftype_get,
2077 AARCH64_BUILTIN_GET_FPCR);
2078 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
2079 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
2080 ftype_set,
2081 AARCH64_BUILTIN_SET_FPCR);
2082 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
2083 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
2084 ftype_get,
2085 AARCH64_BUILTIN_GET_FPSR);
2086 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
2087 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
2088 ftype_set,
2089 AARCH64_BUILTIN_SET_FPSR);
2090
2091 ftype_set
2092 = build_function_type_list (void_type_node, long_long_unsigned_type_node,
2093 NULL);
2094 ftype_get
2095 = build_function_type_list (long_long_unsigned_type_node, NULL);
2096
2097 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR64]
2098 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr64",
2099 ftype_get,
2100 AARCH64_BUILTIN_GET_FPCR64);
2101 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR64]
2102 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr64",
2103 ftype_set,
2104 AARCH64_BUILTIN_SET_FPCR64);
2105 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR64]
2106 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr64",
2107 ftype_get,
2108 AARCH64_BUILTIN_GET_FPSR64);
2109 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR64]
2110 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr64",
2111 ftype_set,
2112 AARCH64_BUILTIN_SET_FPSR64);
2113 }
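
/* Usage sketch (editorial, not part of the original source): the getters take
   no arguments and the setters take a single value, in 32-bit and 64-bit
   forms, e.g.:

     unsigned fpcr = __builtin_aarch64_get_fpcr ();
     __builtin_aarch64_set_fpcr (fpcr | (1u << 24));

   Bit 24 of FPCR being the FZ (flush-to-zero) bit is an architectural detail,
   not something defined in this file.  */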
2114
2115 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */
2116
2117 void
2118 aarch64_general_init_builtins (void)
2119 {
2120 aarch64_init_fpsr_fpcr_builtins ();
2121
2122 aarch64_init_fp16_types ();
2123
2124 aarch64_init_bf16_types ();
2125
2126 {
2127 aarch64_simd_switcher simd;
2128 aarch64_init_simd_builtins ();
2129 }
2130
2131 aarch64_init_crc32_builtins ();
2132 aarch64_init_builtin_rsqrt ();
2133 aarch64_init_rng_builtins ();
2134 aarch64_init_data_intrinsics ();
2135
2136 aarch64_init_rwsr_builtins ();
2137 aarch64_init_prefetch_builtin ();
2138
2139 tree ftype_jcvt
2140 = build_function_type_list (intSI_type_node, double_type_node, NULL);
2141 aarch64_builtin_decls[AARCH64_JSCVT]
2142 = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
2143 AARCH64_JSCVT);
2144
2145 /* Initialize pointer authentication builtins which are backed by instructions
2146 in the NOP encoding space.
2147
2148 NOTE: these builtins are supposed to be used by the libgcc unwinder only.
2149 Since return address signing is not supported under ILP32, we don't
2150 register them there. */
2151 if (!TARGET_ILP32)
2152 aarch64_init_pauth_hint_builtins ();
2153
2154 if (TARGET_TME)
2155 aarch64_init_tme_builtins ();
2156
2157 if (TARGET_MEMTAG)
2158 aarch64_init_memtag_builtins ();
2159
2160 if (in_lto_p)
2161 handle_arm_acle_h ();
2162 }
2163
2164 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */
2165 tree
2166 aarch64_general_builtin_decl (unsigned code, bool)
2167 {
2168 if (code >= AARCH64_BUILTIN_MAX)
2169 return error_mark_node;
2170
2171 return aarch64_builtin_decls[code];
2172 }
2173
2174 bool
2175 aarch64_general_check_builtin_call (location_t location, vec<location_t>,
2176 unsigned int code, tree fndecl,
2177 unsigned int nargs ATTRIBUTE_UNUSED, tree *args)
2178 {
2179 switch (code)
2180 {
2181 case AARCH64_RSR:
2182 case AARCH64_RSRP:
2183 case AARCH64_RSR64:
2184 case AARCH64_RSRF:
2185 case AARCH64_RSRF64:
2186 case AARCH64_WSR:
2187 case AARCH64_WSRP:
2188 case AARCH64_WSR64:
2189 case AARCH64_WSRF:
2190 case AARCH64_WSRF64:
2191 tree addr = STRIP_NOPS (args[0]);
2192 if (TREE_CODE (TREE_TYPE (addr)) != POINTER_TYPE
2193 || TREE_CODE (addr) != ADDR_EXPR
2194 || TREE_CODE (TREE_OPERAND (addr, 0)) != STRING_CST)
2195 {
2196 error_at (location, "first argument to %qD must be a string literal",
2197 fndecl);
2198 return false;
2199 }
2200 }
2201 /* Default behavior. */
2202 return true;
2203 }
2204
2205 typedef enum
2206 {
2207 SIMD_ARG_COPY_TO_REG,
2208 SIMD_ARG_CONSTANT,
2209 SIMD_ARG_LANE_INDEX,
2210 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
2211 SIMD_ARG_LANE_PAIR_INDEX,
2212 SIMD_ARG_LANE_QUADTUP_INDEX,
2213 SIMD_ARG_STOP
2214 } builtin_simd_arg;
2215
2216
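/* Expand a call EXP to the builtin whose insn pattern is ICODE.  If
   HAVE_RETVAL is nonzero the result goes to TARGET.  ARGS says how each
   argument is to be handled, terminated by SIMD_ARG_STOP, and BUILTIN_MODE
   is the mode associated with the builtin, used here for lane bounds checks.
   (Editorial summary, not part of the original source.)  */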
2217 static rtx
2218 aarch64_simd_expand_args (rtx target, int icode, int have_retval,
2219 tree exp, builtin_simd_arg *args,
2220 machine_mode builtin_mode)
2221 {
2222 rtx pat;
2223 rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand. */
2224 int opc = 0;
2225
2226 if (have_retval)
2227 {
2228 machine_mode tmode = insn_data[icode].operand[0].mode;
2229 if (!target
2230 || GET_MODE (target) != tmode
2231 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
2232 target = gen_reg_rtx (tmode);
2233 op[opc++] = target;
2234 }
2235
2236 for (;;)
2237 {
2238 builtin_simd_arg thisarg = args[opc - have_retval];
2239
2240 if (thisarg == SIMD_ARG_STOP)
2241 break;
2242 else
2243 {
2244 tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
2245 machine_mode mode = insn_data[icode].operand[opc].mode;
2246 op[opc] = expand_normal (arg);
2247
2248 switch (thisarg)
2249 {
2250 case SIMD_ARG_COPY_TO_REG:
2251 if (POINTER_TYPE_P (TREE_TYPE (arg)))
2252 op[opc] = convert_memory_address (Pmode, op[opc]);
2253 /*gcc_assert (GET_MODE (op[opc]) == mode); */
2254 if (!(*insn_data[icode].operand[opc].predicate)
2255 (op[opc], mode))
2256 op[opc] = copy_to_mode_reg (mode, op[opc]);
2257 break;
2258
2259 case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
2260 gcc_assert (opc > 1);
2261 if (CONST_INT_P (op[opc]))
2262 {
2263 unsigned int nunits
2264 = GET_MODE_NUNITS (builtin_mode).to_constant ();
2265 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
2266 /* Keep to GCC-vector-extension lane indices in the RTL. */
2267 op[opc] = aarch64_endian_lane_rtx (builtin_mode,
2268 INTVAL (op[opc]));
2269 }
2270 goto constant_arg;
2271
2272 case SIMD_ARG_LANE_INDEX:
2273 /* Must be a previous operand into which this is an index. */
2274 gcc_assert (opc > 0);
2275 if (CONST_INT_P (op[opc]))
2276 {
2277 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
2278 unsigned int nunits
2279 = GET_MODE_NUNITS (vmode).to_constant ();
2280 aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
2281 /* Keep to GCC-vector-extension lane indices in the RTL. */
2282 op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
2283 }
2284 /* If the lane index isn't a constant then error out. */
2285 goto constant_arg;
2286
2287 case SIMD_ARG_LANE_PAIR_INDEX:
2288 /* Must be a previous operand into which this is an index and
2289 index is restricted to nunits / 2. */
2290 gcc_assert (opc > 0);
2291 if (CONST_INT_P (op[opc]))
2292 {
2293 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
2294 unsigned int nunits
2295 = GET_MODE_NUNITS (vmode).to_constant ();
2296 aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
2297 /* Keep to GCC-vector-extension lane indices in the RTL. */
2298 int lane = INTVAL (op[opc]);
2299 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
2300 SImode);
2301 }
2302 /* If the lane index isn't a constant then error out. */
2303 goto constant_arg;
2304 case SIMD_ARG_LANE_QUADTUP_INDEX:
2305 /* Must be a previous operand into which this is an index and
2306 index is restricted to nunits / 4. */
2307 gcc_assert (opc > 0);
2308 if (CONST_INT_P (op[opc]))
2309 {
2310 machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
2311 unsigned int nunits
2312 = GET_MODE_NUNITS (vmode).to_constant ();
2313 aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
2314 /* Keep to GCC-vector-extension lane indices in the RTL. */
2315 int lane = INTVAL (op[opc]);
2316 op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
2317 SImode);
2318 }
2319 /* If the lane index isn't a constant then error out. */
2320 goto constant_arg;
2321 case SIMD_ARG_CONSTANT:
2322 constant_arg:
2323 if (!(*insn_data[icode].operand[opc].predicate)
2324 (op[opc], mode))
2325 {
2326 error_at (EXPR_LOCATION (exp),
2327 "argument %d must be a constant immediate",
2328 opc + 1 - have_retval);
2329 return const0_rtx;
2330 }
2331 break;
2332
2333 case SIMD_ARG_STOP:
2334 gcc_unreachable ();
2335 }
2336
2337 opc++;
2338 }
2339 }
2340
2341 switch (opc)
2342 {
2343 case 1:
2344 pat = GEN_FCN (icode) (op[0]);
2345 break;
2346
2347 case 2:
2348 pat = GEN_FCN (icode) (op[0], op[1]);
2349 break;
2350
2351 case 3:
2352 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
2353 break;
2354
2355 case 4:
2356 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
2357 break;
2358
2359 case 5:
2360 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
2361 break;
2362
2363 case 6:
2364 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
2365 break;
2366
2367 default:
2368 gcc_unreachable ();
2369 }
2370
2371 if (!pat)
2372 return NULL_RTX;
2373
2374 emit_insn (pat);
2375
2376 return target;
2377 }
2378
2379 /* Expand an AArch64 AdvSIMD builtin (intrinsic). */
2380 rtx
2381 aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
2382 {
2383 if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
2384 {
2385 rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
2386 rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
2387 if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
2388 && UINTVAL (elementsize) != 0
2389 && UINTVAL (totalsize) != 0)
2390 {
2391 rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
2392 if (CONST_INT_P (lane_idx))
2393 aarch64_simd_lane_bounds (lane_idx, 0,
2394 UINTVAL (totalsize)
2395 / UINTVAL (elementsize),
2396 exp);
2397 else
2398 error_at (EXPR_LOCATION (exp),
2399 "lane index must be a constant immediate");
2400 }
2401 else
2402 error_at (EXPR_LOCATION (exp),
2403 "total size and element size must be a nonzero "
2404 "constant immediate");
2405 /* Don't generate any RTL. */
2406 return const0_rtx;
2407 }
2408 aarch64_simd_builtin_datum *d =
2409 &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
2410 enum insn_code icode = d->code;
2411 builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
2412 int num_args = insn_data[d->code].n_operands;
2413 int is_void = 0;
2414 int k;
2415
2416 is_void = !!(d->qualifiers[0] & qualifier_void);
2417
2418 num_args += is_void;
2419
2420 for (k = 1; k < num_args; k++)
2421 {
2422 /* We have four arrays of data, each indexed in a different fashion.
2423 qualifiers - element 0 always describes the function return type.
2424 operands - element 0 is either the operand for return value (if
2425 the function has a non-void return type) or the operand for the
2426 first argument.
2427 expr_args - element 0 always holds the first argument.
2428 args - element 0 is always used for the return type. */
2429 int qualifiers_k = k;
2430 int operands_k = k - is_void;
2431 int expr_args_k = k - 1;
2432
2433 if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
2434 args[k] = SIMD_ARG_LANE_INDEX;
2435 else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
2436 args[k] = SIMD_ARG_LANE_PAIR_INDEX;
2437 else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
2438 args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
2439 else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
2440 args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
2441 else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
2442 args[k] = SIMD_ARG_CONSTANT;
2443 else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
2444 {
2445 rtx arg
2446 = expand_normal (CALL_EXPR_ARG (exp,
2447 (expr_args_k)));
2448 /* Handle constants only if the predicate allows it. */
2449 bool op_const_int_p =
2450 (CONST_INT_P (arg)
2451 && (*insn_data[icode].operand[operands_k].predicate)
2452 (arg, insn_data[icode].operand[operands_k].mode));
2453 args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
2454 }
2455 else
2456 args[k] = SIMD_ARG_COPY_TO_REG;
2457
2458 }
2459 args[k] = SIMD_ARG_STOP;
2460
2461 /* The interface to aarch64_simd_expand_args expects a 0 if
2462 the function is void, and a 1 if it is not. */
2463 return aarch64_simd_expand_args
2464 (target, icode, !is_void, exp, &args[1], d->mode);
2465 }
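
/* Usage sketch (editorial, not part of the original source): the lane-check
   builtin handled above only diagnoses and expands to no RTL.  arm_neon.h
   style usage passes the vector size, element size and lane number:

     __builtin_aarch64_im_lane_boundsi (sizeof (int32x4_t), sizeof (int32_t),
                                        lane);
       (accepts 0 <= lane <= 3; otherwise the errors above are emitted)

   The exact wrapper macro used by arm_neon.h is not shown in this file.  */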
2466
2467 rtx
2468 aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
2469 {
2470 rtx pat;
2471 aarch64_crc_builtin_datum *d
2472 = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
2473 enum insn_code icode = d->icode;
2474 tree arg0 = CALL_EXPR_ARG (exp, 0);
2475 tree arg1 = CALL_EXPR_ARG (exp, 1);
2476 rtx op0 = expand_normal (arg0);
2477 rtx op1 = expand_normal (arg1);
2478 machine_mode tmode = insn_data[icode].operand[0].mode;
2479 machine_mode mode0 = insn_data[icode].operand[1].mode;
2480 machine_mode mode1 = insn_data[icode].operand[2].mode;
2481
2482 if (! target
2483 || GET_MODE (target) != tmode
2484 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
2485 target = gen_reg_rtx (tmode);
2486
2487 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
2488 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
2489
2490 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
2491 op0 = copy_to_mode_reg (mode0, op0);
2492 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
2493 op1 = copy_to_mode_reg (mode1, op1);
2494
2495 pat = GEN_FCN (icode) (target, op0, op1);
2496 if (!pat)
2497 return NULL_RTX;
2498
2499 emit_insn (pat);
2500 return target;
2501 }
2502
2503 /* Function to expand reciprocal square root builtins. */
2504
2505 static rtx
2506 aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
2507 {
2508 tree arg0 = CALL_EXPR_ARG (exp, 0);
2509 rtx op0 = expand_normal (arg0);
2510
2511 rtx (*gen) (rtx, rtx);
2512
2513 switch (fcode)
2514 {
2515 case AARCH64_BUILTIN_RSQRT_DF:
2516 gen = gen_rsqrtdf2;
2517 break;
2518 case AARCH64_BUILTIN_RSQRT_SF:
2519 gen = gen_rsqrtsf2;
2520 break;
2521 case AARCH64_BUILTIN_RSQRT_V2DF:
2522 gen = gen_rsqrtv2df2;
2523 break;
2524 case AARCH64_BUILTIN_RSQRT_V2SF:
2525 gen = gen_rsqrtv2sf2;
2526 break;
2527 case AARCH64_BUILTIN_RSQRT_V4SF:
2528 gen = gen_rsqrtv4sf2;
2529 break;
2530 default: gcc_unreachable ();
2531 }
2532
2533 if (!target)
2534 target = gen_reg_rtx (GET_MODE (op0));
2535
2536 emit_insn (gen (target, op0));
2537
2538 return target;
2539 }
2540
2541 /* Expand a FCMLA lane expression EXP with code FCODE and
2542 result going to TARGET if that is convenient. */
2543
2544 rtx
2545 aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
2546 {
2547 int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
2548 aarch64_fcmla_laneq_builtin_datum* d
2549 = &aarch64_fcmla_lane_builtin_data[bcode];
2550 machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
2551 rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
2552 rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
2553 rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
2554 tree tmp = CALL_EXPR_ARG (exp, 3);
2555 rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);
2556
2557 /* Validate that the lane index is a constant. */
2558 if (!CONST_INT_P (lane_idx))
2559 {
2560 error_at (EXPR_LOCATION (exp),
2561 "argument %d must be a constant immediate", 4);
2562 return const0_rtx;
2563 }
2564
2565 /* Validate that the index is within the expected range. */
2566 int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
2567 aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);
2568
2569 /* Generate the correct register and mode. */
2570 int lane = INTVAL (lane_idx);
2571
2572 if (lane < nunits / 4)
2573 op2 = simplify_gen_subreg (d->mode, op2, quadmode,
2574 subreg_lowpart_offset (d->mode, quadmode));
2575 else
2576 {
2577 /* Select the upper 64 bits, either a V2SF or V4HF. This is
2578 messy: although the operation itself is simple, it is hard
2579 to express with a single RTL pattern, and the generic
2580 helper gen_highpart_mode generates code that isn't
2581 optimal. */
2582 rtx temp1 = gen_reg_rtx (d->mode);
2583 rtx temp2 = gen_reg_rtx (DImode);
2584 temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
2585 subreg_lowpart_offset (d->mode, quadmode));
2586 temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
2587 if (BYTES_BIG_ENDIAN)
2588 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
2589 else
2590 emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
2591 op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);
2592
2593 /* And recalculate the index. */
2594 lane -= nunits / 4;
2595 }
2596
2597 /* Keep to GCC-vector-extension lane indices in the RTL. Only nunits / 4
2598 values (the maximum in the range check above) are valid, i.e. 0-1, so
2599 we only need to know the lane order in a V2 mode. */
2600 lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);
2601
2602 if (!target
2603 || !REG_P (target)
2604 || GET_MODE (target) != d->mode)
2605 target = gen_reg_rtx (d->mode);
2606
2607 rtx pat = NULL_RTX;
2608
2609 if (d->lane)
2610 pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
2611 else
2612 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
2613
2614 if (!pat)
2615 return NULL_RTX;
2616
2617 emit_insn (pat);
2618 return target;
2619 }
2620
2621 /* Function to expand an expression EXP which calls one of the Transactional
2622 Memory Extension (TME) builtins FCODE with the result going to TARGET. */
2623 static rtx
2624 aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
2625 {
2626 switch (fcode)
2627 {
2628 case AARCH64_TME_BUILTIN_TSTART:
2629 target = gen_reg_rtx (DImode);
2630 emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
2631 break;
2632
2633 case AARCH64_TME_BUILTIN_TTEST:
2634 target = gen_reg_rtx (DImode);
2635 emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
2636 break;
2637
2638 case AARCH64_TME_BUILTIN_TCOMMIT:
2639 emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
2640 break;
2641
2642 case AARCH64_TME_BUILTIN_TCANCEL:
2643 {
2644 tree arg0 = CALL_EXPR_ARG (exp, 0);
2645 rtx op0 = expand_normal (arg0);
2646 if (CONST_INT_P (op0) && UINTVAL (op0) <= 65535)
2647 emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
2648 else
2649 {
2650 error_at (EXPR_LOCATION (exp),
2651 "argument must be a 16-bit constant immediate");
2652 return const0_rtx;
2653 }
2654 }
2655 break;
2656
2657 default :
2658 gcc_unreachable ();
2659 }
2660 return target;
2661 }
2662
2663 /* Function to expand an expression EXP which calls one of the Load/Store
2664 64 Byte extension (LS64) builtins FCODE with the result going to TARGET. */
2665 static rtx
2666 aarch64_expand_builtin_ls64 (int fcode, tree exp, rtx target)
2667 {
2668 expand_operand ops[3];
2669
2670 switch (fcode)
2671 {
2672 case AARCH64_LS64_BUILTIN_LD64B:
2673 {
2674 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2675 create_output_operand (&ops[0], target, V8DImode);
2676 create_input_operand (&ops[1], op0, DImode);
2677 expand_insn (CODE_FOR_ld64b, 2, ops);
2678 return ops[0].value;
2679 }
2680 case AARCH64_LS64_BUILTIN_ST64B:
2681 {
2682 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2683 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2684 create_input_operand (&ops[0], op0, DImode);
2685 create_input_operand (&ops[1], op1, V8DImode);
2686 expand_insn (CODE_FOR_st64b, 2, ops);
2687 return const0_rtx;
2688 }
2689 case AARCH64_LS64_BUILTIN_ST64BV:
2690 {
2691 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2692 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2693 create_output_operand (&ops[0], target, DImode);
2694 create_input_operand (&ops[1], op0, DImode);
2695 create_input_operand (&ops[2], op1, V8DImode);
2696 expand_insn (CODE_FOR_st64bv, 3, ops);
2697 return ops[0].value;
2698 }
2699 case AARCH64_LS64_BUILTIN_ST64BV0:
2700 {
2701 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2702 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2703 create_output_operand (&ops[0], target, DImode);
2704 create_input_operand (&ops[1], op0, DImode);
2705 create_input_operand (&ops[2], op1, V8DImode);
2706 expand_insn (CODE_FOR_st64bv0, 3, ops);
2707 return ops[0].value;
2708 }
2709 }
2710
2711 gcc_unreachable ();
2712 }
2713
2714 /* Expand a random number builtin EXP with code FCODE, putting the result
2715 in TARGET. If IGNORE is true the return value is ignored. */
2716
2717 rtx
2718 aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
2719 {
2720 rtx pat;
2721 enum insn_code icode;
2722 if (fcode == AARCH64_BUILTIN_RNG_RNDR)
2723 icode = CODE_FOR_aarch64_rndr;
2724 else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
2725 icode = CODE_FOR_aarch64_rndrrs;
2726 else
2727 gcc_unreachable ();
2728
2729 rtx rand = gen_reg_rtx (DImode);
2730 pat = GEN_FCN (icode) (rand);
2731 if (!pat)
2732 return NULL_RTX;
2733
2734 tree arg0 = CALL_EXPR_ARG (exp, 0);
2735 rtx res_addr = expand_normal (arg0);
2736 res_addr = convert_memory_address (Pmode, res_addr);
2737 rtx res_mem = gen_rtx_MEM (DImode, res_addr);
2738 emit_insn (pat);
2739 emit_move_insn (res_mem, rand);
2740 /* If the status result is unused don't generate the CSET code. */
2741 if (ignore)
2742 return target;
2743
2744 rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
2745 rtx cmp_rtx = gen_rtx_fmt_ee (EQ, SImode, cc_reg, const0_rtx);
2746 emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
2747 return target;
2748 }
2749
2750 /* Expand the read/write system register builtin EXPs. */
2751 rtx
2752 aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
2753 {
2754 tree arg0, arg1;
2755 rtx const_str, input_val, subreg;
2756 enum machine_mode mode;
2757 enum insn_code icode;
2758 class expand_operand ops[2];
2759
2760 arg0 = CALL_EXPR_ARG (exp, 0);
2761
2762 bool write_op = (fcode == AARCH64_WSR
2763 || fcode == AARCH64_WSRP
2764 || fcode == AARCH64_WSR64
2765 || fcode == AARCH64_WSRF
2766 || fcode == AARCH64_WSRF64
2767 || fcode == AARCH64_WSR128);
2768
2769 bool op128 = (fcode == AARCH64_RSR128 || fcode == AARCH64_WSR128);
2770 enum machine_mode sysreg_mode = op128 ? TImode : DImode;
2771
2772 if (op128 && !TARGET_D128)
2773 {
2774 error_at (EXPR_LOCATION (exp), "128-bit system register support requires"
2775 " the %<d128%> extension");
2776 return const0_rtx;
2777 }
2778
2779 /* Argument 0 (system register name) must be a string literal. */
2780 gcc_assert (TREE_CODE (arg0) == ADDR_EXPR
2781 && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
2782 && TREE_CODE (TREE_OPERAND (arg0, 0)) == STRING_CST);
2783
2784 const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));
2785
2786 tree len_tree = c_strlen (arg0, 1);
2787 if (len_tree == NULL_TREE)
2788 {
2789 error_at (EXPR_LOCATION (exp), "invalid system register name provided");
2790 return const0_rtx;
2791 }
2792
2793 size_t len = TREE_INT_CST_LOW (len_tree);
2794 char *sysreg_name = xstrdup (name_input);
2795
2796 for (unsigned pos = 0; pos <= len; pos++)
2797 sysreg_name[pos] = TOLOWER (sysreg_name[pos]);
2798
2799 const char* name_output = aarch64_retrieve_sysreg ((const char *) sysreg_name,
2800 write_op, op128);
2801 if (name_output == NULL)
2802 {
2803 error_at (EXPR_LOCATION (exp), "invalid system register name %qs",
2804 sysreg_name);
2805 return const0_rtx;
2806 }
2807
2808 /* Assign the string corresponding to the system register name to an RTX. */
2809 const_str = rtx_alloc (CONST_STRING);
2810 PUT_CODE (const_str, CONST_STRING);
2811 XSTR (const_str, 0) = ggc_strdup (name_output);
2812
2813 /* Set up expander operands and call instruction expansion. */
2814 if (write_op)
2815 {
2816 arg1 = CALL_EXPR_ARG (exp, 1);
2817 mode = TYPE_MODE (TREE_TYPE (arg1));
2818 input_val = copy_to_mode_reg (mode, expand_normal (arg1));
2819
2820 icode = (op128 ? CODE_FOR_aarch64_write_sysregti
2821 : CODE_FOR_aarch64_write_sysregdi);
2822
2823 switch (fcode)
2824 {
2825 case AARCH64_WSR:
2826 case AARCH64_WSRP:
2827 case AARCH64_WSR64:
2828 case AARCH64_WSRF64:
2829 case AARCH64_WSR128:
2830 subreg = lowpart_subreg (sysreg_mode, input_val, mode);
2831 break;
2832 case AARCH64_WSRF:
2833 subreg = gen_lowpart_SUBREG (SImode, input_val);
2834 subreg = gen_lowpart_SUBREG (DImode, subreg);
2835 break;
2836 }
2837
2838 create_fixed_operand (&ops[0], const_str);
2839 create_input_operand (&ops[1], subreg, sysreg_mode);
2840 expand_insn (icode, 2, ops);
2841
2842 return target;
2843 }
2844
2845 /* Read operations are implied by !write_op. */
2846 gcc_assert (call_expr_nargs (exp) == 1);
2847
2848 icode = (op128 ? CODE_FOR_aarch64_read_sysregti
2849 : CODE_FOR_aarch64_read_sysregdi);
2850
2851 /* Emit the initial read_sysregdi rtx. */
2852 create_output_operand (&ops[0], target, sysreg_mode);
2853 create_fixed_operand (&ops[1], const_str);
2854 expand_insn (icode, 2, ops);
2855 target = ops[0].value;
2856
2857 /* Do any necessary post-processing on the result. */
2858 switch (fcode)
2859 {
2860 case AARCH64_RSR:
2861 case AARCH64_RSRP:
2862 case AARCH64_RSR64:
2863 case AARCH64_RSRF64:
2864 case AARCH64_RSR128:
2865 return lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)), target, sysreg_mode);
2866 case AARCH64_RSRF:
2867 subreg = gen_lowpart_SUBREG (SImode, target);
2868 return gen_lowpart_SUBREG (SFmode, subreg);
2869 default:
2870 gcc_unreachable ();
2871 }
2872 }
2873
2874 /* Ensure argument ARGNO in EXP represents a const-type argument in the range
2875 [MINVAL, MAXVAL). */
2876 static HOST_WIDE_INT
2877 require_const_argument (tree exp, unsigned int argno, HOST_WIDE_INT minval,
2878 HOST_WIDE_INT maxval)
2879 {
2880 maxval--;
2881 tree arg = CALL_EXPR_ARG (exp, argno);
2882 if (TREE_CODE (arg) != INTEGER_CST)
2883 error_at (EXPR_LOCATION (exp), "constant-type argument expected");
2884
2885 auto argval = wi::to_widest (arg);
2886
2887 if (argval < minval || argval > maxval)
2888 error_at (EXPR_LOCATION (exp),
2889 "argument %d must be a constant immediate "
2890 "in range [%wd,%wd]", argno + 1, minval, maxval);
2891
2892 HOST_WIDE_INT retval = argval.to_shwi ();
2893 return retval;
2894 }
2895
2896
2897 /* Expand a prefetch builtin EXP. */
2898 void
2899 aarch64_expand_prefetch_builtin (tree exp, int fcode)
2900 {
2901 int kind_id = -1;
2902 int level_id = -1;
2903 int rettn_id = -1;
2904 char prfop[11];
2905 class expand_operand ops[2];
2906
2907 static const char *kind_s[] = {"PLD", "PST", "PLI"};
2908 static const char *level_s[] = {"L1", "L2", "L3", "SLC"};
2909 static const char *rettn_s[] = {"KEEP", "STRM"};
2910
2911 /* Each of the four prefetch builtins takes a different number of arguments,
2912 but proceeds to call the PRFM insn which requires 4 pieces of information
2913 to be fully defined. Where one of these takes fewer than 4 arguments, set
2914 sensible defaults. */
2915 switch (fcode)
2916 {
2917 case AARCH64_PLDX:
2918 break;
2919 case AARCH64_PLIX:
2920 kind_id = 2;
2921 break;
2922 case AARCH64_PLI:
2923 case AARCH64_PLD:
2924 kind_id = (fcode == AARCH64_PLD) ? 0 : 2;
2925 level_id = 0;
2926 rettn_id = 0;
2927 break;
2928 default:
2929 gcc_unreachable ();
2930 }
2931
2932 /* Any -1 id variable is to be user-supplied. Here we fill these in and run
2933 bounds checks on them. "PLI" is used only implicitly by AARCH64_PLI &
2934 AARCH64_PLIX, never explicitly. */
2935 int argno = 0;
2936 if (kind_id < 0)
2937 kind_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (kind_s) - 1);
2938 if (level_id < 0)
2939 level_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (level_s));
2940 if (rettn_id < 0)
2941 rettn_id = require_const_argument (exp, argno++, 0, ARRAY_SIZE (rettn_s));
2942 rtx address = expand_expr (CALL_EXPR_ARG (exp, argno), NULL_RTX, Pmode,
2943 EXPAND_NORMAL);
2944
2945 if (seen_error ())
2946 return;
2947
2948 sprintf (prfop, "%s%s%s", kind_s[kind_id],
2949 level_s[level_id],
2950 rettn_s[rettn_id]);
2951
2952 rtx const_str = rtx_alloc (CONST_STRING);
2953 PUT_CODE (const_str, CONST_STRING);
2954 XSTR (const_str, 0) = ggc_strdup (prfop);
2955
2956 create_fixed_operand (&ops[0], const_str);
2957 create_address_operand (&ops[1], address);
2958 maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, ops);
2959 }
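
/* Worked illustration (editorial, not part of the original source): for

     __builtin_aarch64_pldx (1, 2, 0, addr);

   the lookups above give kind_s[1] = "PST", level_s[2] = "L3" and
   rettn_s[0] = "KEEP", so the CONST_STRING handed to the aarch64_pldx
   pattern is "PSTL3KEEP".  */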
2960
2961 /* Expand an expression EXP that calls a MEMTAG built-in FCODE
2962 with result going to TARGET. */
2963 static rtx
2964 aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
2965 {
2966 if (TARGET_ILP32)
2967 {
2968 error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");
2969 return const0_rtx;
2970 }
2971
2972 rtx pat = NULL;
2973 enum insn_code icode = aarch64_memtag_builtin_data[fcode -
2974 AARCH64_MEMTAG_BUILTIN_START - 1].icode;
2975
2976 rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
2977 machine_mode mode0 = GET_MODE (op0);
2978 op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0);
2979 op0 = convert_to_mode (DImode, op0, true);
2980
2981 switch (fcode)
2982 {
2983 case AARCH64_MEMTAG_BUILTIN_IRG:
2984 case AARCH64_MEMTAG_BUILTIN_GMI:
2985 case AARCH64_MEMTAG_BUILTIN_SUBP:
2986 case AARCH64_MEMTAG_BUILTIN_INC_TAG:
2987 {
2988 if (! target
2989 || GET_MODE (target) != DImode
2990 || ! (*insn_data[icode].operand[0].predicate) (target, DImode))
2991 target = gen_reg_rtx (DImode);
2992
2993 if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
2994 {
2995 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
2996
2997 if ((*insn_data[icode].operand[3].predicate) (op1, QImode))
2998 {
2999 pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
3000 break;
3001 }
3002 error_at (EXPR_LOCATION (exp),
3003 "argument %d must be a constant immediate "
3004 "in range [0,15]", 2);
3005 return const0_rtx;
3006 }
3007 else
3008 {
3009 rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
3010 machine_mode mode1 = GET_MODE (op1);
3011 op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1);
3012 op1 = convert_to_mode (DImode, op1, true);
3013 pat = GEN_FCN (icode) (target, op0, op1);
3014 }
3015 break;
3016 }
3017 case AARCH64_MEMTAG_BUILTIN_GET_TAG:
3018 target = op0;
3019 pat = GEN_FCN (icode) (target, op0, const0_rtx);
3020 break;
3021 case AARCH64_MEMTAG_BUILTIN_SET_TAG:
3022 pat = GEN_FCN (icode) (op0, op0, const0_rtx);
3023 break;
3024 default:
3025 gcc_unreachable();
3026 }
3027
3028 if (!pat)
3029 return NULL_RTX;
3030
3031 emit_insn (pat);
3032 return target;
3033 }
3034
3035 /* Function to expand an expression EXP which calls one of the ACLE Data
3036 Intrinsic builtins FCODE with the result going to TARGET. */
3037 static rtx
3038 aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target)
3039 {
3040 expand_operand ops[2];
3041 machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
3042 create_output_operand (&ops[0], target, mode);
3043 create_input_operand (&ops[1], expand_normal (CALL_EXPR_ARG (exp, 0)), mode);
3044 enum insn_code icode;
3045
3046 switch (fcode)
3047 {
3048 case AARCH64_REV16:
3049 case AARCH64_REV16L:
3050 case AARCH64_REV16LL:
3051 icode = code_for_aarch64_rev16 (mode);
3052 break;
3053 case AARCH64_RBIT:
3054 case AARCH64_RBITL:
3055 case AARCH64_RBITLL:
3056 icode = code_for_aarch64_rbit (mode);
3057 break;
3058 default:
3059 gcc_unreachable ();
3060 }
3061
3062 expand_insn (icode, 2, ops);
3063 return ops[0].value;
3064 }
3065
3066 /* Expand an expression EXP as an fpsr or fpcr setter (depending on
3067 UNSPEC) using MODE. */
3068 static void
3069 aarch64_expand_fpsr_fpcr_setter (int unspec, machine_mode mode, tree exp)
3070 {
3071 tree arg = CALL_EXPR_ARG (exp, 0);
3072 rtx op = force_reg (mode, expand_normal (arg));
3073 emit_insn (gen_aarch64_set (unspec, mode, op));
3074 }
3075
3076 /* Expand an fpsr or fpcr getter (depending on UNSPEC) using MODE.
3077 Return the target. */
3078 static rtx
3079 aarch64_expand_fpsr_fpcr_getter (enum insn_code icode, machine_mode mode,
3080 rtx target)
3081 {
3082 expand_operand op;
3083 create_output_operand (&op, target, mode);
3084 expand_insn (icode, 1, &op);
3085 return op.value;
3086 }
3087
3088 /* Expand an expression EXP that calls built-in function FCODE,
3089 with result going to TARGET if that's convenient. IGNORE is true
3090 if the result of the builtin is ignored. */
3091 rtx
3092 aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
3093 int ignore)
3094 {
3095 int icode;
3096 rtx op0;
3097 tree arg0;
3098
3099 switch (fcode)
3100 {
3101 case AARCH64_BUILTIN_GET_FPCR:
3102 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrsi,
3103 SImode, target);
3104 case AARCH64_BUILTIN_SET_FPCR:
3105 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, SImode, exp);
3106 return target;
3107 case AARCH64_BUILTIN_GET_FPSR:
3108 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrsi,
3109 SImode, target);
3110 case AARCH64_BUILTIN_SET_FPSR:
3111 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, SImode, exp);
3112 return target;
3113 case AARCH64_BUILTIN_GET_FPCR64:
3114 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpcrdi,
3115 DImode, target);
3116 case AARCH64_BUILTIN_SET_FPCR64:
3117 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPCR, DImode, exp);
3118 return target;
3119 case AARCH64_BUILTIN_GET_FPSR64:
3120 return aarch64_expand_fpsr_fpcr_getter (CODE_FOR_aarch64_get_fpsrdi,
3121 DImode, target);
3122 case AARCH64_BUILTIN_SET_FPSR64:
3123 aarch64_expand_fpsr_fpcr_setter (UNSPECV_SET_FPSR, DImode, exp);
3124 return target;
3125 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
3126 case AARCH64_PAUTH_BUILTIN_PACIA1716:
3127 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
3128 case AARCH64_PAUTH_BUILTIN_PACIB1716:
3129 case AARCH64_PAUTH_BUILTIN_XPACLRI:
3130 arg0 = CALL_EXPR_ARG (exp, 0);
3131 op0 = force_reg (Pmode, expand_normal (arg0));
3132
3133 if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
3134 {
3135 rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
3136 icode = CODE_FOR_xpaclri;
3137 emit_move_insn (lr, op0);
3138 emit_insn (GEN_FCN (icode) ());
3139 return lr;
3140 }
3141 else
3142 {
3143 tree arg1 = CALL_EXPR_ARG (exp, 1);
3144 rtx op1 = force_reg (Pmode, expand_normal (arg1));
3145 switch (fcode)
3146 {
3147 case AARCH64_PAUTH_BUILTIN_AUTIA1716:
3148 icode = CODE_FOR_autia1716;
3149 break;
3150 case AARCH64_PAUTH_BUILTIN_AUTIB1716:
3151 icode = CODE_FOR_autib1716;
3152 break;
3153 case AARCH64_PAUTH_BUILTIN_PACIA1716:
3154 icode = CODE_FOR_pacia1716;
3155 break;
3156 case AARCH64_PAUTH_BUILTIN_PACIB1716:
3157 icode = CODE_FOR_pacib1716;
3158 break;
3159 default:
3160 icode = 0;
3161 gcc_unreachable ();
3162 }
3163
3164 rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
3165 rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
3166 emit_move_insn (x17_reg, op0);
3167 emit_move_insn (x16_reg, op1);
3168 emit_insn (GEN_FCN (icode) ());
3169 return x17_reg;
3170 }
3171
3172 case AARCH64_JSCVT:
3173 {
3174 expand_operand ops[2];
3175 create_output_operand (&ops[0], target, SImode);
3176 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
3177 create_input_operand (&ops[1], op0, DFmode);
3178 expand_insn (CODE_FOR_aarch64_fjcvtzs, 2, ops);
3179 return ops[0].value;
3180 }
3181
3182 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
3183 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
3184 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
3185 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
3186 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
3187 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
3188 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
3189 case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
3190 return aarch64_expand_fcmla_builtin (exp, target, fcode);
3191 case AARCH64_BUILTIN_RNG_RNDR:
3192 case AARCH64_BUILTIN_RNG_RNDRRS:
3193 return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
3194 case AARCH64_RSR:
3195 case AARCH64_RSRP:
3196 case AARCH64_RSR64:
3197 case AARCH64_RSRF:
3198 case AARCH64_RSRF64:
3199 case AARCH64_RSR128:
3200 case AARCH64_WSR:
3201 case AARCH64_WSRP:
3202 case AARCH64_WSR64:
3203 case AARCH64_WSRF:
3204 case AARCH64_WSRF64:
3205 case AARCH64_WSR128:
3206 return aarch64_expand_rwsr_builtin (exp, target, fcode);
3207 case AARCH64_PLD:
3208 case AARCH64_PLDX:
3209 case AARCH64_PLI:
3210 case AARCH64_PLIX:
3211 aarch64_expand_prefetch_builtin (exp, fcode);
3212 return target;
3213 }
3214
3215 if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
3216 return aarch64_simd_expand_builtin (fcode, exp, target);
3217 else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX)
3218 return aarch64_crc32_expand_builtin (fcode, exp, target);
3219
3220 if (fcode == AARCH64_BUILTIN_RSQRT_DF
3221 || fcode == AARCH64_BUILTIN_RSQRT_SF
3222 || fcode == AARCH64_BUILTIN_RSQRT_V2DF
3223 || fcode == AARCH64_BUILTIN_RSQRT_V2SF
3224 || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
3225 return aarch64_expand_builtin_rsqrt (fcode, exp, target);
3226
3227 if (fcode == AARCH64_TME_BUILTIN_TSTART
3228 || fcode == AARCH64_TME_BUILTIN_TCOMMIT
3229 || fcode == AARCH64_TME_BUILTIN_TTEST
3230 || fcode == AARCH64_TME_BUILTIN_TCANCEL)
3231 return aarch64_expand_builtin_tme (fcode, exp, target);
3232
3233 if (fcode == AARCH64_LS64_BUILTIN_LD64B
3234 || fcode == AARCH64_LS64_BUILTIN_ST64B
3235 || fcode == AARCH64_LS64_BUILTIN_ST64BV
3236 || fcode == AARCH64_LS64_BUILTIN_ST64BV0)
3237 return aarch64_expand_builtin_ls64 (fcode, exp, target);
3238
3239 if (fcode >= AARCH64_MEMTAG_BUILTIN_START
3240 && fcode <= AARCH64_MEMTAG_BUILTIN_END)
3241 return aarch64_expand_builtin_memtag (fcode, exp, target);
3242 if (fcode >= AARCH64_REV16
3243 && fcode <= AARCH64_RBITLL)
3244 return aarch64_expand_builtin_data_intrinsic (fcode, exp, target);
3245
3246 gcc_unreachable ();
3247 }
3248
3249 /* Return the reciprocal square root builtin for square-root builtin FN, if any. */
3250
3251 tree
3252 aarch64_general_builtin_rsqrt (unsigned int fn)
3253 {
3254 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
3255 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
3256 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
3257 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
3258 if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
3259 return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
3260 return NULL_TREE;
3261 }
3262
3263 /* Return true if the lane check can be removed, as no error is
3264 going to be emitted. */
3265 static bool
3266 aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
3267 {
3268 if (TREE_CODE (arg0) != INTEGER_CST)
3269 return false;
3270 if (TREE_CODE (arg1) != INTEGER_CST)
3271 return false;
3272 if (TREE_CODE (arg2) != INTEGER_CST)
3273 return false;
3274
3275 auto totalsize = wi::to_widest (arg0);
3276 auto elementsize = wi::to_widest (arg1);
3277 if (totalsize == 0 || elementsize == 0)
3278 return false;
3279 auto lane = wi::to_widest (arg2);
3280 auto high = wi::udiv_trunc (totalsize, elementsize);
3281 return wi::ltu_p (lane, high);
3282 }
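/* Worked example of the check above, with hypothetical but typical
   arguments: for an int32x4_t lane access such as vgetq_lane_s32, the
   lane-check builtin receives the total vector size and the element size
   in matching units (say 16 and 4), giving high = 16 / 4 = 4; a constant
   lane in [0, 3] therefore lets the check be deleted, while anything else
   keeps it so that the out-of-range error is still reported.  */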
3283
3284 #undef VAR1
3285 #define VAR1(T, N, MAP, FLAG, A) \
3286 case AARCH64_SIMD_BUILTIN_##T##_##N##A:
3287
3288 #undef VREINTERPRET_BUILTIN
3289 #define VREINTERPRET_BUILTIN(A, B, L) \
3290 case AARCH64_SIMD_BUILTIN_VREINTERPRET##L##_##A##_##B:
3291
3292
3293 /* Try to fold a call to the built-in function with subcode FCODE. The
3294 function is passed the N_ARGS arguments in ARGS and it returns a value
3295 of type TYPE. Return the new expression on success and NULL_TREE on
3296 failure. */
3297 tree
3298 aarch64_general_fold_builtin (unsigned int fcode, tree type,
3299 unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
3300 {
3301 switch (fcode)
3302 {
3303 BUILTIN_VDQF (UNOP, abs, 2, ALL)
3304 return fold_build1 (ABS_EXPR, type, args[0]);
3305 VAR1 (UNOP, floatv2si, 2, ALL, v2sf)
3306 VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
3307 VAR1 (UNOP, floatv2di, 2, ALL, v2df)
3308 return fold_build1 (FLOAT_EXPR, type, args[0]);
3309 AARCH64_SIMD_VREINTERPRET_BUILTINS
3310 return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]);
3311 case AARCH64_SIMD_BUILTIN_LANE_CHECK:
3312 gcc_assert (n_args == 3);
3313 if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
3314 return void_node;
3315 break;
3316 default:
3317 break;
3318 }
3319
3320 return NULL_TREE;
3321 }
3322
3323 enum aarch64_simd_type
3324 get_mem_type_for_load_store (unsigned int fcode)
3325 {
3326 switch (fcode)
3327 {
3328 VAR1 (LOAD1, ld1, 0, LOAD, v8qi)
3329 VAR1 (STORE1, st1, 0, STORE, v8qi)
3330 return Int8x8_t;
3331 VAR1 (LOAD1, ld1, 0, LOAD, v16qi)
3332 VAR1 (STORE1, st1, 0, STORE, v16qi)
3333 return Int8x16_t;
3334 VAR1 (LOAD1, ld1, 0, LOAD, v4hi)
3335 VAR1 (STORE1, st1, 0, STORE, v4hi)
3336 return Int16x4_t;
3337 VAR1 (LOAD1, ld1, 0, LOAD, v8hi)
3338 VAR1 (STORE1, st1, 0, STORE, v8hi)
3339 return Int16x8_t;
3340 VAR1 (LOAD1, ld1, 0, LOAD, v2si)
3341 VAR1 (STORE1, st1, 0, STORE, v2si)
3342 return Int32x2_t;
3343 VAR1 (LOAD1, ld1, 0, LOAD, v4si)
3344 VAR1 (STORE1, st1, 0, STORE, v4si)
3345 return Int32x4_t;
3346 VAR1 (LOAD1, ld1, 0, LOAD, v2di)
3347 VAR1 (STORE1, st1, 0, STORE, v2di)
3348 return Int64x2_t;
3349 VAR1 (LOAD1_U, ld1, 0, LOAD, v8qi)
3350 VAR1 (STORE1_U, st1, 0, STORE, v8qi)
3351 return Uint8x8_t;
3352 VAR1 (LOAD1_U, ld1, 0, LOAD, v16qi)
3353 VAR1 (STORE1_U, st1, 0, STORE, v16qi)
3354 return Uint8x16_t;
3355 VAR1 (LOAD1_U, ld1, 0, LOAD, v4hi)
3356 VAR1 (STORE1_U, st1, 0, STORE, v4hi)
3357 return Uint16x4_t;
3358 VAR1 (LOAD1_U, ld1, 0, LOAD, v8hi)
3359 VAR1 (STORE1_U, st1, 0, STORE, v8hi)
3360 return Uint16x8_t;
3361 VAR1 (LOAD1_U, ld1, 0, LOAD, v2si)
3362 VAR1 (STORE1_U, st1, 0, STORE, v2si)
3363 return Uint32x2_t;
3364 VAR1 (LOAD1_U, ld1, 0, LOAD, v4si)
3365 VAR1 (STORE1_U, st1, 0, STORE, v4si)
3366 return Uint32x4_t;
3367 VAR1 (LOAD1_U, ld1, 0, LOAD, v2di)
3368 VAR1 (STORE1_U, st1, 0, STORE, v2di)
3369 return Uint64x2_t;
3370 VAR1 (LOAD1_P, ld1, 0, LOAD, v8qi)
3371 VAR1 (STORE1_P, st1, 0, STORE, v8qi)
3372 return Poly8x8_t;
3373 VAR1 (LOAD1_P, ld1, 0, LOAD, v16qi)
3374 VAR1 (STORE1_P, st1, 0, STORE, v16qi)
3375 return Poly8x16_t;
3376 VAR1 (LOAD1_P, ld1, 0, LOAD, v4hi)
3377 VAR1 (STORE1_P, st1, 0, STORE, v4hi)
3378 return Poly16x4_t;
3379 VAR1 (LOAD1_P, ld1, 0, LOAD, v8hi)
3380 VAR1 (STORE1_P, st1, 0, STORE, v8hi)
3381 return Poly16x8_t;
3382 VAR1 (LOAD1_P, ld1, 0, LOAD, v2di)
3383 VAR1 (STORE1_P, st1, 0, STORE, v2di)
3384 return Poly64x2_t;
3385 VAR1 (LOAD1, ld1, 0, LOAD, v4hf)
3386 VAR1 (STORE1, st1, 0, STORE, v4hf)
3387 return Float16x4_t;
3388 VAR1 (LOAD1, ld1, 0, LOAD, v8hf)
3389 VAR1 (STORE1, st1, 0, STORE, v8hf)
3390 return Float16x8_t;
3391 VAR1 (LOAD1, ld1, 0, LOAD, v4bf)
3392 VAR1 (STORE1, st1, 0, STORE, v4bf)
3393 return Bfloat16x4_t;
3394 VAR1 (LOAD1, ld1, 0, LOAD, v8bf)
3395 VAR1 (STORE1, st1, 0, STORE, v8bf)
3396 return Bfloat16x8_t;
3397 VAR1 (LOAD1, ld1, 0, LOAD, v2sf)
3398 VAR1 (STORE1, st1, 0, STORE, v2sf)
3399 return Float32x2_t;
3400 VAR1 (LOAD1, ld1, 0, LOAD, v4sf)
3401 VAR1 (STORE1, st1, 0, STORE, v4sf)
3402 return Float32x4_t;
3403 VAR1 (LOAD1, ld1, 0, LOAD, v2df)
3404 VAR1 (STORE1, st1, 0, STORE, v2df)
3405 return Float64x2_t;
3406 default:
3407 gcc_unreachable ();
3408 break;
3409 }
3410 }
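/* This mapping recovers the arm_neon.h vector type behind a given
   ld1/st1 builtin subcode, so that the gimple folding below can build a
   MEM_REF whose type and alignment match the vector type the intrinsic
   loads or stores.  */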
3411
3412 /* We've seen a vector load from address ADDR. Record it in
3413 vector_load_decls, if appropriate. */
3414 static void
3415 aarch64_record_vector_load_arg (tree addr)
3416 {
3417 tree decl = aarch64_vector_load_decl (addr);
3418 if (!decl)
3419 return;
3420 if (!cfun->machine->vector_load_decls)
3421 cfun->machine->vector_load_decls = hash_set<tree>::create_ggc (31);
3422 cfun->machine->vector_load_decls->add (decl);
3423 }
3424
3425 /* Try to fold STMT, given that it's a call to the built-in function with
3426 subcode FCODE. Return the new statement on success and null on
3427 failure. */
3428 gimple *
3429 aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt,
3430 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED)
3431 {
3432 gimple *new_stmt = NULL;
3433 unsigned nargs = gimple_call_num_args (stmt);
3434 tree *args = (nargs > 0
3435 ? gimple_call_arg_ptr (stmt, 0)
3436 : &error_mark_node);
3437
3438 /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX) internal functions for float,
3439 signed int and unsigned int; they distinguish the variants according
3440 to the types of the arguments to the __builtin. */
3441 switch (fcode)
3442 {
3443 BUILTIN_VALL (UNOP, reduc_plus_scal_, 10, ALL)
3444 BUILTIN_VDQ_I (UNOPU, reduc_plus_scal_, 10, NONE)
3445 new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
3446 1, args[0]);
3447 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3448 break;
3449
3450 /* Lower sqrt builtins to gimple/internal function sqrt. */
3451 BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
3452 new_stmt = gimple_build_call_internal (IFN_SQRT,
3453 1, args[0]);
3454 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3455 break;
3456
3457 BUILTIN_VDC (BINOP, combine, 0, AUTO_FP)
3458 BUILTIN_VD_I (BINOPU, combine, 0, NONE)
3459 BUILTIN_VDC_P (BINOPP, combine, 0, NONE)
3460 {
3461 tree first_part, second_part;
3462 if (BYTES_BIG_ENDIAN)
3463 {
3464 second_part = args[0];
3465 first_part = args[1];
3466 }
3467 else
3468 {
3469 first_part = args[0];
3470 second_part = args[1];
3471 }
3472 tree ret_type = gimple_call_return_type (stmt);
3473 tree ctor = build_constructor_va (ret_type, 2, NULL_TREE, first_part,
3474 NULL_TREE, second_part);
3475 new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ctor);
3476 }
3477 break;
3478
3479 /* Lower store and load Neon builtins to gimple. */
3480 BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
3481 BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
3482 BUILTIN_VALLP_NO_DI (LOAD1_P, ld1, 0, LOAD)
3483 /* Punt until after inlining, so that we stand a better chance of
3484 recording something meaningful in vector_load_decls. */
3485 if (!cfun->after_inlining)
3486 break;
3487 aarch64_record_vector_load_arg (args[0]);
3488 if (!BYTES_BIG_ENDIAN)
3489 {
3490 enum aarch64_simd_type mem_type
3491 = get_mem_type_for_load_store (fcode);
3492 aarch64_simd_type_info simd_type
3493 = aarch64_simd_types[mem_type];
3494 tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
3495 VOIDmode, true);
3496 tree zero = build_zero_cst (elt_ptr_type);
3497 /* Use element type alignment. */
3498 tree access_type
3499 = build_aligned_type (simd_type.itype,
3500 TYPE_ALIGN (simd_type.eltype));
3501 new_stmt
3502 = gimple_build_assign (gimple_get_lhs (stmt),
3503 fold_build2 (MEM_REF,
3504 access_type,
3505 args[0], zero));
3506 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3507 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3508 }
3509 break;
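/* Sketch of the effect of the fold above on little-endian targets,
   assuming the usual builtin naming (__builtin_aarch64_ld1v4si behind
   vld1q_s32):

     int32x4_t x = vld1q_s32 (p);

   becomes a plain gimple load from *p whose access type is the
   int32x4_t vector type with its alignment reduced to that of the
   element type, since LD1 only requires element alignment.  */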
3510
3511 BUILTIN_VALL_F16 (STORE1, st1, 0, STORE)
3512 BUILTIN_VDQ_I (STORE1_U, st1, 0, STORE)
3513 BUILTIN_VALLP_NO_DI (STORE1_P, st1, 0, STORE)
3514 if (!BYTES_BIG_ENDIAN)
3515 {
3516 enum aarch64_simd_type mem_type
3517 = get_mem_type_for_load_store (fcode);
3518 aarch64_simd_type_info simd_type
3519 = aarch64_simd_types[mem_type];
3520 tree elt_ptr_type = build_pointer_type_for_mode (simd_type.eltype,
3521 VOIDmode, true);
3522 tree zero = build_zero_cst (elt_ptr_type);
3523 /* Use element type alignment. */
3524 tree access_type
3525 = build_aligned_type (simd_type.itype,
3526 TYPE_ALIGN (simd_type.eltype));
3527 new_stmt
3528 = gimple_build_assign (fold_build2 (MEM_REF, access_type,
3529 args[0], zero),
3530 args[1]);
3531 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3532 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3533 }
3534 break;
3535
3536 BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10, ALL)
3537 BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10, ALL)
3538 new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
3539 1, args[0]);
3540 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3541 break;
3542 BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10, ALL)
3543 BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10, ALL)
3544 new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
3545 1, args[0]);
3546 gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
3547 break;
3548 BUILTIN_VSDQ_I_DI (BINOP, ashl, 3, NONE)
3549 if (TREE_CODE (args[1]) == INTEGER_CST
3550 && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
3551 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3552 LSHIFT_EXPR, args[0], args[1]);
3553 break;
3554 BUILTIN_VSDQ_I_DI (BINOP, sshl, 0, NONE)
3555 BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0, NONE)
3556 {
3557 tree cst = args[1];
3558 tree ctype = TREE_TYPE (cst);
3559 /* Left shifts can be either scalar or vector, e.g. uint64x1_t is
3560 treated as a scalar type, not a vector one. */
3561 if ((cst = uniform_integer_cst_p (cst)) != NULL_TREE)
3562 {
3563 wide_int wcst = wi::to_wide (cst);
3564 tree unit_ty = TREE_TYPE (cst);
3565
3566 wide_int abs_cst = wi::abs (wcst);
3567 if (wi::geu_p (abs_cst, element_precision (args[0])))
3568 break;
3569
3570 if (wi::neg_p (wcst, TYPE_SIGN (ctype)))
3571 {
3572 tree final_cst;
3573 final_cst = wide_int_to_tree (unit_ty, abs_cst);
3574 if (TREE_CODE (cst) != INTEGER_CST)
3575 final_cst = build_uniform_cst (ctype, final_cst);
3576
3577 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3578 RSHIFT_EXPR, args[0],
3579 final_cst);
3580 }
3581 else
3582 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3583 LSHIFT_EXPR, args[0], args[1]);
3584 }
3585 }
3586 break;
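/* Example of the sshl/ushl fold above: a shift by a uniform constant
   becomes a plain gimple shift, with a negative count turned into a
   right shift by its absolute value, e.g.

     vshlq_s32 (x, vdupq_n_s32 (-3))   -->  x >> 3
     vshl_u64 (x, vdup_n_s64 (2))      -->  x << 2

   Counts whose magnitude is not less than the element precision are
   left for the expander to handle.  */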
3587 BUILTIN_VDQ_I (SHIFTIMM, ashr, 3, NONE)
3588 VAR1 (SHIFTIMM, ashr_simd, 0, NONE, di)
3589 BUILTIN_VDQ_I (USHIFTIMM, lshr, 3, NONE)
3590 VAR1 (USHIFTIMM, lshr_simd, 0, NONE, di)
3591 if (TREE_CODE (args[1]) == INTEGER_CST
3592 && wi::ltu_p (wi::to_wide (args[1]), element_precision (args[0])))
3593 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3594 RSHIFT_EXPR, args[0], args[1]);
3595 break;
3596 BUILTIN_GPF (BINOP, fmulx, 0, ALL)
3597 {
3598 gcc_assert (nargs == 2);
3599 bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
3600 bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
3601 if (a0_cst_p || a1_cst_p)
3602 {
3603 if (a0_cst_p && a1_cst_p)
3604 {
3605 tree t0 = TREE_TYPE (args[0]);
3606 real_value a0 = (TREE_REAL_CST (args[0]));
3607 real_value a1 = (TREE_REAL_CST (args[1]));
3608 if (real_equal (&a1, &dconst0))
3609 std::swap (a0, a1);
3610 /* According to real_equal (), +0 equals -0. */
3611 if (real_equal (&a0, &dconst0) && real_isinf (&a1))
3612 {
3613 real_value res = dconst2;
3614 res.sign = a0.sign ^ a1.sign;
3615 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3616 REAL_CST,
3617 build_real (t0, res));
3618 }
3619 else
3620 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3621 MULT_EXPR,
3622 args[0], args[1]);
3623 }
3624 else /* a0_cst_p ^ a1_cst_p. */
3625 {
3626 real_value const_part = a0_cst_p
3627 ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
3628 if (!real_equal (&const_part, &dconst0)
3629 && !real_isinf (&const_part))
3630 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3631 MULT_EXPR, args[0],
3632 args[1]);
3633 }
3634 }
3635 if (new_stmt)
3636 {
3637 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
3638 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
3639 }
3640 break;
3641 }
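/* FMULX differs from an ordinary multiply only for (+/-0) * (+/-Inf),
   which yields +/-2.0 (the sign being the exclusive or of the operand
   signs) rather than a NaN.  So with two constant operands the call
   above folds either to that constant or to a MULT_EXPR, and with a
   single constant operand it folds to a MULT_EXPR only when the
   constant is known to be neither zero nor infinite.  */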
3642 case AARCH64_SIMD_BUILTIN_LANE_CHECK:
3643 if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
3644 {
3645 unlink_stmt_vdef (stmt);
3646 release_defs (stmt);
3647 new_stmt = gimple_build_nop ();
3648 }
3649 break;
3650 default:
3651 break;
3652 }
3653
3654 /* GIMPLE assign statements (unlike calls) require a non-null lhs. If we
3655 created an assign statement with a null lhs, then fix this by assigning
3656 to a new (and subsequently unused) variable. */
3657 if (new_stmt && is_gimple_assign (new_stmt) && !gimple_assign_lhs (new_stmt))
3658 {
3659 tree new_lhs = make_ssa_name (gimple_call_return_type (stmt));
3660 gimple_assign_set_lhs (new_stmt, new_lhs);
3661 }
3662
3663 return new_stmt;
3664 }
3665
3666 void
3667 aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
3668 {
3669 const unsigned AARCH64_FE_INVALID = 1;
3670 const unsigned AARCH64_FE_DIVBYZERO = 2;
3671 const unsigned AARCH64_FE_OVERFLOW = 4;
3672 const unsigned AARCH64_FE_UNDERFLOW = 8;
3673 const unsigned AARCH64_FE_INEXACT = 16;
3674 const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
3675 | AARCH64_FE_DIVBYZERO
3676 | AARCH64_FE_OVERFLOW
3677 | AARCH64_FE_UNDERFLOW
3678 | AARCH64_FE_INEXACT);
3679 const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
3680 tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
3681 tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
3682 tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr;
3683 tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv;
3684
3685 /* Generate the equivalent of:
3686 unsigned int fenv_cr;
3687 fenv_cr = __builtin_aarch64_get_fpcr ();
3688
3689 unsigned int fenv_sr;
3690 fenv_sr = __builtin_aarch64_get_fpsr ();
3691
3692 Now set all exceptions to non-stop
3693 unsigned int mask_cr
3694 = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
3695 unsigned int masked_cr;
3696 masked_cr = fenv_cr & mask_cr;
3697
3698 And clear all exception flags
3699 unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
3700 unsigned int masked_sr;
3701 masked_sr = fenv_sr & mask_sr;
3702
3703 __builtin_aarch64_set_fpcr (masked_cr);
3704 __builtin_aarch64_set_fpsr (masked_sr); */
3705
3706 fenv_cr = create_tmp_var_raw (unsigned_type_node);
3707 fenv_sr = create_tmp_var_raw (unsigned_type_node);
3708
3709 get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
3710 set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
3711 get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
3712 set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];
3713
3714 mask_cr = build_int_cst (unsigned_type_node,
3715 ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT));
3716 mask_sr = build_int_cst (unsigned_type_node,
3717 ~(AARCH64_FE_ALL_EXCEPT));
3718
3719 ld_fenv_cr = build4 (TARGET_EXPR, unsigned_type_node,
3720 fenv_cr, build_call_expr (get_fpcr, 0),
3721 NULL_TREE, NULL_TREE);
3722 ld_fenv_sr = build4 (TARGET_EXPR, unsigned_type_node,
3723 fenv_sr, build_call_expr (get_fpsr, 0),
3724 NULL_TREE, NULL_TREE);
3725
3726 masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
3727 masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);
3728
3729 hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
3730 hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);
3731
3732 hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
3733 hold_fnclex_sr);
3734 masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
3735 masked_fenv_sr);
3736 ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);
3737
3738 *hold = build2 (COMPOUND_EXPR, void_type_node,
3739 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
3740 hold_fnclex);
3741
3742 /* Store the value of masked_fenv to clear the exceptions:
3743 __builtin_aarch64_set_fpsr (masked_fenv_sr); */
3744
3745 *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);
3746
3747 /* Generate the equivalent of:
3748 unsigned int new_fenv_var;
3749 new_fenv_var = __builtin_aarch64_get_fpsr ();
3750
3751 __builtin_aarch64_set_fpsr (fenv_sr);
3752
3753 __atomic_feraiseexcept (new_fenv_var); */
3754
3755 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
3756 reload_fenv = build4 (TARGET_EXPR, unsigned_type_node,
3757 new_fenv_var, build_call_expr (get_fpsr, 0),
3758 NULL_TREE, NULL_TREE);
3759 restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
3760 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
3761 update_call = build_call_expr (atomic_feraiseexcept, 1,
3762 fold_convert (integer_type_node, new_fenv_var));
3763 *update = build2 (COMPOUND_EXPR, void_type_node,
3764 build2 (COMPOUND_EXPR, void_type_node,
3765 reload_fenv, restore_fnenv), update_call);
3766 }
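/* The function above implements the TARGET_ATOMIC_ASSIGN_EXPAND_FENV
   hook: *HOLD saves FPCR/FPSR and installs a non-stop, flags-clear
   environment, *CLEAR re-clears the FPSR exception flags, and *UPDATE
   restores the saved FPSR and re-raises, via __atomic_feraiseexcept,
   any exceptions the protected operation produced; roughly the
   feholdexcept / feclearexcept / feupdateenv pattern used around C11
   atomic compound assignments on floating-point types.  */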
3767
3768 /* Resolve overloaded MEMTAG built-in functions. */
3769 #define AARCH64_BUILTIN_SUBCODE(F) \
3770 (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)
3771
3772 static tree
3773 aarch64_resolve_overloaded_memtag (location_t loc,
3774 tree fndecl, void *pass_params)
3775 {
3776 vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
3777 unsigned param_num = params ? params->length() : 0;
3778 unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
3779 tree inittype = aarch64_memtag_builtin_data[
3780 fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
3781 unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;
3782
3783 if (param_num != arg_num)
3784 {
3785 TREE_TYPE (fndecl) = inittype;
3786 return NULL_TREE;
3787 }
3788 tree retype = NULL;
3789
3790 if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
3791 {
3792 tree t0 = TREE_TYPE ((*params)[0]);
3793 tree t1 = TREE_TYPE ((*params)[1]);
3794
3795 if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
3796 t0 = ptr_type_node;
3797 if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
3798 t1 = ptr_type_node;
3799
3800 if (TYPE_MODE (t0) != DImode)
3801 warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
3802 (int)tree_to_shwi (DECL_SIZE ((*params)[0])));
3803
3804 if (TYPE_MODE (t1) != DImode)
3805 warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
3806 (int)tree_to_shwi (DECL_SIZE ((*params)[1])));
3807
3808 retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
3809 }
3810 else
3811 {
3812 tree t0 = TREE_TYPE ((*params)[0]);
3813
3814 if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
3815 {
3816 TREE_TYPE (fndecl) = inittype;
3817 return NULL_TREE;
3818 }
3819
3820 if (TYPE_MODE (t0) != DImode)
3821 warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
3822 (int)tree_to_shwi (DECL_SIZE ((*params)[0])));
3823
3824 switch (fcode)
3825 {
3826 case AARCH64_MEMTAG_BUILTIN_IRG:
3827 retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
3828 break;
3829 case AARCH64_MEMTAG_BUILTIN_GMI:
3830 retype = build_function_type_list (uint64_type_node, t0,
3831 uint64_type_node, NULL);
3832 break;
3833 case AARCH64_MEMTAG_BUILTIN_INC_TAG:
3834 retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
3835 break;
3836 case AARCH64_MEMTAG_BUILTIN_SET_TAG:
3837 retype = build_function_type_list (void_type_node, t0, NULL);
3838 break;
3839 case AARCH64_MEMTAG_BUILTIN_GET_TAG:
3840 retype = build_function_type_list (t0, t0, NULL);
3841 break;
3842 default:
3843 return NULL_TREE;
3844 }
3845 }
3846
3847 if (!retype || retype == error_mark_node)
3848 TREE_TYPE (fndecl) = inittype;
3849 else
3850 TREE_TYPE (fndecl) = retype;
3851
3852 return NULL_TREE;
3853 }
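/* Example of the resolution above, using the ACLE MTE intrinsic names
   that arm_acle.h presumably maps onto these builtins
   (__arm_mte_get_tag, __arm_mte_ptrdiff and friends): given

     int *p = ...;
     int *q = __arm_mte_get_tag (p);

   the GET_TAG case rewrites the builtin's type so that both the
   parameter and the return type become "int *", letting the call
   type-check and keep the caller's own pointer type.  */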
3854
3855 /* Called from aarch64_resolve_overloaded_builtin in aarch64-c.cc. */
3856 tree
3857 aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
3858 void *pass_params)
3859 {
3860 unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function);
3861
3862 if (fcode >= AARCH64_MEMTAG_BUILTIN_START
3863 && fcode <= AARCH64_MEMTAG_BUILTIN_END)
3864 return aarch64_resolve_overloaded_memtag (loc, function, pass_params);
3865
3866 return NULL_TREE;
3867 }
3868
3869 #undef AARCH64_CHECK_BUILTIN_MODE
3870 #undef AARCH64_FIND_FRINT_VARIANT
3871 #undef CF0
3872 #undef CF1
3873 #undef CF2
3874 #undef CF3
3875 #undef CF4
3876 #undef CF10
3877 #undef VAR1
3878 #undef VAR2
3879 #undef VAR3
3880 #undef VAR4
3881 #undef VAR5
3882 #undef VAR6
3883 #undef VAR7
3884 #undef VAR8
3885 #undef VAR9
3886 #undef VAR10
3887 #undef VAR11
3888
3889 #include "gt-aarch64-builtins.h"