1/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
 2 Copyright (C) 2018-2022 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20#include "config.h"
21#include "system.h"
22#include "coretypes.h"
23#include "tm.h"
24#include "tree.h"
25#include "rtl.h"
26#include "tm_p.h"
27#include "memmodel.h"
28#include "insn-codes.h"
29#include "optabs.h"
30#include "recog.h"
31#include "expr.h"
32#include "basic-block.h"
33#include "function.h"
34#include "fold-const.h"
35#include "gimple.h"
36#include "gimple-iterator.h"
37#include "gimplify.h"
38#include "explow.h"
39#include "emit-rtl.h"
40#include "tree-vector-builder.h"
41#include "rtx-vector-builder.h"
42#include "vec-perm-indices.h"
43#include "aarch64-sve-builtins.h"
44#include "aarch64-sve-builtins-shapes.h"
45#include "aarch64-sve-builtins-base.h"
46#include "aarch64-sve-builtins-functions.h"
47#include "ssa.h"
48
49using namespace aarch64_sve;
50
51namespace {
52
53/* Return the UNSPEC_CMLA* unspec for rotation amount ROT. */
54static int
55unspec_cmla (int rot)
56{
57 switch (rot)
58 {
59 case 0: return UNSPEC_CMLA;
60 case 90: return UNSPEC_CMLA90;
61 case 180: return UNSPEC_CMLA180;
62 case 270: return UNSPEC_CMLA270;
63 default: gcc_unreachable ();
64 }
65}
66
67/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT. */
68static int
69unspec_fcmla (int rot)
70{
71 switch (rot)
72 {
73 case 0: return UNSPEC_FCMLA;
74 case 90: return UNSPEC_FCMLA90;
75 case 180: return UNSPEC_FCMLA180;
76 case 270: return UNSPEC_FCMLA270;
77 default: gcc_unreachable ();
78 }
79}
80
81/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT. */
82static int
83unspec_cond_fcmla (int rot)
84{
85 switch (rot)
86 {
87 case 0: return UNSPEC_COND_FCMLA;
88 case 90: return UNSPEC_COND_FCMLA90;
89 case 180: return UNSPEC_COND_FCMLA180;
90 case 270: return UNSPEC_COND_FCMLA270;
91 default: gcc_unreachable ();
92 }
93}
94
95/* Expand a call to svmad, or svmla after reordering its operands.
96 Make _m forms merge with argument MERGE_ARGNO. */
97static rtx
98expand_mad (function_expander &e,
99 unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
100{
101 if (e.pred == PRED_x)
102 {
103 insn_code icode;
104 if (e.type_suffix (0).integer_p)
105 icode = code_for_aarch64_pred_fma (e.vector_mode (0));
106 else
107 icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0));
108 return e.use_pred_x_insn (icode);
109 }
110
111 insn_code icode = e.direct_optab_handler (cond_fma_optab);
112 return e.use_cond_insn (icode, merge_argno);
113}
114
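/* For example, svmad (pg, a, b, c) computes a * b + c, so its addend is
   already the final argument, whereas svmla (pg, a, b, c) computes
   a + b * c; svmla_impl below therefore rotates its operands so that both
   forms reach this routine with the addend last.  */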
115/* Expand a call to svmla_lane or svmls_lane using floating-point unspec
116 UNSPEC. */
117static rtx
118expand_mla_mls_lane (function_expander &e, int unspec)
119{
120 /* Put the operands in the normal (fma ...) order, with the accumulator
121 last. This fits naturally since that's also the unprinted operand
122 in the asm output. */
123 e.rotate_inputs_left (0, 4);
124 insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
125 return e.use_exact_insn (icode);
126}
127
128/* Expand a call to svmsb, or svmls after reordering its operands.
129 Make _m forms merge with argument MERGE_ARGNO. */
130static rtx
131expand_msb (function_expander &e,
132 unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
133{
134 if (e.pred == PRED_x)
135 {
136 insn_code icode;
137 if (e.type_suffix (0).integer_p)
138 icode = code_for_aarch64_pred_fnma (e.vector_mode (0));
139 else
140 icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0));
141 return e.use_pred_x_insn (icode);
142 }
143
144 insn_code icode = e.direct_optab_handler (cond_fnma_optab);
145 return e.use_cond_insn (icode, merge_argno);
146}
147
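/* Similarly, svmsb (pg, a, b, c) computes c - a * b with the minuend last,
   whereas svmls (pg, a, b, c) computes a - b * c; svmls_impl below rotates
   its operands before reusing this routine.  */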
148class svabd_impl : public function_base
149{
150public:
151 rtx
 152 expand (function_expander &e) const override
153 {
154 /* The integer operations are represented as the subtraction of the
155 minimum from the maximum, with the signedness of the instruction
156 keyed off the signedness of the maximum operation. */
157 rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
158 insn_code icode;
159 if (e.pred == PRED_x)
160 {
161 if (e.type_suffix (0).integer_p)
162 icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0));
163 else
164 icode = code_for_aarch64_pred_abd (e.vector_mode (0));
165 return e.use_pred_x_insn (icode);
166 }
167
168 if (e.type_suffix (0).integer_p)
169 icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0));
170 else
171 icode = code_for_aarch64_cond_abd (e.vector_mode (0));
172 return e.use_cond_insn (icode);
173 }
174};
175
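/* For instance, with the scheme used by svabd_impl above, an integer
   svabd (pg, a, b) becomes max (a, b) - min (a, b), with UMAX or SMAX
   chosen according to the signedness of the type suffix.  */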
176/* Implements svacge, svacgt, svacle and svaclt. */
177class svac_impl : public function_base
178{
179public:
 180 CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {}
181
182 rtx
 183 expand (function_expander &e) const override
184 {
185 e.add_ptrue_hint (0, e.gp_mode (0));
186 insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
187 return e.use_exact_insn (icode);
188 }
189
190 /* The unspec code for the underlying comparison. */
191 int m_unspec;
192};
193
194class svadda_impl : public function_base
195{
196public:
197 rtx
 198 expand (function_expander &e) const override
199 {
200 /* Put the predicate last, as required by mask_fold_left_plus_optab. */
201 e.rotate_inputs_left (0, 3);
202 machine_mode mode = e.vector_mode (0);
203 insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode);
204 return e.use_exact_insn (icode);
205 }
206};
207
208/* Implements svadr[bhwd]. */
209class svadr_bhwd_impl : public function_base
210{
211public:
 212 CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {}
213
214 rtx
 215 expand (function_expander &e) const override
216 {
217 machine_mode mode = GET_MODE (e.args[0]);
218 if (m_shift == 0)
219 return e.use_exact_insn (code_for_aarch64_adr (mode));
220
221 /* Turn the access size into an extra shift argument. */
222 rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode));
223 e.args.quick_push (expand_vector_broadcast (mode, shift));
224 return e.use_exact_insn (code_for_aarch64_adr_shift (mode));
225 }
226
227 /* How many bits left to shift the vector displacement. */
228 unsigned int m_shift;
229};
230
231class svbic_impl : public function_base
232{
233public:
234 rtx
 235 expand (function_expander &e) const override
236 {
237 /* Convert svbic of a constant into svand of its inverse. */
238 if (CONST_INT_P (e.args[2]))
239 {
240 machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
241 e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
242 return e.map_to_rtx_codes (AND, AND, -1);
243 }
244
245 if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
246 {
247 gcc_assert (e.pred == PRED_z);
248 return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z);
249 }
250
251 if (e.pred == PRED_x)
252 return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0)));
253
254 return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
255 }
256};
257
258/* Implements svbrkn, svbrkpa and svbrkpb. */
259class svbrk_binary_impl : public function_base
260{
261public:
 262 CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {}
263
264 rtx
 265 expand (function_expander &e) const override
266 {
267 return e.use_exact_insn (code_for_aarch64_brk (m_unspec));
268 }
269
270 /* The unspec code associated with the operation. */
271 int m_unspec;
272};
273
274/* Implements svbrka and svbrkb. */
275class svbrk_unary_impl : public function_base
276{
277public:
 278 CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {}
279
280 rtx
 281 expand (function_expander &e) const override
282 {
283 return e.use_cond_insn (code_for_aarch64_brk (m_unspec));
284 }
285
286 /* The unspec code associated with the operation. */
287 int m_unspec;
288};
289
290class svcadd_impl : public function_base
291{
292public:
293 rtx
 294 expand (function_expander &e) const override
295 {
296 /* Convert the rotation amount into a specific unspec. */
297 int rot = INTVAL (e.args.pop ());
298 if (rot == 90)
299 return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
300 UNSPEC_COND_FCADD90);
301 if (rot == 270)
302 return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
303 UNSPEC_COND_FCADD270);
304 gcc_unreachable ();
305 }
306};
307
308/* Implements svclasta and svclastb. */
309class svclast_impl : public quiet<function_base>
310{
311public:
 312 CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {}
313
314 rtx
 315 expand (function_expander &e) const override
316 {
317 /* Match the fold_extract_optab order. */
318 std::swap (e.args[0], e.args[1]);
319 machine_mode mode = e.vector_mode (0);
320 insn_code icode;
321 if (e.mode_suffix_id == MODE_n)
322 icode = code_for_fold_extract (m_unspec, mode);
323 else
324 icode = code_for_aarch64_fold_extract_vector (m_unspec, mode);
325 return e.use_exact_insn (icode);
326 }
327
328 /* The unspec code associated with the operation. */
329 int m_unspec;
330};
331
332class svcmla_impl : public function_base
333{
334public:
335 rtx
 336 expand (function_expander &e) const override
337 {
338 /* Convert the rotation amount into a specific unspec. */
339 int rot = INTVAL (e.args.pop ());
340 if (e.type_suffix (0).float_p)
341 {
342 /* Make the operand order the same as the one used by the fma optabs,
343 with the accumulator last. */
344 e.rotate_inputs_left (1, 4);
345 return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3);
346 }
347 else
348 {
349 int cmla = unspec_cmla (rot);
350 return e.map_to_unspecs (cmla, cmla, -1);
351 }
352 }
353};
354
355class svcmla_lane_impl : public function_base
356{
357public:
358 rtx
 359 expand (function_expander &e) const override
360 {
361 /* Convert the rotation amount into a specific unspec. */
362 int rot = INTVAL (e.args.pop ());
363 machine_mode mode = e.vector_mode (0);
364 if (e.type_suffix (0).float_p)
365 {
366 /* Make the operand order the same as the one used by the fma optabs,
367 with the accumulator last. */
368 e.rotate_inputs_left (0, 4);
369 insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode);
370 return e.use_exact_insn (icode);
371 }
372 else
373 {
374 insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode);
375 return e.use_exact_insn (icode);
376 }
377 }
378};
379
380/* Implements svcmp<cc> (except svcmpuo, which is handled separately). */
381class svcmp_impl : public function_base
382{
383public:
 384 CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp)
385 : m_code (code), m_unspec_for_fp (unspec_for_fp) {}
386
 387 gimple *
 388 fold (gimple_folder &f) const override
389 {
390 tree pg = gimple_call_arg (f.call, 0);
391 tree rhs1 = gimple_call_arg (f.call, 1);
392 tree rhs2 = gimple_call_arg (f.call, 2);
393
394 /* Convert a ptrue-predicated integer comparison into the corresponding
395 gimple-level operation. */
396 if (integer_all_onesp (pg)
397 && f.type_suffix (0).element_bytes == 1
398 && f.type_suffix (0).integer_p)
399 {
400 gimple_seq stmts = NULL;
401 rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2);
402 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
403 return gimple_build_assign (f.lhs, m_code, rhs1, rhs2);
404 }
405
406 return NULL;
407 }
408
 409 rtx
 410 expand (function_expander &e) const override
411 {
412 machine_mode mode = e.vector_mode (0);
413
414 /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
415 operand. */
416 e.add_ptrue_hint (0, e.gp_mode (0));
417
418 if (e.type_suffix (0).integer_p)
419 {
420 bool unsigned_p = e.type_suffix (0).unsigned_p;
421 rtx_code code = get_rtx_code (m_code, unsigned_p);
422 return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode));
423 }
424
425 insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
426 return e.use_exact_insn (icode);
427 }
428
429 /* The tree code associated with the comparison. */
430 tree_code m_code;
431
432 /* The unspec code to use for floating-point comparisons. */
433 int m_unspec_for_fp;
434};
435
436/* Implements svcmp<cc>_wide. */
437class svcmp_wide_impl : public function_base
438{
439public:
 440 CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint,
441 int unspec_for_uint)
442 : m_code (code), m_unspec_for_sint (unspec_for_sint),
443 m_unspec_for_uint (unspec_for_uint) {}
444
445 rtx
 446 expand (function_expander &e) const override
447 {
448 machine_mode mode = e.vector_mode (0);
449 bool unsigned_p = e.type_suffix (0).unsigned_p;
450 rtx_code code = get_rtx_code (m_code, unsigned_p);
451
452 /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
453 operand. */
454 e.add_ptrue_hint (0, e.gp_mode (0));
455
456 /* If the argument is a constant that the unwidened comparisons
457 can handle directly, use them instead. */
458 insn_code icode = code_for_aarch64_pred_cmp (code, mode);
459 rtx op2 = unwrap_const_vec_duplicate (e.args[3]);
460 if (CONSTANT_P (op2)
461 && insn_data[icode].operand[4].predicate (op2, DImode))
462 {
463 e.args[3] = op2;
464 return e.use_exact_insn (icode);
465 }
466
467 int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint);
468 return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode));
469 }
470
471 /* The tree code associated with the comparison. */
472 tree_code m_code;
473
474 /* The unspec codes for signed and unsigned wide comparisons
475 respectively. */
476 int m_unspec_for_sint;
477 int m_unspec_for_uint;
478};
479
480class svcmpuo_impl : public quiet<function_base>
481{
482public:
483 rtx
 484 expand (function_expander &e) const override
485 {
486 e.add_ptrue_hint (0, e.gp_mode (0));
487 return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
488 }
489};
490
491class svcnot_impl : public function_base
492{
493public:
494 rtx
 495 expand (function_expander &e) const override
496 {
497 machine_mode mode = e.vector_mode (0);
498 if (e.pred == PRED_x)
499 {
500 /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
501 a ptrue hint. */
502 e.add_ptrue_hint (0, e.gp_mode (0));
503 return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
504 }
505
506 return e.use_cond_insn (code_for_cond_cnot (mode), 0);
507 }
508};
509
510/* Implements svcnt[bhwd], which count the number of elements
511 in a particular vector mode. */
512class svcnt_bhwd_impl : public function_base
513{
514public:
 515 CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}
516
517 gimple *
 518 fold (gimple_folder &f) const override
 519 {
 520 return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode));
521 }
522
523 rtx
 524 expand (function_expander &) const override
525 {
526 return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode);
527 }
528
529 /* The mode of the vector associated with the [bhwd] suffix. */
530 machine_mode m_ref_mode;
531};
532
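/* For example, svcntb () returns the number of bytes in an SVE vector
   (16 for 128-bit vectors, 32 for 256-bit vectors, and so on); the fold
   above replaces the call with that element count as a constant.  */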
533/* Implements svcnt[bhwd]_pat. */
534class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl
535{
536public:
 537 using svcnt_bhwd_impl::svcnt_bhwd_impl;
538
539 gimple *
 540 fold (gimple_folder &f) const override
541 {
542 tree pattern_arg = gimple_call_arg (f.call, 0);
543 aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);
544
545 if (pattern == AARCH64_SV_ALL)
 546 /* svcnt[bhwd]_pat (SV_ALL) == svcnt[bhwd] (). */
547 return svcnt_bhwd_impl::fold (f);
548
549 /* See whether we can count the number of elements in the pattern
550 at compile time. */
551 unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
552 HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq);
553 if (value >= 0)
 554 return f.fold_to_cstu (value);
555
556 return NULL;
557 }
558
559 rtx
 560 expand (function_expander &e) const override
561 {
562 unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
563 e.args.quick_push (gen_int_mode (elements_per_vq, DImode));
564 e.args.quick_push (const1_rtx);
565 return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat);
566 }
567};
568
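/* For instance, svcntw_pat (SV_VL4) can be folded to the constant 4,
   since every SVE vector holds at least four 32-bit elements; patterns
   whose element count depends on the runtime vector length instead go
   through the CNT[BHWD] expansion above.  */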
569class svcntp_impl : public function_base
570{
571public:
572 rtx
 573 expand (function_expander &e) const override
574 {
575 machine_mode mode = e.vector_mode (0);
576 e.add_ptrue_hint (0, mode);
577 return e.use_exact_insn (code_for_aarch64_pred_cntp (mode));
578 }
579};
580
581/* Implements svcreate2, svcreate3 and svcreate4. */
582class svcreate_impl : public quiet<multi_vector_function>
583{
584public:
 585 using quiet<multi_vector_function>::quiet;
586
587 gimple *
 588 fold (gimple_folder &f) const override
589 {
590 unsigned int nargs = gimple_call_num_args (f.call);
591 tree lhs_type = TREE_TYPE (f.lhs);
592
593 /* Replace the call with a clobber of the result (to prevent it from
594 becoming upwards exposed) followed by stores into each individual
595 vector of tuple.
596
597 The fold routines expect the replacement statement to have the
598 same lhs as the original call, so return the clobber statement
599 rather than the final vector store. */
600 gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type));
601
602 for (unsigned int i = nargs; i-- > 0; )
603 {
604 tree rhs_vector = gimple_call_arg (f.call, i);
605 tree field = tuple_type_field (TREE_TYPE (f.lhs));
606 tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
607 unshare_expr (f.lhs), field, NULL_TREE);
608 tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
609 lhs_array, size_int (i),
610 NULL_TREE, NULL_TREE);
611 gassign *assign = gimple_build_assign (lhs_vector, rhs_vector);
612 gsi_insert_after (f.gsi, assign, GSI_SAME_STMT);
613 }
614 return clobber;
615 }
616
617 rtx
 618 expand (function_expander &e) const override
619 {
620 rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
621
622 /* Record that LHS_TUPLE is dead before the first store. */
623 emit_clobber (lhs_tuple);
624 for (unsigned int i = 0; i < e.args.length (); ++i)
625 {
626 /* Use an lvalue subreg to refer to vector I in LHS_TUPLE. */
627 rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]),
628 lhs_tuple, GET_MODE (lhs_tuple),
629 i * BYTES_PER_SVE_VECTOR);
630 emit_move_insn (lhs_vector, e.args[i]);
631 }
632 return lhs_tuple;
633 }
634};
635
636class svcvt_impl : public function_base
637{
638public:
639 rtx
 640 expand (function_expander &e) const override
641 {
642 machine_mode mode0 = e.vector_mode (0);
643 machine_mode mode1 = e.vector_mode (1);
644 insn_code icode;
645 /* All this complication comes from the need to select four things
646 simultaneously:
647
648 (1) the kind of conversion (int<-float, float<-int, float<-float)
649 (2) signed vs. unsigned integers, where relevant
650 (3) the predication mode, which must be the wider of the predication
651 modes for MODE0 and MODE1
652 (4) the predication type (m, x or z)
653
654 The only supported int<->float conversions for which the integer is
655 narrower than the float are SI<->DF. It's therefore more convenient
656 to handle (3) by defining two patterns for int<->float conversions:
657 one in which the integer is at least as wide as the float and so
658 determines the predication mode, and another single SI<->DF pattern
659 in which the float's mode determines the predication mode (which is
660 always VNx2BI in that case).
661
662 The names of the patterns follow the optab convention of giving
663 the source mode before the destination mode. */
664 if (e.type_suffix (1).integer_p)
665 {
666 int unspec = (e.type_suffix (1).unsigned_p
667 ? UNSPEC_COND_UCVTF
668 : UNSPEC_COND_SCVTF);
669 if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes)
670 icode = (e.pred == PRED_x
671 ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0)
672 : code_for_cond_nonextend (unspec, mode1, mode0));
673 else
674 icode = (e.pred == PRED_x
675 ? code_for_aarch64_sve_extend (unspec, mode1, mode0)
676 : code_for_cond_extend (unspec, mode1, mode0));
677 }
678 else
679 {
680 int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT
681 : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU
682 : UNSPEC_COND_FCVTZS);
683 if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes)
684 icode = (e.pred == PRED_x
685 ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0)
686 : code_for_cond_nontrunc (unspec, mode1, mode0));
687 else
688 icode = (e.pred == PRED_x
689 ? code_for_aarch64_sve_trunc (unspec, mode1, mode0)
690 : code_for_cond_trunc (unspec, mode1, mode0));
691 }
692
693 if (e.pred == PRED_x)
694 return e.use_pred_x_insn (icode);
695 return e.use_cond_insn (icode);
696 }
697};
698
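/* As a concrete example of the scheme above: converting svint32_t to
   svfloat32_t uses a "nonextend" pattern, since the integer elements are
   at least as wide as the float elements, whereas svint32_t to svfloat64_t
   is one of the SI<->DF cases that need the separate "extend" pattern,
   with VNx2BI predication.  */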
699class svdot_impl : public function_base
700{
701public:
702 rtx
 703 expand (function_expander &e) const override
704 {
705 /* In the optab, the multiplication operands come before the accumulator
706 operand. The optab is keyed off the multiplication mode. */
707 e.rotate_inputs_left (0, 3);
708 insn_code icode
709 = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab,
710 0, GET_MODE (e.args[0]));
711 return e.use_unpred_insn (icode);
712 }
713};
714
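/* For example, svdot (acc, a, b) with a 32-bit accumulator multiplies
   8-bit elements, so the optab lookup above is keyed off the mode of the
   8-bit multiplication operands rather than the mode of the result.  */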
 715 class svdotprod_lane_impl : public unspec_based_function_base
716{
717public:
 718 using unspec_based_function_base::unspec_based_function_base;
 719
 720 rtx
 721 expand (function_expander &e) const override
722 {
723 /* Use the same ordering as the dot_prod_optab, with the
724 accumulator last. */
725 e.rotate_inputs_left (0, 4);
 726 int unspec = unspec_for (e);
727 machine_mode mode = e.vector_mode (0);
728 return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
729 }
730};
731
732class svdup_impl : public quiet<function_base>
733{
734public:
735 gimple *
 736 fold (gimple_folder &f) const override
737 {
738 tree vec_type = TREE_TYPE (f.lhs);
739 tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1);
740
741 if (f.pred == PRED_none || f.pred == PRED_x)
742 {
743 if (CONSTANT_CLASS_P (rhs))
744 {
745 if (f.type_suffix (0).bool_p)
746 return (tree_to_shwi (rhs)
747 ? f.fold_to_ptrue ()
748 : f.fold_to_pfalse ());
749
750 tree rhs_vector = build_vector_from_val (vec_type, rhs);
751 return gimple_build_assign (f.lhs, rhs_vector);
752 }
753
754 /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we
755 would need to introduce an extra and unwanted conversion to
756 the truth vector element type. */
757 if (!f.type_suffix (0).bool_p)
758 return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs);
759 }
760
761 /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>. */
762 if (f.pred == PRED_z)
763 {
764 gimple_seq stmts = NULL;
765 tree pred = f.convert_pred (stmts, vec_type, 0);
766 rhs = f.force_vector (stmts, vec_type, rhs);
767 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
768 return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs,
769 build_zero_cst (vec_type));
770 }
771
772 return NULL;
773 }
774
775 rtx
 776 expand (function_expander &e) const override
777 {
778 if (e.pred == PRED_none || e.pred == PRED_x)
779 /* There's no benefit to using predicated instructions for _x here. */
780 return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));
781
782 /* Model predicated svdups as a SEL in which the "true" value is
783 the duplicate of the function argument and the "false" value
784 is the value of inactive lanes. */
785 insn_code icode;
786 machine_mode mode = e.vector_mode (0);
787 if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
788 /* Duplicate the constant to fill a vector. The pattern optimizes
789 various cases involving constant operands, falling back to SEL
790 if necessary. */
791 icode = code_for_vcond_mask (mode, mode);
792 else
793 /* Use the pattern for selecting between a duplicated scalar
794 variable and a vector fallback. */
795 icode = code_for_aarch64_sel_dup (mode);
796 return e.use_vcond_mask_insn (icode);
797 }
798};
799
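/* For example, svdup_s32 (1) folds to the constant vector {1, 1, ...},
   and svdup_s32_z (pg, x) folds to a VEC_COND_EXPR that selects between
   the duplicated value of x and zero, as per the folds above.  */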
800class svdup_lane_impl : public quiet<function_base>
801{
802public:
803 rtx
 804 expand (function_expander &e) const override
805 {
 806 /* The native DUP lane instruction only accepts indices that lie within the first 64 bytes of the vector. */
807 machine_mode mode = e.vector_mode (0);
808 if (CONST_INT_P (e.args[1])
809 && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63))
810 return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode));
811
812 /* Treat svdup_lane as if it were svtbl_n. */
813 return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
814 }
815};
816
817class svdupq_impl : public quiet<function_base>
818{
819public:
820 gimple *
 821 fold (gimple_folder &f) const override
822 {
823 tree vec_type = TREE_TYPE (f.lhs);
824 unsigned int nargs = gimple_call_num_args (f.call);
825 /* For predicates, pad out each argument so that we have one element
826 per bit. */
827 unsigned int factor = (f.type_suffix (0).bool_p
828 ? f.type_suffix (0).element_bytes : 1);
829 tree_vector_builder builder (vec_type, nargs * factor, 1);
830 for (unsigned int i = 0; i < nargs; ++i)
831 {
832 tree elt = gimple_call_arg (f.call, i);
833 if (!CONSTANT_CLASS_P (elt))
834 return NULL;
835 builder.quick_push (elt);
836 for (unsigned int j = 1; j < factor; ++j)
837 builder.quick_push (build_zero_cst (TREE_TYPE (vec_type)));
838 }
839 return gimple_build_assign (f.lhs, builder.build ());
840 }
841
842 rtx
 843 expand (function_expander &e) const override
844 {
845 machine_mode mode = e.vector_mode (0);
846 unsigned int elements_per_vq = e.args.length ();
847 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
848 {
849 /* Construct a vector of integers so that we can compare them against
850 zero below. Zero vs. nonzero is the only distinction that
851 matters. */
852 mode = aarch64_sve_int_mode (mode);
853 for (unsigned int i = 0; i < elements_per_vq; ++i)
854 e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode),
855 e.args[i], QImode);
856 }
857
858 /* Get the 128-bit Advanced SIMD vector for this data size. */
859 scalar_mode element_mode = GET_MODE_INNER (mode);
860 machine_mode vq_mode = aarch64_vq_mode (element_mode).require ();
861 gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode)));
862
863 /* Put the arguments into a 128-bit Advanced SIMD vector. We want
864 argument N to go into architectural lane N, whereas Advanced SIMD
865 vectors are loaded memory lsb to register lsb. We therefore need
866 to reverse the elements for big-endian targets. */
867 rtx vq_reg = gen_reg_rtx (vq_mode);
868 rtvec vec = rtvec_alloc (elements_per_vq);
869 for (unsigned int i = 0; i < elements_per_vq; ++i)
870 {
871 unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i;
872 RTVEC_ELT (vec, i) = e.args[argno];
873 }
874 aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec));
875
876 /* If the result is a boolean, compare the data vector against zero. */
877 if (mode != e.vector_mode (0))
878 {
879 rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
880 return aarch64_convert_sve_data_to_pred (e.possible_target,
881 e.vector_mode (0), data_dupq);
882 }
883
884 return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
885 }
886};
887
888class svdupq_lane_impl : public quiet<function_base>
889{
890public:
891 rtx
 892 expand (function_expander &e) const override
893 {
894 machine_mode mode = e.vector_mode (0);
895 rtx index = e.args[1];
896 if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3))
897 {
898 /* Use the .Q form of DUP, which is the native instruction for
899 this function. */
900 insn_code icode = code_for_aarch64_sve_dupq_lane (mode);
901 unsigned int num_indices = e.elements_per_vq (0);
902 rtx indices = aarch64_gen_stepped_int_parallel
903 (num_indices, INTVAL (index) * num_indices, 1);
904
905 e.add_output_operand (icode);
906 e.add_input_operand (icode, e.args[0]);
907 e.add_fixed_operand (indices);
908 return e.generate_insn (icode);
909 }
910
911 /* Build a .D TBL index for the pairs of doublewords that we want to
912 duplicate. */
913 if (CONST_INT_P (index))
914 {
915 /* The index vector is a constant. */
916 rtx_vector_builder builder (VNx2DImode, 2, 1);
917 builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode));
918 builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode));
919 index = builder.build ();
920 }
921 else
922 {
923 /* Duplicate INDEX * 2 to fill a DImode vector. The ACLE spec
924 explicitly allows the top of the index to be dropped. */
925 index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode,
926 index, const1_rtx));
927 index = expand_vector_broadcast (VNx2DImode, index);
928
929 /* Get an alternating 0, 1 predicate. */
930 rtx_vector_builder builder (VNx2BImode, 2, 1);
931 builder.quick_push (const0_rtx);
932 builder.quick_push (constm1_rtx);
933 rtx pg = force_reg (VNx2BImode, builder.build ());
934
935 /* Add one to the odd elements of the index. */
936 rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode));
937 rtx target = gen_reg_rtx (VNx2DImode);
938 emit_insn (gen_cond_addvnx2di (target, pg, index, one, index));
939 index = target;
940 }
941
942 e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
943 e.args[1] = index;
944 return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
945 }
946};
947
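/* For example, selecting quadword 1 uses the .D TBL index pair {2, 3}:
   quadword Q corresponds to the doubleword pair {2 * Q, 2 * Q + 1}, which
   is what both the constant and variable index paths above construct.  */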
948/* Implements svextb, svexth and svextw. */
949class svext_bhw_impl : public function_base
950{
951public:
 952 CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode)
953 : m_from_mode (from_mode) {}
954
955 rtx
 956 expand (function_expander &e) const override
957 {
958 if (e.type_suffix (0).unsigned_p)
959 {
960 /* Convert to an AND. The widest we go is 0xffffffff, which fits
961 in a CONST_INT. */
962 e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode)));
963 if (e.pred == PRED_m)
964 /* We now have arguments "(inactive, pg, op, mask)". Convert this
965 to "(pg, op, mask, inactive)" so that the order matches svand_m
966 with an extra argument on the end. Take the inactive elements
967 from this extra argument. */
968 e.rotate_inputs_left (0, 4);
969 return e.map_to_rtx_codes (AND, AND, -1, 3);
970 }
971
972 machine_mode wide_mode = e.vector_mode (0);
973 poly_uint64 nunits = GET_MODE_NUNITS (wide_mode);
974 machine_mode narrow_mode
975 = aarch64_sve_data_mode (m_from_mode, nunits).require ();
976 if (e.pred == PRED_x)
977 {
978 insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode);
979 return e.use_pred_x_insn (icode);
980 }
981
982 insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode);
983 return e.use_cond_insn (icode);
984 }
985
986 /* The element mode that we're extending from. */
987 scalar_int_mode m_from_mode;
988};
989
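/* For example, an unsigned svextb is expanded above as an AND with 0xff,
   reusing the svand operand order, while the signed forms use the
   predicated SXT patterns instead.  */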
990/* Implements svget2, svget3 and svget4. */
991class svget_impl : public quiet<multi_vector_function>
992{
993public:
 994 using quiet<multi_vector_function>::quiet;
995
996 gimple *
 997 fold (gimple_folder &f) const override
998 {
999 /* Fold into a normal gimple component access. */
1000 tree rhs_tuple = gimple_call_arg (f.call, 0);
1001 tree index = gimple_call_arg (f.call, 1);
1002 tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
1003 tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
1004 rhs_tuple, field, NULL_TREE);
1005 tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs),
1006 rhs_array, index, NULL_TREE, NULL_TREE);
1007 return gimple_build_assign (f.lhs, rhs_vector);
1008 }
1009
1010 rtx
 1011 expand (function_expander &e) const override
1012 {
1013 /* Fold the access into a subreg rvalue. */
1014 return simplify_gen_subreg (e.vector_mode (0), e.args[0],
1015 GET_MODE (e.args[0]),
1016 INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR);
1017 }
1018};
1019
1020class svindex_impl : public function_base
1021{
1022public:
1023 rtx
 1024 expand (function_expander &e) const override
1025 {
1026 return e.use_exact_insn (e.direct_optab_handler (vec_series_optab));
1027 }
1028};
1029
1030class svinsr_impl : public quiet<function_base>
1031{
1032public:
1033 gimple *
 1034 fold (gimple_folder &f) const override
1035 {
1036 gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2,
1037 gimple_call_arg (f.call, 0),
1038 gimple_call_arg (f.call, 1));
1039 gimple_call_set_lhs (new_call, f.lhs);
1040 return new_call;
1041 }
1042
1043 rtx
 1044 expand (function_expander &e) const override
1045 {
1046 insn_code icode = direct_optab_handler (vec_shl_insert_optab,
1047 e.vector_mode (0));
1048 return e.use_exact_insn (icode);
1049 }
1050};
1051
1052/* Implements svlasta and svlastb. */
1053class svlast_impl : public quiet<function_base>
1054{
1055public:
 1056 CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {}
1057
1058 rtx
 1059 expand (function_expander &e) const override
1060 {
1061 return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0)));
1062 }
1063
1064 /* The unspec code associated with the operation. */
1065 int m_unspec;
1066};
1067
1068class svld1_impl : public full_width_access
1069{
1070public:
1071 unsigned int
 1072 call_properties (const function_instance &) const override
1073 {
1074 return CP_READ_MEMORY;
1075 }
1076
1077 gimple *
 1078 fold (gimple_folder &f) const override
1079 {
1080 tree vectype = f.vector_type (0);
1081
1082 /* Get the predicate and base pointer. */
1083 gimple_seq stmts = NULL;
1084 tree pred = f.convert_pred (stmts, vectype, 0);
1085 tree base = f.fold_contiguous_base (stmts, vectype);
1086 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
1087
1088 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
1089 gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
1090 base, cookie, pred);
1091 gimple_call_set_lhs (new_call, f.lhs);
1092 return new_call;
1093 }
1094
1095 rtx
 1096 expand (function_expander &e) const override
1097 {
1098 insn_code icode = convert_optab_handler (maskload_optab,
1099 e.vector_mode (0), e.gp_mode (0));
1100 return e.use_contiguous_load_insn (icode);
1101 }
1102};
1103
1104/* Implements extending contiguous forms of svld1. */
1105class svld1_extend_impl : public extending_load
1106{
1107public:
 1108 using extending_load::extending_load;
1109
1110 rtx
 1111 expand (function_expander &e) const override
 1112 {
 1113 insn_code icode = code_for_aarch64_load (UNSPEC_LD1_SVE, extend_rtx_code (),
1114 e.vector_mode (0),
1115 e.memory_vector_mode ());
1116 return e.use_contiguous_load_insn (icode);
1117 }
1118};
1119
1120class svld1_gather_impl : public full_width_access
1121{
1122public:
1123 unsigned int
 1124 call_properties (const function_instance &) const override
1125 {
1126 return CP_READ_MEMORY;
1127 }
1128
1129 rtx
 1130 expand (function_expander &e) const override
1131 {
1132 e.prepare_gather_address_operands (1);
1133 /* Put the predicate last, as required by mask_gather_load_optab. */
1134 e.rotate_inputs_left (0, 5);
1135 machine_mode mem_mode = e.memory_vector_mode ();
1136 machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
1137 insn_code icode = convert_optab_handler (mask_gather_load_optab,
1138 mem_mode, int_mode);
1139 return e.use_exact_insn (icode);
1140 }
1141};
1142
1143/* Implements extending forms of svld1_gather. */
1144class svld1_gather_extend_impl : public extending_load
1145{
1146public:
 1147 using extending_load::extending_load;
1148
1149 rtx
 1150 expand (function_expander &e) const override
1151 {
1152 e.prepare_gather_address_operands (1);
1153 /* Put the predicate last, since the extending gathers use the same
1154 operand order as mask_gather_load_optab. */
1155 e.rotate_inputs_left (0, 5);
1156 /* Add a constant predicate for the extension rtx. */
1157 e.args.quick_push (CONSTM1_RTX (VNx16BImode));
1158 insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
1159 e.vector_mode (0),
1160 e.memory_vector_mode ());
1161 return e.use_exact_insn (icode);
1162 }
1163};
1164
 1165class load_replicate : public function_base
1166{
1167public:
1168 unsigned int
 1169 call_properties (const function_instance &) const override
1170 {
1171 return CP_READ_MEMORY;
1172 }
1173
1174 tree
 1175 memory_scalar_type (const function_instance &fi) const override
1176 {
1177 return fi.scalar_type (0);
1178 }
 1179};
 1180
1181class svld1rq_impl : public load_replicate
1182{
1183public:
 1184 machine_mode
 1185 memory_vector_mode (const function_instance &fi) const override
1186 {
1187 return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require ();
1188 }
1189
1190 rtx
 1191 expand (function_expander &e) const override
1192 {
1193 insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
1194 return e.use_contiguous_load_insn (icode);
1195 }
1196
1197 gimple *
1198 fold (gimple_folder &f) const override
1199 {
1200 tree arg0 = gimple_call_arg (f.call, 0);
1201 tree arg1 = gimple_call_arg (f.call, 1);
1202
1203 /* Transform:
1204 lhs = svld1rq ({-1, -1, ... }, arg1)
1205 into:
1206 tmp = mem_ref<vectype> [(elem * {ref-all}) arg1]
1207 lhs = vec_perm_expr<tmp, tmp, {0, 1, 2, 3, ...}>.
 1208 on little-endian targets.
 1209 VECTYPE is the corresponding Advanced SIMD type. */
1210
1211 if (!BYTES_BIG_ENDIAN
1212 && integer_all_onesp (arg0))
1213 {
1214 tree lhs = gimple_call_lhs (f.call);
1215 tree lhs_type = TREE_TYPE (lhs);
1216 poly_uint64 lhs_len = TYPE_VECTOR_SUBPARTS (lhs_type);
1217 tree eltype = TREE_TYPE (lhs_type);
1218
1219 scalar_mode elmode = GET_MODE_INNER (TYPE_MODE (lhs_type));
1220 machine_mode vq_mode = aarch64_vq_mode (elmode).require ();
1221 tree vectype = build_vector_type_for_mode (eltype, vq_mode);
1222
1223 tree elt_ptr_type
1224 = build_pointer_type_for_mode (eltype, VOIDmode, true);
1225 tree zero = build_zero_cst (elt_ptr_type);
1226
1227 /* Use element type alignment. */
1228 tree access_type
1229 = build_aligned_type (vectype, TYPE_ALIGN (eltype));
1230
1231 tree mem_ref_lhs = make_ssa_name_fn (cfun, access_type, 0);
1232 tree mem_ref_op = fold_build2 (MEM_REF, access_type, arg1, zero);
1233 gimple *mem_ref_stmt
1234 = gimple_build_assign (mem_ref_lhs, mem_ref_op);
1235 gsi_insert_before (f.gsi, mem_ref_stmt, GSI_SAME_STMT);
1236
1237 int source_nelts = TYPE_VECTOR_SUBPARTS (access_type).to_constant ();
1238 vec_perm_builder sel (lhs_len, source_nelts, 1);
1239 for (int i = 0; i < source_nelts; i++)
1240 sel.quick_push (i);
1241
1242 vec_perm_indices indices (sel, 1, source_nelts);
1243 gcc_checking_assert (can_vec_perm_const_p (TYPE_MODE (lhs_type),
1244 TYPE_MODE (access_type),
1245 indices));
1246 tree mask_type = build_vector_type (ssizetype, lhs_len);
1247 tree mask = vec_perm_indices_to_tree (mask_type, indices);
1248 return gimple_build_assign (lhs, VEC_PERM_EXPR,
1249 mem_ref_lhs, mem_ref_lhs, mask);
1250 }
1251
1252 return NULL;
1253 }
1254};
1255
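/* For example, on little-endian targets an svld1rq of 32-bit elements with
   an all-true predicate is folded above into a 128-bit Advanced SIMD load
   followed by a VEC_PERM_EXPR whose selector repeats {0, 1, 2, 3},
   replicating the quadword across the whole vector.  */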
1256class svld1ro_impl : public load_replicate
1257{
1258public:
1259 machine_mode
 1260 memory_vector_mode (const function_instance &) const override
1261 {
1262 return OImode;
1263 }
1264
1265 rtx
 1266 expand (function_expander &e) const override
1267 {
1268 insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0));
1269 return e.use_contiguous_load_insn (icode);
1270 }
1271};
1272
1273/* Implements svld2, svld3 and svld4. */
1274class svld234_impl : public full_width_access
1275{
1276public:
 1277 using full_width_access::full_width_access;
1278
1279 unsigned int
 1280 call_properties (const function_instance &) const override
1281 {
1282 return CP_READ_MEMORY;
1283 }
1284
1285 gimple *
 1286 fold (gimple_folder &f) const override
1287 {
1288 tree tuple_type = TREE_TYPE (f.lhs);
1289 tree vectype = f.vector_type (0);
1290
1291 /* Get the predicate and base pointer. */
1292 gimple_seq stmts = NULL;
1293 tree pred = f.convert_pred (stmts, vectype, 0);
1294 tree base = f.fold_contiguous_base (stmts, vectype);
1295 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
1296
1297 /* Emit two statements: a clobber of the lhs, so that it isn't
1298 upwards exposed, and then the load itself.
1299
1300 The fold routines expect the replacement statement to have the
1301 same lhs as the original call, so return the clobber statement
1302 rather than the load. */
1303 gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type));
1304
1305 /* View the loaded data as an array of vectors. */
1306 tree field = tuple_type_field (tuple_type);
1307 tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field),
1308 unshare_expr (f.lhs));
1309
1310 /* Emit the load itself. */
1311 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
1312 gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
1313 base, cookie, pred);
1314 gimple_call_set_lhs (new_call, lhs_array);
1315 gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT);
1316
1317 return clobber;
1318 }
1319
1320 rtx
 1321 expand (function_expander &e) const override
1322 {
1323 machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
1324 insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
1325 tuple_mode, e.vector_mode (0));
1326 return e.use_contiguous_load_insn (icode);
1327 }
1328};
1329
1330class svldff1_gather_impl : public full_width_access
1331{
1332public:
1333 unsigned int
 1334 call_properties (const function_instance &) const override
1335 {
1336 return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
1337 }
1338
1339 rtx
 1340 expand (function_expander &e) const override
1341 {
1342 /* See the block comment in aarch64-sve.md for details about the
1343 FFR handling. */
1344 emit_insn (gen_aarch64_update_ffr_for_load ());
1345
1346 e.prepare_gather_address_operands (1);
1347 /* Put the predicate last, since ldff1_gather uses the same operand
1348 order as mask_gather_load_optab. */
1349 e.rotate_inputs_left (0, 5);
1350 machine_mode mem_mode = e.memory_vector_mode ();
1351 return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode));
1352 }
1353};
1354
1355/* Implements extending forms of svldff1_gather. */
1356class svldff1_gather_extend : public extending_load
1357{
1358public:
 1359 using extending_load::extending_load;
1360
1361 rtx
 1362 expand (function_expander &e) const override
1363 {
1364 /* See the block comment in aarch64-sve.md for details about the
1365 FFR handling. */
1366 emit_insn (gen_aarch64_update_ffr_for_load ());
1367
1368 e.prepare_gather_address_operands (1);
1369 /* Put the predicate last, since ldff1_gather uses the same operand
1370 order as mask_gather_load_optab. */
1371 e.rotate_inputs_left (0, 5);
1372 /* Add a constant predicate for the extension rtx. */
1373 e.args.quick_push (CONSTM1_RTX (VNx16BImode));
1374 insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
1375 e.vector_mode (0),
1376 e.memory_vector_mode ());
1377 return e.use_exact_insn (icode);
1378 }
1379};
1380
1381class svldnt1_impl : public full_width_access
1382{
1383public:
1384 unsigned int
 1385 call_properties (const function_instance &) const override
1386 {
1387 return CP_READ_MEMORY;
1388 }
1389
1390 rtx
 1391 expand (function_expander &e) const override
1392 {
1393 insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0));
1394 return e.use_contiguous_load_insn (icode);
1395 }
1396};
1397
1398/* Implements svldff1 and svldnf1. */
1399class svldxf1_impl : public full_width_access
1400{
1401public:
 1402 CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {}
1403
1404 unsigned int
 1405 call_properties (const function_instance &) const override
1406 {
1407 return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
1408 }
1409
1410 rtx
 1411 expand (function_expander &e) const override
1412 {
1413 /* See the block comment in aarch64-sve.md for details about the
1414 FFR handling. */
1415 emit_insn (gen_aarch64_update_ffr_for_load ());
1416
1417 machine_mode mode = e.vector_mode (0);
1418 return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode));
1419 }
1420
1421 /* The unspec associated with the load. */
1422 int m_unspec;
1423};
1424
1425/* Implements extending contiguous forms of svldff1 and svldnf1. */
1426class svldxf1_extend_impl : public extending_load
1427{
1428public:
 1429 CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec)
1430 : extending_load (memory_type), m_unspec (unspec) {}
1431
1432 unsigned int
 1433 call_properties (const function_instance &) const override
1434 {
1435 return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
1436 }
1437
1438 rtx
 1439 expand (function_expander &e) const override
1440 {
1441 /* See the block comment in aarch64-sve.md for details about the
1442 FFR handling. */
1443 emit_insn (gen_aarch64_update_ffr_for_load ());
1444
1445 insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (),
1446 e.vector_mode (0),
1447 e.memory_vector_mode ());
1448 return e.use_contiguous_load_insn (icode);
1449 }
1450
1451 /* The unspec associated with the load. */
1452 int m_unspec;
1453};
1454
1455class svlen_impl : public quiet<function_base>
1456{
1457public:
1458 gimple *
 1459 fold (gimple_folder &f) const override
1460 {
1461 /* The argument only exists for its type. */
1462 tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0));
1463 tree count = build_int_cstu (TREE_TYPE (f.lhs),
1464 TYPE_VECTOR_SUBPARTS (rhs_type));
1465 return gimple_build_assign (f.lhs, count);
1466 }
1467
1468 rtx
 1469 expand (function_expander &e) const override
1470 {
1471 /* The argument only exists for its type. */
1472 return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode);
1473 }
1474};
1475
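/* For example, svlen (x) on an svfloat64_t counts the doubleword elements
   in x and therefore gives the same value as svcntd (); the fold above
   takes the count directly from the argument's vector type.  */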
1476class svmad_impl : public function_base
1477{
1478public:
1479 rtx
 1480 expand (function_expander &e) const override
1481 {
1482 return expand_mad (e);
1483 }
1484};
1485
1486class svmla_impl : public function_base
1487{
1488public:
1489 rtx
 1490 expand (function_expander &e) const override
1491 {
1492 /* Put the accumulator at the end (argument 3), but keep it as the
1493 merge input for _m functions. */
1494 e.rotate_inputs_left (1, 4);
1495 return expand_mad (e, 3);
1496 }
1497};
1498
 1499class svmla_lane_impl : public function_base
1500{
1501public:
 1502 rtx
 1503 expand (function_expander &e) const override
 1504 {
1505 if (e.type_suffix (0).integer_p)
1506 {
1507 machine_mode mode = e.vector_mode (0);
1508 return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode));
1509 }
1510 return expand_mla_mls_lane (e, UNSPEC_FMLA);
 1511 }
1512};
1513
1514class svmls_impl : public function_base
1515{
1516public:
1517 rtx
 1518 expand (function_expander &e) const override
1519 {
1520 /* Put the accumulator at the end (argument 3), but keep it as the
1521 merge input for _m functions. */
1522 e.rotate_inputs_left (1, 4);
1523 return expand_msb (e, 3);
1524 }
1525};
1526
1527class svmov_impl : public function_base
1528{
1529public:
1530 gimple *
 1531 fold (gimple_folder &f) const override
1532 {
1533 return gimple_build_assign (f.lhs, BIT_AND_EXPR,
1534 gimple_call_arg (f.call, 0),
1535 gimple_call_arg (f.call, 1));
1536 }
1537
1538 rtx
 1539 expand (function_expander &e) const override
1540 {
1541 /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B"
1542 is "AND Pa.B, Pb/Z, Pc.B, Pc.B". */
1543 gcc_assert (e.pred == PRED_z);
1544 e.args.quick_push (e.args[1]);
1545 return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z);
1546 }
1547};
1548
1549class svmls_lane_impl : public function_base
1550{
1551public:
1552 rtx
 1553 expand (function_expander &e) const override
1554 {
1555 if (e.type_suffix (0).integer_p)
1556 {
1557 machine_mode mode = e.vector_mode (0);
1558 return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
1559 }
1560 return expand_mla_mls_lane (e, UNSPEC_FMLS);
1561 }
1562};
1563
1564class svmmla_impl : public function_base
1565{
1566public:
1567 rtx
 1568 expand (function_expander &e) const override
1569 {
1570 insn_code icode;
1571 if (e.type_suffix (0).integer_p)
1572 {
1573 if (e.type_suffix (0).unsigned_p)
1574 icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
1575 else
1576 icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
1577 }
1578 else
1579 icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
1580 return e.use_exact_insn (icode);
1581 }
1582};
1583
1584class svmsb_impl : public function_base
1585{
1586public:
1587 rtx
 1588 expand (function_expander &e) const override
1589 {
1590 return expand_msb (e);
1591 }
1592};
1593
1594class svnand_impl : public function_base
1595{
1596public:
1597 rtx
 1598 expand (function_expander &e) const override
1599 {
1600 gcc_assert (e.pred == PRED_z);
1601 return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z);
1602 }
1603};
1604
1605class svnor_impl : public function_base
1606{
1607public:
1608 rtx
 1609 expand (function_expander &e) const override
1610 {
1611 gcc_assert (e.pred == PRED_z);
1612 return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z);
1613 }
1614};
1615
1616class svnot_impl : public rtx_code_function
1617{
1618public:
 1619 CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {}
1620
1621 rtx
 1622 expand (function_expander &e) const override
1623 {
1624 if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
1625 {
1626 /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B"
1627 is "EOR Pa.B, Pb/Z, Pb.B, Pc.B". */
1628 gcc_assert (e.pred == PRED_z);
1629 e.args.quick_insert (1, e.args[0]);
1630 return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z);
1631 }
1632 return rtx_code_function::expand (e);
1633 }
1634};
1635
1636class svorn_impl : public function_base
1637{
1638public:
1639 rtx
 1640 expand (function_expander &e) const override
1641 {
1642 gcc_assert (e.pred == PRED_z);
1643 return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z);
1644 }
1645};
1646
1647class svpfalse_impl : public function_base
1648{
1649public:
1650 gimple *
 1651 fold (gimple_folder &f) const override
1652 {
1653 return f.fold_to_pfalse ();
1654 }
1655
1656 rtx
 1657 expand (function_expander &) const override
1658 {
1659 return CONST0_RTX (VNx16BImode);
1660 }
1661};
1662
1663/* Implements svpfirst and svpnext, which share the same .md patterns. */
1664class svpfirst_svpnext_impl : public function_base
1665{
1666public:
 1667 CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {}
1668
1669 rtx
 1670 expand (function_expander &e) const override
1671 {
1672 machine_mode mode = e.vector_mode (0);
1673 e.add_ptrue_hint (0, mode);
1674 return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode));
1675 }
1676
1677 /* The unspec associated with the operation. */
1678 int m_unspec;
1679};
1680
1681/* Implements contiguous forms of svprf[bhwd]. */
1682class svprf_bhwd_impl : public function_base
1683{
1684public:
 1685 CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {}
1686
1687 unsigned int
 1688 call_properties (const function_instance &) const override
1689 {
1690 return CP_PREFETCH_MEMORY;
1691 }
1692
1693 rtx
 1694 expand (function_expander &e) const override
1695 {
1696 e.prepare_prefetch_operands ();
1697 insn_code icode = code_for_aarch64_sve_prefetch (m_mode);
1698 return e.use_contiguous_prefetch_insn (icode);
1699 }
1700
1701 /* The mode that we'd use to hold one vector of prefetched data. */
1702 machine_mode m_mode;
1703};
1704
1705/* Implements svprf[bhwd]_gather. */
1706class svprf_bhwd_gather_impl : public function_base
1707{
1708public:
 1709 CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {}
1710
1711 unsigned int
 1712 call_properties (const function_instance &) const override
1713 {
1714 return CP_PREFETCH_MEMORY;
1715 }
1716
1717 machine_mode
 1718 memory_vector_mode (const function_instance &) const override
1719 {
1720 return m_mode;
1721 }
1722
1723 rtx
 1724 expand (function_expander &e) const override
1725 {
1726 e.prepare_prefetch_operands ();
1727 e.prepare_gather_address_operands (1);
1728
1729 /* Insert a zero operand to identify the mode of the memory being
1730 accessed. This goes between the gather operands and prefetch
1731 operands created above. */
1732 e.args.quick_insert (5, CONST0_RTX (m_mode));
1733
1734 machine_mode reg_mode = GET_MODE (e.args[2]);
1735 insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode);
1736 return e.use_exact_insn (icode);
1737 }
1738
1739 /* The mode that we'd use to hold one vector of prefetched data. */
1740 machine_mode m_mode;
1741};
1742
1743/* Implements svptest_any, svptest_first and svptest_last. */
1744class svptest_impl : public function_base
1745{
1746public:
 1747 CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {}
1748
1749 rtx
 1750 expand (function_expander &e) const override
1751 {
1752 /* See whether GP is an exact ptrue for some predicate mode;
1753 i.e. whether converting the GP to that mode will not drop
1754 set bits and will leave all significant bits set. */
1755 machine_mode wide_mode;
1756 int hint;
1757 if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode))
1758 hint = SVE_KNOWN_PTRUE;
1759 else
1760 {
1761 hint = SVE_MAYBE_NOT_PTRUE;
1762 wide_mode = VNx16BImode;
1763 }
1764
1765 /* Generate the PTEST itself. */
1766 rtx pg = force_reg (VNx16BImode, e.args[0]);
1767 rtx wide_pg = gen_lowpart (wide_mode, pg);
1768 rtx hint_rtx = gen_int_mode (hint, DImode);
1769 rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1]));
1770 emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op));
1771
1772 /* Get the location of the boolean result. We can provide SImode and
1773 DImode values directly; rely on generic code to convert others. */
1774 rtx target = e.possible_target;
1775 if (!target
1776 || !REG_P (target)
1777 || (GET_MODE (target) != SImode && GET_MODE (target) != DImode))
1778 target = gen_reg_rtx (DImode);
1779
1780 /* Generate a CSET to convert the CC result of the PTEST to a boolean. */
1781 rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
1782 rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target),
1783 cc_reg, const0_rtx);
1784 emit_insn (gen_rtx_SET (target, compare));
1785 return target;
1786 }
1787
1787 /* The comparison code associated with the ptest condition. */
1789 rtx_code m_compare;
1790};
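/* Illustrative sketch of the mapping above (intrinsic spellings assumed
   from the ACLE naming scheme):

     bool any   = svptest_any (pg, cmp);    // PTEST + conditional set on NE
     bool first = svptest_first (pg, cmp);  // PTEST + conditional set on LT
     bool last  = svptest_last (pg, cmp);   // PTEST + conditional set on LTU

   When PG is recognized as an exact ptrue, the SVE_KNOWN_PTRUE hint lets
   the PTEST pattern use PG as the governing predicate directly.  */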
1791
1792class svptrue_impl : public function_base
1793{
1794public:
1795 gimple *
ff171cb1 1796 fold (gimple_folder &f) const override
1797 {
1798 return f.fold_to_ptrue ();
1799 }
1800
1801 rtx
ff171cb1 1802 expand (function_expander &e) const override
1803 {
1804 return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
1805 }
1806};
1807
1808class svptrue_pat_impl : public function_base
1809{
1810public:
1811 gimple *
ff171cb1 1812 fold (gimple_folder &f) const override
1813 {
1814 tree pattern_arg = gimple_call_arg (f.call, 0);
1815 aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);
1816
1817 if (pattern == AARCH64_SV_ALL)
1818 /* svptrue_pat_bN (SV_ALL) == svptrue_bN (). */
1819 return f.fold_to_ptrue ();
1820
1821 /* See whether we can count the number of elements in the pattern
1822 at compile time. If so, construct a predicate with that number
1823 of 1s followed by all 0s. */
1824 int nelts_per_vq = f.elements_per_vq (0);
1825 HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq);
1826 if (value >= 0)
1827 return f.fold_to_vl_pred (value);
1828
1829 return NULL;
1830 }
1831
1832 rtx
ff171cb1 1833 expand (function_expander &e) const override
1834 {
1835 /* In rtl, the predicate is represented as the constant:
1836
1837 (const:V16BI (unspec:V16BI [(const_int PATTERN)
1838 (const_vector:VnnBI [zeros])]
1839 UNSPEC_PTRUE))
1840
1841 where nn determines the element size. */
1842 rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0)));
1843 return gen_rtx_CONST (VNx16BImode,
1844 gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE));
1845 }
1846};
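/* Illustrative sketch of the folds above (intrinsic spellings assumed
   from the ACLE naming scheme):

     svptrue_pat_b8 (SV_ALL);  // same as svptrue_b8 (), folded to a ptrue
     svptrue_pat_b8 (SV_VL3);  // the element count is known to be 3, so
                               // this folds to a constant predicate with
                               // the first three elements active

   Patterns whose element count cannot be determined at compile time fall
   through to the rtl UNSPEC_PTRUE constant described in expand.  */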
1847
1848/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}. */
1849class svqdec_svqinc_bhwd_impl : public function_base
1850{
1851public:
f95d3d5d 1852 CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint,
1853 rtx_code code_for_uint,
1854 scalar_int_mode elem_mode)
1855 : m_code_for_sint (code_for_sint),
1856 m_code_for_uint (code_for_uint),
1857 m_elem_mode (elem_mode)
1858 {}
1859
1860 rtx
ff171cb1 1861 expand (function_expander &e) const override
1862 {
1863 /* Treat non-_pat functions in the same way as _pat functions with
1864 an SV_ALL argument. */
1865 if (e.args.length () == 2)
1866 e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode));
1867
1868 /* Insert the number of elements per 128-bit block as a fake argument,
1869 between the pattern and the multiplier. Arguments 1, 2 and 3 then
1870 correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see
1871 aarch64_sve_cnt_pat for details. */
1872 unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode);
1873 e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode));
1874
1875 rtx_code code = (e.type_suffix (0).unsigned_p
1876 ? m_code_for_uint
1877 : m_code_for_sint);
1878
1879 /* Choose between operating on integer scalars or integer vectors. */
1880 machine_mode mode = e.vector_mode (0);
1881 if (e.mode_suffix_id == MODE_n)
1882 mode = GET_MODE_INNER (mode);
1883 return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode));
1884 }
1885
1886 /* The saturating addition or subtraction codes to use for signed and
1887 unsigned values respectively. */
1888 rtx_code m_code_for_sint;
1889 rtx_code m_code_for_uint;
1890
1891 /* The integer mode associated with the [bhwd] suffix. */
1892 scalar_int_mode m_elem_mode;
1893};
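/* Illustrative sketch of the expansion above (intrinsic spellings assumed
   from the ACLE naming scheme):

     svqincb (x, 2);              // treated as svqincb_pat (x, SV_ALL, 2):
                                  // saturating x += 2 * svcntb ()
     svqdecw_pat (x, SV_VL4, 1);  // saturating x -= number of 32-bit
                                  // elements matched by SV_VL4

   The inserted "elements per 128-bit block" operand (16, 8, 4 or 2) is
   what distinguishes the b/h/w/d forms in the UNSPEC_SVE_CNT_PAT
   operands.  */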
1894
1895/* Implements svqdec[bhwd]{,_pat}. */
1896class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl
1897{
1898public:
f95d3d5d 1899 CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode)
1900 : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {}
1901};
1902
1903/* Implements svqinc[bhwd]{,_pat}. */
1904class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl
1905{
1906public:
f95d3d5d 1907 CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode)
1908 : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {}
1909};
1910
1911/* Implements svqdecp and svqincp. */
1912class svqdecp_svqincp_impl : public function_base
1913{
1914public:
f95d3d5d 1915 CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint,
1916 rtx_code code_for_uint)
1917 : m_code_for_sint (code_for_sint),
1918 m_code_for_uint (code_for_uint)
1919 {}
1920
1921 rtx
ff171cb1 1922 expand (function_expander &e) const override
1923 {
1924 rtx_code code = (e.type_suffix (0).unsigned_p
1925 ? m_code_for_uint
1926 : m_code_for_sint);
1927 insn_code icode;
1928 if (e.mode_suffix_id == MODE_n)
1929 {
1930 /* Increment or decrement a scalar (whose mode is given by the first
1931 type suffix) by the number of active elements in a predicate
1932 (whose mode is given by the second type suffix). */
1933 machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
1934 icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1));
1935 }
1936 else
1937 /* Increment a vector by the number of active elements in a predicate,
1938 with the vector mode determining the predicate mode. */
1939 icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0));
1940 return e.use_exact_insn (icode);
1941 }
1942
1943 /* The saturating addition or subtraction codes to use for signed and
1944 unsigned values respectively. */
1945 rtx_code m_code_for_sint;
1946 rtx_code m_code_for_uint;
1947};
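/* Illustrative sketch (intrinsic spellings assumed from the ACLE naming
   scheme):

     x = svqincp_n_s32_b8 (x, pg);  // scalar x += number of active .B
                                    // elements in pg, with signed saturation
     v = svqdecp_s32 (v, pg);       // each lane of v -= number of active
                                    // elements in pg, with signed saturation  */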
1948
1949class svrdffr_impl : public function_base
1950{
1951public:
1952 unsigned int
ff171cb1 1953 call_properties (const function_instance &) const override
1954 {
1955 return CP_READ_FFR;
1956 }
1957
1958 rtx
ff171cb1 1959 expand (function_expander &e) const override
1960 {
1961 /* See the block comment in aarch64-sve.md for details about the
1962 FFR handling. */
1963 emit_insn (gen_aarch64_copy_ffr_to_ffrt ());
1964 rtx result = e.use_exact_insn (e.pred == PRED_z
1965 ? CODE_FOR_aarch64_rdffr_z
1966 : CODE_FOR_aarch64_rdffr);
1967 emit_insn (gen_aarch64_update_ffrt ());
1968 return result;
1969 }
1970};
1971
1972class svreinterpret_impl : public quiet<function_base>
1973{
1974public:
1975 gimple *
ff171cb1 1976 fold (gimple_folder &f) const override
1977 {
1978 /* Punt to rtl if the effect of the reinterpret on registers does not
1979 conform to GCC's endianness model. */
1980 if (!targetm.can_change_mode_class (f.vector_mode (0),
1981 f.vector_mode (1), FP_REGS))
1982 return NULL;
1983
1984 /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
1985 reinterpretation. */
1986 tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs),
1987 gimple_call_arg (f.call, 0));
1988 return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs);
1989 }
1990
1991 rtx
ff171cb1 1992 expand (function_expander &e) const override
1993 {
1994 machine_mode mode = e.vector_mode (0);
1995 return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
1996 }
1997};
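/* Illustrative sketch (intrinsic spelling assumed from the ACLE naming
   scheme): svreinterpret_s32_f32 (x) simply reuses the bits of X as a
   vector of 32-bit integers.  When the target's endianness model allows
   the mode change (can_change_mode_class), the call folds to a gimple
   VIEW_CONVERT_EXPR; otherwise it expands to the aarch64_sve_reinterpret
   pattern.  */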
1998
1999class svrev_impl : public permute
2000{
2001public:
2002 gimple *
ff171cb1 2003 fold (gimple_folder &f) const override
2004 {
2005 /* Punt for now on _b16 and wider; we'd need more complex evpc logic
2006 to rerecognize the result. */
2007 if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
2008 return NULL;
2009
2010 /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */
2011 poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2012 vec_perm_builder builder (nelts, 1, 3);
2013 for (int i = 0; i < 3; ++i)
2014 builder.quick_push (nelts - i - 1);
2015 return fold_permute (f, builder);
2016 }
2017
2018 rtx
ff171cb1 2019 expand (function_expander &e) const override
2020 {
2021 return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
2022 }
2023};
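/* Illustrative sketch: for a vector of four 32-bit elements, svrev (x)
   yields { x[3], x[2], x[1], x[0] }, and the fold above expresses this
   as a VEC_PERM_EXPR whose stepped selector starts
   { nelts - 1, nelts - 2, nelts - 3, ... }.  */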
2024
2025class svsel_impl : public quiet<function_base>
2026{
2027public:
2028 gimple *
ff171cb1 2029 fold (gimple_folder &f) const override
2030 {
2031 /* svsel corresponds exactly to VEC_COND_EXPR. */
2032 gimple_seq stmts = NULL;
2033 tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
2034 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
2035 return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred,
2036 gimple_call_arg (f.call, 1),
2037 gimple_call_arg (f.call, 2));
2038 }
2039
2040 rtx
ff171cb1 2041 expand (function_expander &e) const override
2042 {
2043 /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */
2044 e.rotate_inputs_left (0, 3);
2045 insn_code icode = convert_optab_handler (vcond_mask_optab,
2046 e.vector_mode (0),
2047 e.gp_mode (0));
2048 return e.use_exact_insn (icode);
2049 }
2050};
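/* Illustrative sketch: svsel (pg, a, b) picks a[i] for lanes in which
   pg[i] is true and b[i] elsewhere, which is exactly gimple's
   VEC_COND_EXPR; at expand time the operands are rotated to match
   vcond_mask's (true, false, mask) order.  */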
2051
2052/* Implements svset2, svset3 and svset4. */
2053class svset_impl : public quiet<multi_vector_function>
2054{
2055public:
7bca7218 2056 using quiet<multi_vector_function>::quiet;
2057
2058 gimple *
ff171cb1 2059 fold (gimple_folder &f) const override
2060 {
2061 tree rhs_tuple = gimple_call_arg (f.call, 0);
2062 tree index = gimple_call_arg (f.call, 1);
2063 tree rhs_vector = gimple_call_arg (f.call, 2);
2064
2065 /* Replace the call with two statements: a copy of the full tuple
2066 to the call result, followed by an update of the individual vector.
2067
2068 The fold routines expect the replacement statement to have the
2069 same lhs as the original call, so return the copy statement
2070 rather than the field update. */
2071 gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);
2072
2073 /* Get a reference to the individual vector. */
2074 tree field = tuple_type_field (TREE_TYPE (f.lhs));
2075 tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
2076 f.lhs, field, NULL_TREE);
2077 tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
2078 lhs_array, index, NULL_TREE, NULL_TREE);
2079 gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
2080 gsi_insert_after (f.gsi, update, GSI_SAME_STMT);
2081
2082 return copy;
2083 }
2084
2085 rtx
ff171cb1 2086 expand (function_expander &e) const override
2087 {
2088 rtx rhs_tuple = e.args[0];
2089 unsigned int index = INTVAL (e.args[1]);
2090 rtx rhs_vector = e.args[2];
2091
2092 /* First copy the full tuple to the target register. */
2093 rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
2094 emit_move_insn (lhs_tuple, rhs_tuple);
2095
2096 /* ...then update the individual vector. */
2097 rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector),
2098 lhs_tuple, GET_MODE (lhs_tuple),
2099 index * BYTES_PER_SVE_VECTOR);
2100 emit_move_insn (lhs_vector, rhs_vector);
2101 return lhs_vector;
2102 }
2103};
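/* Illustrative sketch (intrinsic spelling assumed from the ACLE naming
   scheme): svset2 (tuple, 1, v) returns a copy of TUPLE in which vector 1
   has been replaced by V, hence the two statements above: a full-tuple
   copy followed by an update of the selected field.  */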
2104
2105class svsetffr_impl : public function_base
2106{
2107public:
2108 unsigned int
ff171cb1 2109 call_properties (const function_instance &) const override
2110 {
2111 return CP_WRITE_FFR;
2112 }
2113
2114 rtx
ff171cb1 2115 expand (function_expander &e) const override
2116 {
2117 e.args.quick_push (CONSTM1_RTX (VNx16BImode));
2118 return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
2119 }
2120};
2121
2122class svst1_impl : public full_width_access
2123{
2124public:
2125 unsigned int
ff171cb1 2126 call_properties (const function_instance &) const override
2127 {
2128 return CP_WRITE_MEMORY;
2129 }
2130
2131 gimple *
ff171cb1 2132 fold (gimple_folder &f) const override
2133 {
2134 tree vectype = f.vector_type (0);
2135
2136 /* Get the predicate and base pointer. */
2137 gimple_seq stmts = NULL;
2138 tree pred = f.convert_pred (stmts, vectype, 0);
2139 tree base = f.fold_contiguous_base (stmts, vectype);
2140 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
2141
2142 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
2143 tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1);
2144 return gimple_build_call_internal (IFN_MASK_STORE, 4,
2145 base, cookie, pred, rhs);
2146 }
2147
2148 rtx
ff171cb1 2149 expand (function_expander &e) const override
2150 {
2151 insn_code icode = convert_optab_handler (maskstore_optab,
2152 e.vector_mode (0), e.gp_mode (0));
2153 return e.use_contiguous_store_insn (icode);
2154 }
2155};
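/* Illustrative sketch: svst1 (pg, base, v) is a predicated contiguous
   store, so the fold above lowers it to the internal function call
   .MASK_STORE (base, cookie, pg, v), where the cookie describes the
   access; expand uses the target's maskstore pattern directly.  */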
2156
2157class svst1_scatter_impl : public full_width_access
2158{
2159public:
2160 unsigned int
ff171cb1 2161 call_properties (const function_instance &) const override
2162 {
2163 return CP_WRITE_MEMORY;
2164 }
2165
2166 rtx
ff171cb1 2167 expand (function_expander &e) const override
2168 {
2169 e.prepare_gather_address_operands (1);
2170 /* Put the predicate last, as required by mask_scatter_store_optab. */
2171 e.rotate_inputs_left (0, 6);
2172 machine_mode mem_mode = e.memory_vector_mode ();
2173 machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
2174 insn_code icode = convert_optab_handler (mask_scatter_store_optab,
2175 mem_mode, int_mode);
2176 return e.use_exact_insn (icode);
2177 }
2178};
2179
2180/* Implements truncating forms of svst1_scatter. */
2181class svst1_scatter_truncate_impl : public truncating_store
2182{
2183public:
7bca7218 2184 using truncating_store::truncating_store;
2185
2186 rtx
ff171cb1 2187 expand (function_expander &e) const override
2188 {
2189 e.prepare_gather_address_operands (1);
2190 /* Put the predicate last, since the truncating scatters use the same
2191 operand order as mask_scatter_store_optab. */
2192 e.rotate_inputs_left (0, 6);
2193 insn_code icode = code_for_aarch64_scatter_store_trunc
2194 (e.memory_vector_mode (), e.vector_mode (0));
2195 return e.use_exact_insn (icode);
2196 }
2197};
2198
2199/* Implements truncating contiguous forms of svst1. */
2200class svst1_truncate_impl : public truncating_store
2201{
2202public:
7bca7218 2203 using truncating_store::truncating_store;
2204
2205 rtx
ff171cb1 2206 expand (function_expander &e) const override
2207 {
2208 insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (),
2209 e.vector_mode (0));
2210 return e.use_contiguous_store_insn (icode);
2211 }
2212};
2213
2214/* Implements svst2, svst3 and svst4. */
2215class svst234_impl : public full_width_access
2216{
2217public:
7bca7218 2218 using full_width_access::full_width_access;
2219
2220 unsigned int
ff171cb1 2221 call_properties (const function_instance &) const override
2222 {
2223 return CP_WRITE_MEMORY;
2224 }
2225
2226 gimple *
ff171cb1 2227 fold (gimple_folder &f) const override
2228 {
2229 tree vectype = f.vector_type (0);
2230
2231 /* Get the predicate and base pointer. */
2232 gimple_seq stmts = NULL;
2233 tree pred = f.convert_pred (stmts, vectype, 0);
2234 tree base = f.fold_contiguous_base (stmts, vectype);
2235 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
2236
2237 /* View the stored data as an array of vectors. */
2238 unsigned int num_args = gimple_call_num_args (f.call);
2239 tree rhs_tuple = gimple_call_arg (f.call, num_args - 1);
2240 tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
2241 tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple);
2242
2243 tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
2244 return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
2245 base, cookie, pred, rhs_array);
2246 }
2247
2248 rtx
ff171cb1 2249 expand (function_expander &e) const override
2250 {
2251 machine_mode tuple_mode = GET_MODE (e.args.last ());
2252 insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab,
2253 tuple_mode, e.vector_mode (0));
2254 return e.use_contiguous_store_insn (icode);
2255 }
2256};
2257
2258class svstnt1_impl : public full_width_access
2259{
2260public:
2261 unsigned int
ff171cb1 2262 call_properties (const function_instance &) const override
2263 {
2264 return CP_WRITE_MEMORY;
2265 }
2266
2267 rtx
ff171cb1 2268 expand (function_expander &e) const override
2269 {
2270 insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0));
2271 return e.use_contiguous_store_insn (icode);
2272 }
2273};
2274
2275class svsub_impl : public rtx_code_function
2276{
2277public:
f95d3d5d 2278 CONSTEXPR svsub_impl ()
2279 : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {}
2280
2281 rtx
ff171cb1 2282 expand (function_expander &e) const override
2283 {
2284 /* Canonicalize subtractions of constants to additions. */
2285 machine_mode mode = e.vector_mode (0);
2286 if (e.try_negating_argument (2, mode))
2287 return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD);
2288
2289 return rtx_code_function::expand (e);
2290 }
2291};
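/* Illustrative sketch (intrinsic spelling assumed from the ACLE naming
   scheme): when the constant operand can be negated,
   svsub_n_s32_x (pg, x, 5) is expanded as x + (-5); subtraction of a
   constant is canonicalized to addition of its negation, as described
   in expand above.  */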
2292
2293class svtbl_impl : public permute
2294{
2295public:
2296 rtx
ff171cb1 2297 expand (function_expander &e) const override
2298 {
2299 return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
2300 }
2301};
2302
2303/* Implements svtrn1 and svtrn2. */
2304class svtrn_impl : public binary_permute
2305{
2306public:
f95d3d5d 2307 CONSTEXPR svtrn_impl (int base)
2308 : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {}
2309
2310 gimple *
ff171cb1 2311 fold (gimple_folder &f) const override
2312 {
2313 /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... }
2314 svtrn2: as for svtrn1, but with 1 added to each index. */
2315 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2316 vec_perm_builder builder (nelts, 2, 3);
2317 for (unsigned int i = 0; i < 3; ++i)
2318 {
2319 builder.quick_push (m_base + i * 2);
2320 builder.quick_push (m_base + i * 2 + nelts);
2321 }
2322 return fold_permute (f, builder);
2323 }
2324
2325 /* 0 for svtrn1, 1 for svtrn2. */
2326 unsigned int m_base;
2327};
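/* Illustrative sketch: with four elements per vector, the selectors
   built above give:

     svtrn1 (a, b) -> { a[0], b[0], a[2], b[2] }   indices { 0, 4, 2, 6 }
     svtrn2 (a, b) -> { a[1], b[1], a[3], b[3] }   indices { 1, 5, 3, 7 }  */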
2328
2329/* Base class for svundef{,2,3,4}. */
2330class svundef_impl : public quiet<multi_vector_function>
2331{
2332public:
7bca7218 2333 using quiet<multi_vector_function>::quiet;
624d0f07 2334
624d0f07 2335 rtx
ff171cb1 2336 expand (function_expander &e) const override
2337 {
2338 rtx target = e.get_reg_target ();
2339 emit_clobber (copy_rtx (target));
2340 return target;
2341 }
2342};
2343
2344/* Implements svunpklo and svunpkhi. */
2345class svunpk_impl : public quiet<function_base>
2346{
2347public:
f95d3d5d 2348 CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {}
2349
2350 gimple *
ff171cb1 2351 fold (gimple_folder &f) const override
2352 {
2353 /* Don't fold the predicate ops, since every bit of the svbool_t
2354 result is significant. */
2355 if (f.type_suffix_ids[0] == TYPE_SUFFIX_b)
2356 return NULL;
2357
2358 /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian
2359 and VEC_UNPACK_HI_EXPR for big-endian. */
2360 bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p;
2361 tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR;
2362 return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0));
2363 }
2364
2365 rtx
ff171cb1 2366 expand (function_expander &e) const override
2367 {
2368 machine_mode mode = GET_MODE (e.args[0]);
2369 unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO;
2370 unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
2371 insn_code icode;
2372 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
2373 icode = code_for_aarch64_sve_punpk (unpacku, mode);
2374 else
2375 {
2376 int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
2377 icode = code_for_aarch64_sve_unpk (unspec, unspec, mode);
2378 }
2379 return e.use_exact_insn (icode);
2380 }
2381
2382 /* True for svunpkhi, false for svunpklo. */
2383 bool m_high_p;
2384};
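/* Illustrative sketch: svunpklo (x), where X has 16-bit elements, widens
   the low-numbered half of X's elements to 32 bits (sign- or
   zero-extending according to the type suffix).  The fold above picks
   VEC_UNPACK_LO_EXPR or VEC_UNPACK_HI_EXPR depending on endianness
   because those tree codes are defined in terms of memory order rather
   than element numbers; predicate (_b) forms are left to expand because
   every bit of the svbool_t result is significant.  */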
2385
2386/* Also implements svsudot. */
2387class svusdot_impl : public function_base
2388{
2389public:
f95d3d5d 2390 CONSTEXPR svusdot_impl (bool su) : m_su (su) {}
2391
2392 rtx
ff171cb1 2393 expand (function_expander &e) const override
2394 {
2395 /* The implementation of the ACLE function svsudot (for the non-lane
2396 version) is through the USDOT instruction but with the second and third
2397 inputs swapped. */
2398 if (m_su)
2399 e.rotate_inputs_left (1, 2);
2400 /* The ACLE function has the same order requirements as for svdot.
2401 While there's no requirement for the RTL pattern to have the same sort
2402 of order as that for <sur>dot_prod, it's easier to read.
2403 Hence we do the same rotation on arguments as svdot_impl does. */
2404 e.rotate_inputs_left (0, 3);
2405 machine_mode mode = e.vector_mode (0);
752045ed 2406 insn_code icode = code_for_dot_prod (UNSPEC_USDOT, mode);
2407 return e.use_exact_insn (icode);
2408 }
2409
2410private:
2411 bool m_su;
2412};
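/* Illustrative sketch (intrinsic spellings assumed from the ACLE naming
   scheme): svusdot (acc, u, s) maps directly onto the USDOT pattern,
   while svsudot (acc, s, u) is implemented by swapping the two data
   inputs so that the same USDOT pattern can be reused, as described in
   expand above.  */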
2413
2414/* Implements svuzp1 and svuzp2. */
2415class svuzp_impl : public binary_permute
2416{
2417public:
f95d3d5d 2418 CONSTEXPR svuzp_impl (unsigned int base)
2419 : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {}
2420
2421 gimple *
ff171cb1 2422 fold (gimple_folder &f) const override
2423 {
2424 /* svuzp1: { 0, 2, 4, 6, ... }
2425 svuzp2: { 1, 3, 5, 7, ... }. */
2426 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2427 vec_perm_builder builder (nelts, 1, 3);
2428 for (unsigned int i = 0; i < 3; ++i)
2429 builder.quick_push (m_base + i * 2);
2430 return fold_permute (f, builder);
2431 }
2432
2433 /* 0 for svuzp1, 1 for svuzp2. */
2434 unsigned int m_base;
2435};
2436
2437/* A function_base for svwhilele and svwhilelt functions. */
0a09a948 2438class svwhilelx_impl : public while_comparison
2439{
2440public:
f95d3d5d 2441 CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p)
0a09a948 2442 : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
2443 {}
2444
2445 /* Try to fold a call by treating its arguments as constants of type T. */
2446 template<typename T>
2447 gimple *
2448 fold_type (gimple_folder &f) const
2449 {
2450 /* Only handle cases in which both operands are constant. */
2451 T arg0, arg1;
2452 if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0)
2453 || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1))
2454 return NULL;
2455
2456 /* Check whether the result is known to be all-false. */
2457 if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1))
2458 return f.fold_to_pfalse ();
2459
2460 /* Punt if we can't tell at compile time whether the result
2461 is all-false. */
2462 if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1))
2463 return NULL;
2464
2465 /* At this point we know the result has at least one set element. */
2466 poly_uint64 diff = arg1 - arg0;
2467 poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0));
2468
2469 /* Canonicalize the svwhilele form to the svwhilelt form. Subtract
2470 from NELTS rather than adding to DIFF, to prevent overflow. */
2471 if (m_eq_p)
2472 nelts -= 1;
2473
2474 /* Check whether the result is known to be all-true. */
2475 if (known_ge (diff, nelts))
2476 return f.fold_to_ptrue ();
2477
2478 /* Punt if DIFF might not be the actual number of set elements
2479 in the result. Conditional equality is fine. */
2480 if (maybe_gt (diff, nelts))
2481 return NULL;
2482
2483 /* At this point we know that the predicate will have DIFF set elements
2484 for svwhilelt and DIFF + 1 set elements for svwhilele (which stops
2485 after rather than before ARG1 is reached). See if we can create
2486 the predicate at compile time. */
2487 unsigned HOST_WIDE_INT vl;
2488 if (diff.is_constant (&vl))
2489 /* Overflow is no longer possible after the checks above. */
2490 return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl);
2491
2492 return NULL;
2493 }
2494
2495 gimple *
ff171cb1 2496 fold (gimple_folder &f) const override
2497 {
2498 if (f.type_suffix (1).unsigned_p)
2499 return fold_type<poly_uint64> (f);
2500 else
2501 return fold_type<poly_int64> (f);
2502 }
2503
2504 /* True for svwhilele, false for svwhilelt. */
2505 bool m_eq_p;
2506};
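/* Illustrative sketch of the folds above (intrinsic spellings assumed
   from the ACLE naming scheme):

     svwhilelt_b32 (0, 0);  // bound already reached: folds to svpfalse
     svwhilelt_b32 (0, 2);  // folds to a constant predicate with exactly
                            // two active elements
     svwhilele_b32 (0, 2);  // inclusive bound: three active elements
     svwhilelt_b32 (0, 4);  // folds to an all-true predicate when the
                            // vector is known to have exactly four 32-bit
                            // elements (e.g. -msve-vector-bits=128);
                            // otherwise to a constant predicate with four
                            // active elements  */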
2507
2508class svwrffr_impl : public function_base
2509{
2510public:
2511 unsigned int
ff171cb1 2512 call_properties (const function_instance &) const override
2513 {
2514 return CP_WRITE_FFR;
2515 }
2516
2517 rtx
ff171cb1 2518 expand (function_expander &e) const override
2519 {
2520 return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
2521 }
2522};
2523
2524/* Implements svzip1 and svzip2. */
2525class svzip_impl : public binary_permute
2526{
2527public:
f95d3d5d 2528 CONSTEXPR svzip_impl (unsigned int base)
2529 : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {}
2530
2531 gimple *
ff171cb1 2532 fold (gimple_folder &f) const override
2533 {
2534 /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... }
2535 svzip2: as for svzip1, but with nelts / 2 added to each index. */
2536 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
2537 poly_uint64 base = m_base * exact_div (nelts, 2);
2538 vec_perm_builder builder (nelts, 2, 3);
2539 for (unsigned int i = 0; i < 3; ++i)
2540 {
2541 builder.quick_push (base + i);
2542 builder.quick_push (base + i + nelts);
2543 }
2544 return fold_permute (f, builder);
2545 }
2546
2547 /* 0 for svzip1, 1 for svzip2. */
2548 unsigned int m_base;
2549};
2550
2551} /* end anonymous namespace */
2552
2553namespace aarch64_sve {
2554
2555FUNCTION (svabd, svabd_impl,)
2556FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS))
2557FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE))
2558FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT))
2559FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE))
2560FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT))
2561FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))
2562FUNCTION (svadda, svadda_impl,)
2563FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV))
2564FUNCTION (svadrb, svadr_bhwd_impl, (0))
2565FUNCTION (svadrd, svadr_bhwd_impl, (3))
2566FUNCTION (svadrh, svadr_bhwd_impl, (1))
2567FUNCTION (svadrw, svadr_bhwd_impl, (2))
2568FUNCTION (svand, rtx_code_function, (AND, AND))
2569FUNCTION (svandv, reduction, (UNSPEC_ANDV))
2570FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
2571FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
0a09a948 2572FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
2573FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
2574FUNCTION (svbfdot_lane, fixed_insn_function,
2575 (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
2576FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
2577FUNCTION (svbfmlalb_lane, fixed_insn_function,
2578 (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
2579FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
2580FUNCTION (svbfmlalt_lane, fixed_insn_function,
2581 (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
2582FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
2583FUNCTION (svbic, svbic_impl,)
2584FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
2585FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
2586FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN))
2587FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA))
2588FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB))
2589FUNCTION (svcadd, svcadd_impl,)
2590FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA))
2591FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB))
2592FUNCTION (svcls, unary_count, (CLRSB))
2593FUNCTION (svclz, unary_count, (CLZ))
2594FUNCTION (svcmla, svcmla_impl,)
2595FUNCTION (svcmla_lane, svcmla_lane_impl,)
2596FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ))
2597FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE,
2598 UNSPEC_COND_CMPEQ_WIDE))
2599FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE))
2600FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE,
2601 UNSPEC_COND_CMPHS_WIDE))
2602FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT))
2603FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE,
2604 UNSPEC_COND_CMPHI_WIDE))
2605FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE))
2606FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE,
2607 UNSPEC_COND_CMPLS_WIDE))
2608FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT))
2609FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE,
2610 UNSPEC_COND_CMPLO_WIDE))
2611FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE))
2612FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE,
2613 UNSPEC_COND_CMPNE_WIDE))
2614FUNCTION (svcmpuo, svcmpuo_impl,)
2615FUNCTION (svcnot, svcnot_impl,)
2616FUNCTION (svcnt, unary_count, (POPCOUNT))
2617FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode))
2618FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode))
2619FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode))
2620FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode))
2621FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode))
2622FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode))
2623FUNCTION (svcntp, svcntp_impl,)
2624FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode))
2625FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode))
99a3b915 2626FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),)
2627FUNCTION (svcreate2, svcreate_impl, (2))
2628FUNCTION (svcreate3, svcreate_impl, (3))
2629FUNCTION (svcreate4, svcreate_impl, (4))
2630FUNCTION (svcvt, svcvt_impl,)
896dff99 2631FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),)
2632FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV))
2633FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
2634FUNCTION (svdot, svdot_impl,)
36696774 2635FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1))
2636FUNCTION (svdup, svdup_impl,)
2637FUNCTION (svdup_lane, svdup_lane_impl,)
2638FUNCTION (svdupq, svdupq_impl,)
2639FUNCTION (svdupq_lane, svdupq_lane_impl,)
2640FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1))
2641FUNCTION (sveorv, reduction, (UNSPEC_XORV))
2642FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA))
99a3b915 2643FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),)
2644FUNCTION (svextb, svext_bhw_impl, (QImode))
2645FUNCTION (svexth, svext_bhw_impl, (HImode))
2646FUNCTION (svextw, svext_bhw_impl, (SImode))
2647FUNCTION (svget2, svget_impl, (2))
2648FUNCTION (svget3, svget_impl, (3))
2649FUNCTION (svget4, svget_impl, (4))
2650FUNCTION (svindex, svindex_impl,)
2651FUNCTION (svinsr, svinsr_impl,)
2652FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA))
2653FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB))
2654FUNCTION (svld1, svld1_impl,)
2655FUNCTION (svld1_gather, svld1_gather_impl,)
9ceec73f 2656FUNCTION (svld1ro, svld1ro_impl,)
2657FUNCTION (svld1rq, svld1rq_impl,)
2658FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8))
2659FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8))
2660FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16))
2661FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16))
2662FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32))
2663FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32))
2664FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8))
2665FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8))
2666FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16))
2667FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16))
2668FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32))
2669FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32))
2670FUNCTION (svld2, svld234_impl, (2))
2671FUNCTION (svld3, svld234_impl, (3))
2672FUNCTION (svld4, svld234_impl, (4))
2673FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1))
2674FUNCTION (svldff1_gather, svldff1_gather_impl,)
2675FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1))
2676FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8))
2677FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1))
2678FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16))
2679FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1))
2680FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32))
2681FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1))
2682FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8))
2683FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1))
2684FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16))
2685FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1))
2686FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32))
2687FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1))
2688FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1))
2689FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1))
2690FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1))
2691FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1))
2692FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
2693FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
2694FUNCTION (svldnt1, svldnt1_impl,)
2695FUNCTION (svlen, svlen_impl,)
2696FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
2697FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
2698FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
2699FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
2700FUNCTION (svmad, svmad_impl,)
2701FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX))
2702FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM))
2703FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV))
2704FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV))
2705FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN))
2706FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM))
2707FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV))
2708FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV))
2709FUNCTION (svmla, svmla_impl,)
0a09a948 2710FUNCTION (svmla_lane, svmla_lane_impl,)
624d0f07 2711FUNCTION (svmls, svmls_impl,)
0a09a948 2712FUNCTION (svmls_lane, svmls_lane_impl,)
36696774 2713FUNCTION (svmmla, svmmla_impl,)
2714FUNCTION (svmov, svmov_impl,)
2715FUNCTION (svmsb, svmsb_impl,)
2716FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
99a3b915 2717FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
2718FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
2719 UNSPEC_UMUL_HIGHPART, -1))
2720FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX))
2721FUNCTION (svnand, svnand_impl,)
2722FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG))
2723FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA))
2724FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA))
2725FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS))
2726FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS))
2727FUNCTION (svnor, svnor_impl,)
2728FUNCTION (svnot, svnot_impl,)
2729FUNCTION (svorn, svorn_impl,)
2730FUNCTION (svorr, rtx_code_function, (IOR, IOR))
2731FUNCTION (svorv, reduction, (UNSPEC_IORV))
2732FUNCTION (svpfalse, svpfalse_impl,)
2733FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST))
2734FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT))
2735FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode))
2736FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode))
2737FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode))
2738FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode))
2739FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode))
2740FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode))
2741FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode))
2742FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode))
2743FUNCTION (svptest_any, svptest_impl, (NE))
2744FUNCTION (svptest_first, svptest_impl, (LT))
2745FUNCTION (svptest_last, svptest_impl, (LTU))
2746FUNCTION (svptrue, svptrue_impl,)
2747FUNCTION (svptrue_pat, svptrue_pat_impl,)
694e6b19 2748FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1))
2749FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode))
2750FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode))
2751FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode))
2752FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode))
2753FUNCTION (svqdech, svqdec_bhwd_impl, (HImode))
2754FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode))
2755FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS))
2756FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode))
2757FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode))
2758FUNCTION (svqincb, svqinc_bhwd_impl, (QImode))
2759FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode))
2760FUNCTION (svqincd, svqinc_bhwd_impl, (DImode))
2761FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode))
2762FUNCTION (svqinch, svqinc_bhwd_impl, (HImode))
2763FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode))
2764FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS))
2765FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
2766FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
694e6b19 2767FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
2768FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1))
2769FUNCTION (svrdffr, svrdffr_impl,)
0a09a948 2770FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
2771FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
2772FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX))
2773FUNCTION (svreinterpret, svreinterpret_impl,)
2774FUNCTION (svrev, svrev_impl,)
2775FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
2776FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
2777FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
2778FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA))
2779FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI))
2780FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM))
2781FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN))
2782FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP))
2783FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX))
2784FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ))
0a09a948 2785FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
2786FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
2787FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
2788FUNCTION (svsel, svsel_impl,)
2789FUNCTION (svset2, svset_impl, (2))
2790FUNCTION (svset3, svset_impl, (3))
2791FUNCTION (svset4, svset_impl, (4))
2792FUNCTION (svsetffr, svsetffr_impl,)
99a3b915 2793FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),)
2794FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT))
2795FUNCTION (svst1, svst1_impl,)
2796FUNCTION (svst1_scatter, svst1_scatter_impl,)
2797FUNCTION (svst1b, svst1_truncate_impl, (QImode))
2798FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode))
2799FUNCTION (svst1h, svst1_truncate_impl, (HImode))
2800FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode))
2801FUNCTION (svst1w, svst1_truncate_impl, (SImode))
2802FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode))
2803FUNCTION (svst2, svst234_impl, (2))
2804FUNCTION (svst3, svst234_impl, (3))
2805FUNCTION (svst4, svst234_impl, (4))
2806FUNCTION (svstnt1, svstnt1_impl,)
2807FUNCTION (svsub, svsub_impl,)
2808FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB))
2809FUNCTION (svsudot, svusdot_impl, (true))
2810FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1))
624d0f07 2811FUNCTION (svtbl, svtbl_impl,)
99a3b915 2812FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),)
624d0f07 2813FUNCTION (svtrn1, svtrn_impl, (0))
2814FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q,
2815 UNSPEC_TRN1Q))
624d0f07 2816FUNCTION (svtrn2, svtrn_impl, (1))
2817FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q,
2818 UNSPEC_TRN2Q))
2819FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL))
2820FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL))
2821FUNCTION (svundef, svundef_impl, (1))
2822FUNCTION (svundef2, svundef_impl, (2))
2823FUNCTION (svundef3, svundef_impl, (3))
2824FUNCTION (svundef4, svundef_impl, (4))
2825FUNCTION (svunpkhi, svunpk_impl, (true))
2826FUNCTION (svunpklo, svunpk_impl, (false))
2827FUNCTION (svusdot, svusdot_impl, (false))
2828FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1))
2829FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1))
624d0f07 2830FUNCTION (svuzp1, svuzp_impl, (0))
2831FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q,
2832 UNSPEC_UZP1Q))
624d0f07 2833FUNCTION (svuzp2, svuzp_impl, (1))
2834FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q,
2835 UNSPEC_UZP2Q))
2836FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
2837FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
2838FUNCTION (svwrffr, svwrffr_impl,)
2839FUNCTION (svzip1, svzip_impl, (0))
2840FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q,
2841 UNSPEC_ZIP1Q))
624d0f07 2842FUNCTION (svzip2, svzip_impl, (1))
2843FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q,
2844 UNSPEC_ZIP2Q))
2845
2846} /* end namespace aarch64_sve */