]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64-sve.md
[AArch64] Canonicalise SVE predicate constants
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-sve.md
CommitLineData
43cacb12 1;; Machine description for AArch64 SVE.
a5544970 2;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
43cacb12
RS
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3. If not see
19;; <http://www.gnu.org/licenses/>.
20
915d28fe
RS
21;; The file is organised into the following sections (search for the full
22;; line):
23;;
24;; == General notes
25;; ---- Note on the handling of big-endian SVE
34467289 26;; ---- Description of UNSPEC_PTEST
915d28fe
RS
27;;
28;; == Moves
29;; ---- Moves of single vectors
30;; ---- Moves of multiple vectors
31;; ---- Moves of predicates
32;;
33;; == Loads
34;; ---- Normal contiguous loads
35;; ---- Normal gather loads
36;;
37;; == Stores
38;; ---- Normal contiguous stores
39;; ---- Normal scatter stores
40;;
41;; == Vector creation
42;; ---- [INT,FP] Duplicate element
43;; ---- [INT,FP] Initialize from individual elements
44;; ---- [INT] Linear series
45;; ---- [PRED] Duplicate element
46;;
47;; == Vector decomposition
48;; ---- [INT,FP] Extract index
49;; ---- [INT,FP] Extract active element
50;; ---- [PRED] Extract index
51;;
52;; == Unary arithmetic
53;; ---- [INT] General unary arithmetic corresponding to rtx codes
d45b20a5 54;; ---- [FP] General unary arithmetic corresponding to unspecs
915d28fe
RS
55;; ---- [PRED] Inverse
56
57;; == Binary arithmetic
58;; ---- [INT] General binary arithmetic corresponding to rtx codes
59;; ---- [INT] Addition
60;; ---- [INT] Subtraction
61;; ---- [INT] Absolute difference
62;; ---- [INT] Multiplication
63;; ---- [INT] Highpart multiplication
64;; ---- [INT] Division
65;; ---- [INT] Binary logical operations
66;; ---- [INT] Binary logical operations (inverted second input)
67;; ---- [INT] Shifts
68;; ---- [INT] Maximum and minimum
69;; ---- [FP] General binary arithmetic corresponding to rtx codes
70;; ---- [FP] General binary arithmetic corresponding to unspecs
71;; ---- [FP] Addition
72;; ---- [FP] Subtraction
73;; ---- [FP] Absolute difference
74;; ---- [FP] Multiplication
75;; ---- [FP] Division
76;; ---- [FP] Binary logical operations
77;; ---- [FP] Sign copying
78;; ---- [FP] Maximum and minimum
79;; ---- [PRED] Binary logical operations
80;; ---- [PRED] Binary logical operations (inverted second input)
81;; ---- [PRED] Binary logical operations (inverted result)
82;;
83;; == Ternary arithmetic
84;; ---- [INT] MLA and MAD
85;; ---- [INT] MLS and MSB
86;; ---- [INT] Dot product
87;; ---- [INT] Sum of absolute differences
88;; ---- [FP] General ternary arithmetic corresponding to unspecs
915d28fe
RS
89;;
90;; == Comparisons and selects
91;; ---- [INT,FP] Select based on predicates
92;; ---- [INT,FP] Compare and select
93;; ---- [INT] Comparisons
94;; ---- [INT] While tests
95;; ---- [FP] Comparisons
96;; ---- [PRED] Test bits
97;;
98;; == Reductions
99;; ---- [INT,FP] Conditional reductions
100;; ---- [INT] Tree reductions
101;; ---- [FP] Tree reductions
102;; ---- [FP] Left-to-right reductions
103;;
104;; == Permutes
105;; ---- [INT,FP] General permutes
106;; ---- [INT,FP] Special-purpose unary permutes
107;; ---- [INT,FP] Special-purpose binary permutes
108;; ---- [PRED] Special-purpose binary permutes
109;;
110;; == Conversions
111;; ---- [INT<-INT] Packs
112;; ---- [INT<-INT] Unpacks
113;; ---- [INT<-FP] Conversions
114;; ---- [INT<-FP] Packs
115;; ---- [INT<-FP] Unpacks
116;; ---- [FP<-INT] Conversions
117;; ---- [FP<-INT] Packs
118;; ---- [FP<-INT] Unpacks
119;; ---- [FP<-FP] Packs
120;; ---- [FP<-FP] Unpacks
121;; ---- [PRED<-PRED] Packs
122;; ---- [PRED<-PRED] Unpacks
123
124;; =========================================================================
125;; == General notes
126;; =========================================================================
127;;
128;; -------------------------------------------------------------------------
129;; ---- Note on the handling of big-endian SVE
130;; -------------------------------------------------------------------------
43cacb12
RS
131;;
132;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
133;; same way as movdi or movti would: the first byte of memory goes
134;; into the most significant byte of the register and the last byte
135;; of memory goes into the least significant byte of the register.
136;; This is the most natural ordering for Advanced SIMD and matches
137;; the ABI layout for 64-bit and 128-bit vector types.
138;;
139;; As a result, the order of bytes within the register is what GCC
140;; expects for a big-endian target, and subreg offsets therefore work
141;; as expected, with the first element in memory having subreg offset 0
142;; and the last element in memory having the subreg offset associated
143;; with a big-endian lowpart. However, this ordering also means that
144;; GCC's lane numbering does not match the architecture's numbering:
145;; GCC always treats the element at the lowest address in memory
146;; (subreg offset 0) as element 0, while the architecture treats
147;; the least significant end of the register as element 0.
148;;
149;; The situation for SVE is different. We want the layout of the
150;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
151;; logically, a mov<mode> load must be indistinguishable from a
152;; maskload<mode> whose mask is all true. We therefore need the
153;; register layout to match LD1 rather than LDR. The ABI layout of
154;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
155;;
156;; As a result, the architecture lane numbering matches GCC's lane
157;; numbering, with element 0 always being the first in memory.
158;; However:
159;;
160;; - Applying a subreg offset to a register does not give the element
161;; that GCC expects: the first element in memory has the subreg offset
162;; associated with a big-endian lowpart while the last element in memory
163;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
164;;
165;; - We cannot use LDR and STR for spill slots that might be accessed
166;; via subregs, since although the elements have the order GCC expects,
167;; the order of the bytes within the elements is different. We instead
168;; access spill slots via LD1 and ST1, using secondary reloads to
169;; reserve a predicate register.
34467289
RS
170;;
171;; -------------------------------------------------------------------------
172;; ---- Description of UNSPEC_PTEST
173;; -------------------------------------------------------------------------
174;;
175;; SVE provides a PTEST instruction for testing the active lanes of a
176;; predicate and setting the flags based on the result. The associated
177;; condition code tests are:
178;;
179;; - any (= ne): at least one active bit is set
180;; - none (= eq): all active bits are clear (*)
181;; - first (= mi): the first active bit is set
182;; - nfrst (= pl): the first active bit is clear (*)
183;; - last (= cc): the last active bit is set
184;; - nlast (= cs): the last active bit is clear (*)
185;;
186;; where the conditions marked (*) are also true when there are no active
187;; lanes (i.e. when the governing predicate is a PFALSE). The flags results
188;; of a PTEST use the condition code mode CC_NZC.
189;;
190;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
191;; This means that for other predicate modes, we need a governing predicate
192;; in which all bits are defined.
193;;
194;; For example, most predicated .H operations ignore the odd bits of the
195;; governing predicate, so that an active lane is represented by the
196;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
197;; any value. To test a .H predicate, we instead need "10" and "00"
198;; respectively, so that the condition only tests the even bits of the
199;; predicate.
200;;
201;; Several instructions set the flags as a side-effect, in the same way
202;; that a separate PTEST would. It's important for code quality that we
203;; use these flags results as often as possible, particularly in the case
204;; of WHILE* and RDFFR.
205;;
206;; Also, some of the instructions that set the flags are unpredicated
207;; and instead implicitly test all .B, .H, .S or .D elements, as though
208;; they were predicated on a PTRUE of that size. For example, a .S
209;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
210;; would.
211;;
212;; We therefore need to represent PTEST operations in a way that
213;; makes it easy to combine them with both predicated and unpredicated
214;; operations, while using a VNx16BI governing predicate for all
215;; predicate modes. We do this using:
216;;
217;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
218;;
219;; where:
220;;
221;; - GP is the real VNx16BI governing predicate
222;;
223;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting
224;; GP to CAST_GP are guaranteed to be clear in GP.
225;;
226;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
227;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
228;; SVE_MAYBE_NOT_PTRUE otherwise.
229;;
230;; - OP is the predicate we want to test, of the same mode as CAST_GP.
43cacb12 231
915d28fe
RS
232;; =========================================================================
233;; == Moves
234;; =========================================================================
235
236;; -------------------------------------------------------------------------
237;; ---- Moves of single vectors
238;; -------------------------------------------------------------------------
239;; Includes:
240;; - MOV (including aliases)
241;; - LD1B (contiguous form)
242;; - LD1D ( " " )
243;; - LD1H ( " " )
244;; - LD1W ( " " )
245;; - LDR
246;; - ST1B (contiguous form)
247;; - ST1D ( " " )
248;; - ST1H ( " " )
249;; - ST1W ( " " )
250;; - STR
251;; -------------------------------------------------------------------------
252
43cacb12
RS
;; Move expander for single SVE vector modes (SVE_ALL).  The preparation
;; code tries three special cases in order, each of which ends in DONE:
;;   1. a move with a memory operand, while new pseudos can still be
;;      created, is handed to aarch64_expand_sve_mem_move together with
;;      <VPRED>mode;
;;   2. a constant source is handed to aarch64_expand_mov_immediate;
;;   3. on big-endian targets, aarch64_maybe_expand_sve_subreg_move is
;;      given the chance to handle the move.
;; If none of these applies, the plain SET in the pattern is emitted.
253(define_expand "mov<mode>"
254 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
255 (match_operand:SVE_ALL 1 "general_operand"))]
256 "TARGET_SVE"
257 {
258 /* Use the predicated load and store patterns where possible.
259 This is required for big-endian targets (see the comment at the
260 head of the file) and increases the addressing choices for
261 little-endian. */
262 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
ea403d8b 263 && can_create_pseudo_p ())
43cacb12
RS
264 {
265 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
266 DONE;
267 }
268
269 if (CONSTANT_P (operands[1]))
270 {
4aeb1ba7 271 aarch64_expand_mov_immediate (operands[0], operands[1]);
43cacb12
RS
272 DONE;
273 }
002092be
RS
274
275 /* Optimize subregs on big-endian targets: we can use REV[BHW]
276 instead of going through memory. */
277 if (BYTES_BIG_ENDIAN
ea403d8b 278 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
002092be
RS
279 DONE;
280 }
281)
282
915d28fe
RS
;; Misaligned-move expander for single SVE vector modes.  It simply
;; forwards both operands to emit_move_insn, so misaligned moves get the
;; same treatment as ordinary moves.
283(define_expand "movmisalign<mode>"
284 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
285 (match_operand:SVE_ALL 1 "general_operand"))]
286 "TARGET_SVE"
002092be 287 {
915d28fe
RS
288 /* Equivalent to a normal move for our purposes. */
289 emit_move_insn (operands[0], operands[1]);
002092be 290 DONE;
43cacb12
RS
291 }
292)
293
294;; Unpredicated moves (little-endian). Only allow memory operations
295;; during and after RA; before RA we want the predicated load and
296;; store patterns to be used instead.
;; Alternatives, in order: LDR from memory, STR to memory,
;; register-to-register MOV, and a constant source whose assembly text is
;; produced by aarch64_output_sve_mov_immediate.  Until lra_in_progress or
;; reload_completed is set, the insn condition only matches when operand 0
;; is a register and operand 1 is not a memory operand.
297(define_insn "*aarch64_sve_mov<mode>_le"
298 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
299 (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
300 "TARGET_SVE
301 && !BYTES_BIG_ENDIAN
302 && ((lra_in_progress || reload_completed)
303 || (register_operand (operands[0], <MODE>mode)
304 && nonmemory_operand (operands[1], <MODE>mode)))"
305 "@
306 ldr\t%0, %1
307 str\t%1, %0
308 mov\t%0.d, %1.d
309 * return aarch64_output_sve_mov_immediate (operands[1]);"
310)
311
312;; Unpredicated moves (big-endian). Memory accesses require secondary
313;; reloads.
;; Alternatives, in order: register-to-register MOV, and a constant source
;; whose assembly text is produced by aarch64_output_sve_mov_immediate.
;; The operand predicates accept only a register destination and a
;; non-memory source, so this pattern never matches a load or store.
314(define_insn "*aarch64_sve_mov<mode>_be"
315 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
316 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
317 "TARGET_SVE && BYTES_BIG_ENDIAN"
318 "@
319 mov\t%0.d, %1.d
320 * return aarch64_output_sve_mov_immediate (operands[1]);"
321)
322
323;; Handle big-endian memory reloads. We use byte PTRUE for all modes
324;; to try to encourage reuse.
1bbffb87 325;; This pattern needs constraints due to the TARGET_SECONDARY_RELOAD hook.
43cacb12
RS
;; Operand 0 is the destination and operand 1 the source of the move (both
;; are matched without a mode or predicate).  Operand 2 is a VNx16BI
;; register that the expander overwrites with an all-ones constant and then
;; views, via gen_lowpart, in the predicate mode that
;; aarch64_sve_pred_mode returns for the unit size of operand 0's mode.
;; That predicate is passed to aarch64_emit_sve_pred_move.
326(define_expand "aarch64_sve_reload_be"
327 [(parallel
328 [(set (match_operand 0)
ea403d8b 329 (match_operand 1))
43cacb12
RS
330 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
331 "TARGET_SVE && BYTES_BIG_ENDIAN"
332 {
333 /* Create a PTRUE. */
334 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
335
336 /* Refer to the PTRUE in the appropriate mode for this move. */
337 machine_mode mode = GET_MODE (operands[0]);
338 machine_mode pred_mode
339 = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
340 rtx pred = gen_lowpart (pred_mode, operands[2]);
341
342 /* Emit a predicated load or store. */
343 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
344 DONE;
345 }
346)
347
915d28fe
RS
348;; A predicated move in which the predicate is known to be all-true.
349;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
350;; so changes to this pattern will need changes there as well.
;; Operand 1 is the predicate and operand 2 the value being moved into
;; operand 0.  Alternatives, in order:
;;   1. register to register: output as "#" and split (whenever both
;;      operands are registers) into a plain SET of operand 0 from operand 2;
;;   2. memory to register: predicated LD1;
;;   3. register to memory: predicated ST1.
;; The insn condition requires at least one of operands 0 and 2 to be a
;; register, so memory-to-memory moves never match.
0c63a8ee 351(define_insn_and_split "@aarch64_pred_mov<mode>"
9c6b4601 352 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
43cacb12 353 (unspec:SVE_ALL
9c6b4601
RS
354 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
355 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
43cacb12
RS
356 UNSPEC_MERGE_PTRUE))]
357 "TARGET_SVE
358 && (register_operand (operands[0], <MODE>mode)
359 || register_operand (operands[2], <MODE>mode))"
360 "@
9c6b4601 361 #
43cacb12
RS
362 ld1<Vesize>\t%0.<Vetype>, %1/z, %2
363 st1<Vesize>\t%2.<Vetype>, %1, %0"
9c6b4601
RS
364 "&& register_operand (operands[0], <MODE>mode)
365 && register_operand (operands[2], <MODE>mode)"
366 [(set (match_dup 0) (match_dup 2))]
43cacb12
RS
367)
368
915d28fe
RS
369;; A pattern for optimizing SUBREGs that have a reinterpreting effect
370;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
371;; for details. We use a special predicate for operand 2 to reduce
372;; the number of patterns.
;; Operand 1 is a VNx16BI predicate register and operand 2 the source
;; register (matched without a mode).  The insn has no assembly of its
;; own (template "#"); once reload has completed it is split by calling
;; aarch64_split_sve_subreg_move on operands 0, 1 and 2.
373(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
374 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
43cacb12 375 (unspec:SVE_ALL
915d28fe
RS
376 [(match_operand:VNx16BI 1 "register_operand" "Upl")
377 (match_operand 2 "aarch64_any_register_operand" "w")]
378 UNSPEC_REV_SUBREG))]
379 "TARGET_SVE && BYTES_BIG_ENDIAN"
380 "#"
381 "&& reload_completed"
382 [(const_int 0)]
f307441a 383 {
915d28fe
RS
384 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
385 DONE;
f307441a
RS
386 }
387)
388
4aeb1ba7
RS
389;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
390;; This is equivalent to a subreg on little-endian targets but not for
391;; big-endian; see the comment at the head of the file for details.
;; On little-endian targets the expander emits an ordinary move from the
;; <MODE>mode lowpart of operand 1 and finishes with DONE.  On big-endian
;; targets the preparation code does nothing, so the UNSPEC_REINTERPRET
;; pattern itself is emitted.
392(define_expand "@aarch64_sve_reinterpret<mode>"
393 [(set (match_operand:SVE_ALL 0 "register_operand")
394 (unspec:SVE_ALL [(match_operand 1 "aarch64_any_register_operand")]
395 UNSPEC_REINTERPRET))]
396 "TARGET_SVE"
397 {
398 if (!BYTES_BIG_ENDIAN)
399 {
400 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
401 DONE;
402 }
403 }
404)
405
406;; A pattern for handling type punning on big-endian targets. We use a
407;; special predicate for operand 1 to reduce the number of patterns.
;; Operand 1 is tied to operand 0 through the matching constraint "0".
;; The insn is output as "#" and, once reload has completed, is split:
;; the split code emits only a NOTE_INSN_DELETED note and then DONE, so no
;; instruction is generated for the reinterpretation.
408(define_insn_and_split "*aarch64_sve_reinterpret<mode>"
409 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
410 (unspec:SVE_ALL [(match_operand 1 "aarch64_any_register_operand" "0")]
411 UNSPEC_REINTERPRET))]
412 "TARGET_SVE"
413 "#"
414 "&& reload_completed"
415 [(set (match_dup 0) (match_dup 1))]
416 {
417 emit_note (NOTE_INSN_DELETED);
418 DONE;
419 }
420)
421
915d28fe
RS
422;; -------------------------------------------------------------------------
423;; ---- Moves of multiple vectors
424;; -------------------------------------------------------------------------
425;; All patterns in this section are synthetic and split to real
426;; instructions after reload.
427;; -------------------------------------------------------------------------
f307441a 428
9f4cbab8
RS
;; Move expander for SVE structure modes (SVE_STRUCT).  On big-endian
;; targets a move with a memory operand asserts that pseudos can still be
;; created and is then handed to aarch64_expand_sve_mem_move with
;; <VPRED>mode.  Otherwise a constant source is handed to
;; aarch64_expand_mov_immediate.  All remaining moves emit the plain SET.
429(define_expand "mov<mode>"
430 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
431 (match_operand:SVE_STRUCT 1 "general_operand"))]
432 "TARGET_SVE"
433 {
434 /* Big-endian loads and stores need to be done via LD1 and ST1;
435 see the comment at the head of the file for details. */
436 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
437 && BYTES_BIG_ENDIAN)
438 {
439 gcc_assert (can_create_pseudo_p ());
440 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
441 DONE;
442 }
443
444 if (CONSTANT_P (operands[1]))
445 {
446 aarch64_expand_mov_immediate (operands[0], operands[1]);
447 DONE;
448 }
449 }
450)
451
452;; Unpredicated structure moves (little-endian).
;; The four alternatives use the same constraints as the single-vector
;; little-endian move (load, store, register copy, constant), but the insn
;; has no assembly of its own: the template is "#".  The "length"
;; attribute is set to <insn_length>.
453(define_insn "*aarch64_sve_mov<mode>_le"
454 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
455 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
456 "TARGET_SVE && !BYTES_BIG_ENDIAN"
457 "#"
458 [(set_attr "length" "<insn_length>")]
459)
460
461;; Unpredicated structure moves (big-endian). Memory accesses require
462;; secondary reloads.
;; Two alternatives: register copy and constant source.  The predicates
;; accept only a register destination and a non-memory source.  The insn
;; has no assembly of its own (template "#"), and its "length" attribute
;; is set to <insn_length>.
915d28fe 463(define_insn "*aarch64_sve_mov<mode>_be"
9f4cbab8
RS
464 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
465 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
466 "TARGET_SVE && BYTES_BIG_ENDIAN"
467 "#"
468 [(set_attr "length" "<insn_length>")]
469)
470
471;; Split unpredicated structure moves into pieces. This is the same
472;; for both big-endian and little-endian code, although it only needs
473;; to handle memory operands for little-endian code.
;; Runs only after reload.  If both operands are REGs, the whole move is
;; delegated to aarch64_simd_emit_reg_reg_move with <VSINGLE>mode and
;; <vector_count>.  Otherwise one <VSINGLE>mode SET is emitted per
;; constituent vector, where vector I of each operand is the subreg at
;; byte offset I * BYTES_PER_SVE_VECTOR.
474(define_split
475 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
476 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
477 "TARGET_SVE && reload_completed"
478 [(const_int 0)]
479 {
480 rtx dest = operands[0];
481 rtx src = operands[1];
482 if (REG_P (dest) && REG_P (src))
483 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
484 else
485 for (unsigned int i = 0; i < <vector_count>; ++i)
486 {
487 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
488 i * BYTES_PER_SVE_VECTOR);
489 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
490 i * BYTES_PER_SVE_VECTOR);
491 emit_insn (gen_rtx_SET (subdest, subsrc));
492 }
493 DONE;
494 }
495)
496
497;; Predicated structure moves. This works for both endiannesses but in
498;; practice is only useful for big-endian.
;; Operand 1 is the predicate and operand 2 the structure being moved into
;; operand 0; at least one of operands 0 and 2 must be a register.  The
;; insn is output as "#" and split after reload into <vector_count>
;; single-vector moves: for each I, the <VSINGLE>mode subregs of operands
;; 0 and 2 at byte offset I * BYTES_PER_SVE_VECTOR are passed to
;; aarch64_emit_sve_pred_move together with the shared predicate.
0c63a8ee 499(define_insn_and_split "@aarch64_pred_mov<mode>"
9c6b4601 500 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
9f4cbab8 501 (unspec:SVE_STRUCT
9c6b4601
RS
502 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
503 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
9f4cbab8
RS
504 UNSPEC_MERGE_PTRUE))]
505 "TARGET_SVE
506 && (register_operand (operands[0], <MODE>mode)
507 || register_operand (operands[2], <MODE>mode))"
508 "#"
509 "&& reload_completed"
510 [(const_int 0)]
511 {
512 for (unsigned int i = 0; i < <vector_count>; ++i)
513 {
514 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
515 <MODE>mode,
516 i * BYTES_PER_SVE_VECTOR);
517 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
518 <MODE>mode,
519 i * BYTES_PER_SVE_VECTOR);
520 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
521 }
522 DONE;
523 }
524 [(set_attr "length" "<insn_length>")]
525)
526
915d28fe
RS
527;; -------------------------------------------------------------------------
528;; ---- Moves of predicates
529;; -------------------------------------------------------------------------
530;; Includes:
531;; - MOV
532;; - LDR
533;; - PFALSE
534;; - PTRUE
535;; - STR
536;; -------------------------------------------------------------------------
537
43cacb12
RS
;; Move expander for predicate modes (PRED_ALL).  When the destination is
;; a MEM, the source is first forced into a register.  Any source that is
;; still a constant after that step is handed to
;; aarch64_expand_mov_immediate; all other moves emit the plain SET.
538(define_expand "mov<mode>"
539 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
540 (match_operand:PRED_ALL 1 "general_operand"))]
541 "TARGET_SVE"
542 {
543 if (GET_CODE (operands[0]) == MEM)
544 operands[1] = force_reg (<MODE>mode, operands[1]);
0b1fe8cf
RS
545
546 if (CONSTANT_P (operands[1]))
547 {
548 aarch64_expand_mov_immediate (operands[0], operands[1]);
549 DONE;
550 }
43cacb12
RS
551 }
552)
553
;; Predicate move instruction.  Alternatives, in order: predicate register
;; copy (MOV on .b), STR to memory, LDR from memory, and a constant source
;; whose assembly text is produced by aarch64_output_sve_mov_immediate.
;; The insn condition requires at least one operand to be a register.
554(define_insn "*aarch64_sve_mov<mode>"
1044fa32 555 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
0b1fe8cf 556 (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
43cacb12
RS
557 "TARGET_SVE
558 && (register_operand (operands[0], <MODE>mode)
559 || register_operand (operands[1], <MODE>mode))"
560 "@
561 mov\t%0.b, %1.b
562 str\t%1, %0
563 ldr\t%0, %1
1044fa32 564 * return aarch64_output_sve_mov_immediate (operands[1]);"
43cacb12
RS
565)
566
915d28fe
RS
567;; =========================================================================
568;; == Loads
569;; =========================================================================
570
571;; -------------------------------------------------------------------------
572;; ---- Normal contiguous loads
573;; -------------------------------------------------------------------------
574;; Includes contiguous forms of:
575;; - LD1B
576;; - LD1D
577;; - LD1H
578;; - LD1W
579;; - LD2B
580;; - LD2D
581;; - LD2H
582;; - LD2W
583;; - LD3B
584;; - LD3D
585;; - LD3H
586;; - LD3W
587;; - LD4B
588;; - LD4D
589;; - LD4H
590;; - LD4W
591;; -------------------------------------------------------------------------
592
593;; Predicated LD1.
;; Operand 0 is the destination vector register, operand 1 the memory
;; source and operand 2 the governing predicate, which the template
;; prints with the "/z" qualifier.
594(define_insn "maskload<mode><vpred>"
595 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
596 (unspec:SVE_ALL
597 [(match_operand:<VPRED> 2 "register_operand" "Upl")
598 (match_operand:SVE_ALL 1 "memory_operand" "m")]
599 UNSPEC_LD1_SVE))]
43cacb12 600 "TARGET_SVE"
915d28fe 601 "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
43cacb12
RS
602)
603
915d28fe
RS
604;; Unpredicated LD[234].
;; Operand 0 is the destination structure register and operand 1 the
;; memory source.  Operand 2 is not supplied by the caller: the expander
;; sets it to aarch64_ptrue_reg (<VPRED>mode), so the emitted UNSPEC_LDN
;; has the same shape as the predicated form.
605(define_expand "vec_load_lanes<mode><vsingle>"
606 [(set (match_operand:SVE_STRUCT 0 "register_operand")
607 (unspec:SVE_STRUCT
608 [(match_dup 2)
609 (match_operand:SVE_STRUCT 1 "memory_operand")]
610 UNSPEC_LDN))]
43cacb12
RS
611 "TARGET_SVE"
612 {
915d28fe 613 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
614 }
615)
616
915d28fe
RS
617;; Predicated LD[234].
;; Operand 0 is the destination structure register, operand 1 the memory
;; source and operand 2 the governing predicate.  The mnemonic is built
;; from <vector_count> and <Vesize>, giving the LD2/LD3/LD4 forms.
618(define_insn "vec_mask_load_lanes<mode><vsingle>"
619 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
620 (unspec:SVE_STRUCT
621 [(match_operand:<VPRED> 2 "register_operand" "Upl")
622 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
623 UNSPEC_LDN))]
8711e791 624 "TARGET_SVE"
915d28fe 625 "ld<vector_count><Vesize>\t%0, %2/z, %1"
8711e791
RS
626)
627
915d28fe
RS
628;; -------------------------------------------------------------------------
629;; ---- Normal gather loads
630;; -------------------------------------------------------------------------
631;; Includes gather forms of:
632;; - LD1D
633;; - LD1W
634;; -------------------------------------------------------------------------
635
636;; Unpredicated gather loads.
;; Operands 0-4 come from the caller: 0 is the destination, 1 a DImode
;; base (register or zero), 2 the vector of offsets, 3 a constant integer
;; and 4 the scale.  Operand 5 is filled in by the expander with
;; aarch64_ptrue_reg (<VPRED>mode), which turns the unpredicated request
;; into the predicated UNSPEC_LD1_GATHER form.
637(define_expand "gather_load<mode>"
638 [(set (match_operand:SVE_SD 0 "register_operand")
639 (unspec:SVE_SD
640 [(match_dup 5)
641 (match_operand:DI 1 "aarch64_reg_or_zero")
642 (match_operand:<V_INT_EQUIV> 2 "register_operand")
643 (match_operand:DI 3 "const_int_operand")
644 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
645 (mem:BLK (scratch))]
646 UNSPEC_LD1_GATHER))]
647 "TARGET_SVE"
43cacb12 648 {
915d28fe 649 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12 650 }
43cacb12
RS
651)
652
915d28fe
RS
653;; Predicated gather loads for 32-bit elements. Operand 3 is true for
654;; unsigned extension and false for signed extension.
;; Operand 5 is the governing predicate.  Alternatives, in order:
;;   1. zero base (constraint Z on operand 1): the address is formed from
;;      operand 2 alone;
;;   2. base register plus operand 2 with SXTW (operand 3 matches Z);
;;   3. base register plus operand 2 with UXTW (operand 3 matches Ui1);
;;   4. as 2, with the scale in operand 4 printed through %p4;
;;   5. as 3, with the scale in operand 4 printed through %p4.
;; In alternatives 1-3 operand 4 must match Ui1 and is not printed.
655(define_insn "mask_gather_load<mode>"
656 [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
657 (unspec:SVE_S
658 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
659 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
660 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
661 (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
662 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
663 (mem:BLK (scratch))]
664 UNSPEC_LD1_GATHER))]
665 "TARGET_SVE"
666 "@
667 ld1w\t%0.s, %5/z, [%2.s]
668 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
669 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
670 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
671 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
672)
673
674;; Predicated gather loads for 64-bit elements. The value of operand 3
675;; doesn't matter in this case.
;; Operand 5 is the governing predicate.  Alternatives, in order:
;;   1. zero base (constraint Z on operand 1): the address is formed from
;;      operand 2 alone;
;;   2. base register plus operand 2, with operand 4 matching Ui1 and not
;;      printed;
;;   3. base register plus operand 2 with "lsl" and the scale in operand 4
;;      printed through %p4.
;; Operand 3 has no constraint string and does not appear in any template.
676(define_insn "mask_gather_load<mode>"
677 [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
678 (unspec:SVE_D
679 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
680 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
681 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
682 (match_operand:DI 3 "const_int_operand")
683 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
684 (mem:BLK (scratch))]
685 UNSPEC_LD1_GATHER))]
686 "TARGET_SVE"
687 "@
688 ld1d\t%0.d, %5/z, [%2.d]
689 ld1d\t%0.d, %5/z, [%1, %2.d]
690 ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
691)
692
693;; =========================================================================
694;; == Stores
695;; =========================================================================
696
697;; -------------------------------------------------------------------------
698;; ---- Normal contiguous stores
699;; -------------------------------------------------------------------------
700;; Includes contiguous forms of:
701;; - ST1B
702;; - ST1D
703;; - ST1H
704;; - ST1W
705;; - ST2B
706;; - ST2D
707;; - ST2H
708;; - ST2W
709;; - ST3B
710;; - ST3D
711;; - ST3H
712;; - ST3W
713;; - ST4B
714;; - ST4D
715;; - ST4H
716;; - ST4W
717;; -------------------------------------------------------------------------
718
719;; Predicated ST1.
;; Operand 0 is the memory destination, operand 1 the vector register
;; being stored and operand 2 the governing predicate.  Operand 0 is
;; marked "+m" and also appears as an input through (match_dup 0), so the
;; RTL records a dependence on the previous contents of the memory.
720(define_insn "maskstore<mode><vpred>"
721 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
722 (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
723 (match_operand:SVE_ALL 1 "register_operand" "w")
724 (match_dup 0)]
725 UNSPEC_ST1_SVE))]
726 "TARGET_SVE"
727 "st1<Vesize>\t%1.<Vetype>, %2, %0"
728)
729
730;; Unpredicated ST[234]. This is always a full update, so the dependence
731;; on the old value of the memory location (via (match_dup 0)) is redundant.
732;; There doesn't seem to be any obvious benefit to treating the all-true
733;; case differently though. In particular, it's very unlikely that we'll
734;; only find out during RTL that a store_lanes is dead.
;; Operand 0 is the memory destination and operand 1 the structure
;; register being stored.  Operand 2 is not supplied by the caller: the
;; expander sets it to aarch64_ptrue_reg (<VPRED>mode), so the emitted
;; UNSPEC_STN has the same shape as the predicated form.
735(define_expand "vec_store_lanes<mode><vsingle>"
736 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
737 (unspec:SVE_STRUCT
738 [(match_dup 2)
739 (match_operand:SVE_STRUCT 1 "register_operand")
740 (match_dup 0)]
741 UNSPEC_STN))]
742 "TARGET_SVE"
43cacb12 743 {
915d28fe 744 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
745 }
746)
747
915d28fe
RS
748;; Predicated ST[234].
;; Operand 0 is the memory destination ("+m", also read through
;; (match_dup 0)), operand 1 the structure register being stored and
;; operand 2 the governing predicate.  The mnemonic is built from
;; <vector_count> and <Vesize>, giving the ST2/ST3/ST4 forms.
749(define_insn "vec_mask_store_lanes<mode><vsingle>"
750 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
751 (unspec:SVE_STRUCT
752 [(match_operand:<VPRED> 2 "register_operand" "Upl")
753 (match_operand:SVE_STRUCT 1 "register_operand" "w")
754 (match_dup 0)]
755 UNSPEC_STN))]
756 "TARGET_SVE"
757 "st<vector_count><Vesize>\t%1, %2, %0"
758)
759
760;; -------------------------------------------------------------------------
761;; ---- Normal scatter stores
762;; -------------------------------------------------------------------------
763;; Includes scatter forms of:
764;; - ST1D
765;; - ST1W
766;; -------------------------------------------------------------------------
767
768;; Unpredicated scatter stores.
;; Operands 0-4 come from the caller: 0 is a DImode base (register or
;; zero), 1 the vector of offsets, 2 a constant integer, 3 the scale and
;; 4 the vector of values to store.  Operand 5 is filled in by the
;; expander with aarch64_ptrue_reg (<VPRED>mode), which turns the
;; unpredicated request into the predicated UNSPEC_ST1_SCATTER form.
769(define_expand "scatter_store<mode>"
770 [(set (mem:BLK (scratch))
771 (unspec:BLK
772 [(match_dup 5)
773 (match_operand:DI 0 "aarch64_reg_or_zero")
774 (match_operand:<V_INT_EQUIV> 1 "register_operand")
775 (match_operand:DI 2 "const_int_operand")
776 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
777 (match_operand:SVE_SD 4 "register_operand")]
778 UNSPEC_ST1_SCATTER))]
779 "TARGET_SVE"
43cacb12 780 {
915d28fe 781 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
782 }
783)
784
915d28fe
RS
785;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
786;; unsigned extension and false for signed extension.
;; Operand 5 is the governing predicate and operand 4 the vector of values
;; to store.  Alternatives, in order:
;;   1. zero base (constraint Z on operand 0): the address is formed from
;;      operand 1 alone;
;;   2. base register plus operand 1 with SXTW (operand 2 matches Z);
;;   3. base register plus operand 1 with UXTW (operand 2 matches Ui1);
;;   4. as 2, with the scale in operand 3 printed through %p3;
;;   5. as 3, with the scale in operand 3 printed through %p3.
;; In alternatives 1-3 operand 3 must match Ui1 and is not printed.
787(define_insn "mask_scatter_store<mode>"
788 [(set (mem:BLK (scratch))
789 (unspec:BLK
790 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
791 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
792 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
793 (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
794 (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
795 (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
796 UNSPEC_ST1_SCATTER))]
43cacb12
RS
797 "TARGET_SVE"
798 "@
915d28fe
RS
799 st1w\t%4.s, %5, [%1.s]
800 st1w\t%4.s, %5, [%0, %1.s, sxtw]
801 st1w\t%4.s, %5, [%0, %1.s, uxtw]
802 st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
803 st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
804)
805
806;; Predicated scatter stores for 64-bit elements. The value of operand 2
807;; doesn't matter in this case.
;; Operand 5 is the governing predicate and operand 4 the vector of values
;; to store.  Alternatives, in order:
;;   1. zero base (constraint Z on operand 0): the address is formed from
;;      operand 1 alone;
;;   2. base register plus operand 1, with operand 3 matching Ui1 and not
;;      printed;
;;   3. base register plus operand 1 with "lsl" and the scale in operand 3
;;      printed through %p3.
;; Operand 2 has no constraint string and does not appear in any template.
808(define_insn "mask_scatter_store<mode>"
809 [(set (mem:BLK (scratch))
810 (unspec:BLK
811 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
812 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
813 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
814 (match_operand:DI 2 "const_int_operand")
815 (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
816 (match_operand:SVE_D 4 "register_operand" "w, w, w")]
817 UNSPEC_ST1_SCATTER))]
818 "TARGET_SVE"
819 "@
820 st1d\t%4.d, %5, [%1.d]
821 st1d\t%4.d, %5, [%0, %1.d]
822 st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
43cacb12
RS
823)
824
915d28fe
RS
825;; =========================================================================
826;; == Vector creation
827;; =========================================================================
828
829;; -------------------------------------------------------------------------
830;; ---- [INT,FP] Duplicate element
831;; -------------------------------------------------------------------------
832;; Includes:
833;; - MOV
834;; - LD1RB
835;; - LD1RD
836;; - LD1RH
837;; - LD1RW
838;; - LD1RQB
839;; - LD1RQD
840;; - LD1RQH
841;; - LD1RQW
842;; -------------------------------------------------------------------------
843
43cacb12
RS
;; Expander for duplicating a scalar element (operand 1) across an SVE
;; vector (operand 0).  When operand 1 is a MEM, the expander emits
;; gen_sve_ld1r<mode> directly, passing aarch64_ptrue_reg (<VPRED>mode)
;; as the predicate and a zero vector of <MODE>mode as the last argument.
;; Otherwise the vec_duplicate SET is emitted together with a clobber of
;; a VNx16BI scratch.
844(define_expand "vec_duplicate<mode>"
845 [(parallel
846 [(set (match_operand:SVE_ALL 0 "register_operand")
847 (vec_duplicate:SVE_ALL
848 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
678faefc 849 (clobber (scratch:VNx16BI))])]
43cacb12
RS
850 "TARGET_SVE"
851 {
852 if (MEM_P (operands[1]))
853 {
16de3637 854 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
855 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
856 CONST0_RTX (<MODE>mode)));
857 DONE;
858 }
859 }
860)
861
862;; Accept memory operands for the benefit of combine, and also in case
863;; the scalar input gets spilled to memory during RA. We want to split
864;; the load at the first opportunity in order to allow the PTRUE to be
865;; optimized with surrounding code.
;; Alternatives: MOV from an "r" register, MOV from a "w" register, and
;; "#" for a memory (Uty) source. The "#" form is split as soon as
;; operand 1 is a MEM: CONSTM1_RTX is moved into the VNx16BI scratch
;; (a new pseudo is created if operand 2 is still a SCRATCH), the scratch
;; is viewed in <VPRED>mode, and sve_ld1r<mode> is emitted with it.
;; The length attribute is 8 for the memory alternative and 4 otherwise.
866(define_insn_and_split "*vec_duplicate<mode>_reg"
867 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
868 (vec_duplicate:SVE_ALL
869 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
678faefc 870 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
43cacb12
RS
871 "TARGET_SVE"
872 "@
873 mov\t%0.<Vetype>, %<vwcore>1
874 mov\t%0.<Vetype>, %<Vetype>1
875 #"
876 "&& MEM_P (operands[1])"
877 [(const_int 0)]
878 {
879 if (GET_CODE (operands[2]) == SCRATCH)
678faefc
RS
880 operands[2] = gen_reg_rtx (VNx16BImode);
881 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
882 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
883 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
43cacb12
RS
884 CONST0_RTX (<MODE>mode)));
885 DONE;
886 }
887 [(set_attr "length" "4,4,8")]
888)
889
4aeb1ba7
RS
890;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
;; Only enabled when !BYTES_BIG_ENDIAN. Before printing, operand 1 is
;; replaced by a <MODE>mode REG with the same register number (presumably
;; so that "%1.q" prints as an SVE register -- confirm against the
;; operand-printing code); the output is "dup %0.q, %1.q[0]".
891(define_insn "@aarch64_vec_duplicate_vq<mode>_le"
892 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
893 (vec_duplicate:SVE_ALL
894 (match_operand:<V128> 1 "register_operand" "w")))]
895 "TARGET_SVE && !BYTES_BIG_ENDIAN"
896 {
897 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
898 return "dup\t%0.q, %1.q[0]";
899 }
900)
901
902;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
903;; The SVE register layout puts memory lane N into (architectural)
904;; register lane N, whereas the Advanced SIMD layout puts the memory
905;; lsb into the register lsb. We therefore have to describe this in rtl
906;; terms as a reverse of the V128 vector followed by a duplicate.
;; Only enabled when BYTES_BIG_ENDIAN, and only when the first selector
;; element of operand 2 (a descending_int_parallel) equals the index of
;; the last <V128> element. The emitted instruction is the same
;; "dup %0.q, %1.q[0]" that the little-endian pattern uses.
907(define_insn "@aarch64_vec_duplicate_vq<mode>_be"
908 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
909 (vec_duplicate:SVE_ALL
910 (vec_select:<V128>
911 (match_operand:<V128> 1 "register_operand" "w")
912 (match_operand 2 "descending_int_parallel"))))]
913 "TARGET_SVE
914 && BYTES_BIG_ENDIAN
915 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
916 GET_MODE_NUNITS (<V128>mode) - 1)"
917 {
918 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
919 return "dup\t%0.q, %1.q[0]";
920 }
921)
922
43cacb12
RS
923;; This is used for vec_duplicate<mode>s from memory, but can also
924;; be used by combine to optimize selects of a vec_duplicate<mode>
925;; with zero.
;; Operand 1 is the governing predicate, operand 2 the scalar memory
;; location being broadcast, and operand 3 must satisfy
;; aarch64_simd_imm_zero. The load is emitted in zeroing form ("%1/z").
926(define_insn "sve_ld1r<mode>"
927 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
928 (unspec:SVE_ALL
929 [(match_operand:<VPRED> 1 "register_operand" "Upl")
930 (vec_duplicate:SVE_ALL
931 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
932 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
933 UNSPEC_SEL))]
934 "TARGET_SVE"
935 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
936)
937
4aeb1ba7
RS
938;; Load 128 bits from memory under predicate control and duplicate to
939;; fill a vector.
;; Operand 2 is the governing predicate and operand 1 the <V128> memory
;; source. Before printing, operand 1 is replaced by a <VEL>mode MEM
;; with the same address; the load is emitted in zeroing form ("%2/z").
940(define_insn "@aarch64_sve_ld1rq<mode>"
947b1372
RS
941 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
942 (unspec:SVE_ALL
4aeb1ba7
RS
943 [(match_operand:<VPRED> 2 "register_operand" "Upl")
944 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
43cacb12
RS
945 UNSPEC_LD1RQ))]
946 "TARGET_SVE"
4aeb1ba7
RS
947 {
948 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
949 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
950 }
43cacb12
RS
951)
952
915d28fe
RS
953;; -------------------------------------------------------------------------
954;; ---- [INT,FP] Initialize from individual elements
955;; -------------------------------------------------------------------------
956;; Includes:
957;; - INSR
958;; -------------------------------------------------------------------------
959
;; Vector initialization expander. All the work is delegated to
;; aarch64_sve_expand_vector_init, which receives the destination
;; (operand 0) and the initializer (operand 1, unconstrained here).
960(define_expand "vec_init<mode><Vel>"
961 [(match_operand:SVE_ALL 0 "register_operand")
962 (match_operand 1 "")]
43cacb12
RS
963 "TARGET_SVE"
964 {
915d28fe 965 aarch64_sve_expand_vector_init (operands[0], operands[1]);
43cacb12
RS
966 DONE;
967 }
968)
969
915d28fe
RS
970;; Shift an SVE vector left and insert a scalar into element 0.
;; Alternatives 0 and 1 tie input operand 1 to the destination and emit a
;; bare INSR, taking the scalar from an "rZ" operand or a "w" register
;; respectively. Alternatives 2 and 3 accept a distinct input register
;; and copy it with MOVPRFX before the INSR; they are flagged through
;; the "movprfx" attribute.
971(define_insn "vec_shl_insert_<mode>"
61ee25b9 972 [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??&w, ?&w")
915d28fe 973 (unspec:SVE_ALL
61ee25b9
RS
974 [(match_operand:SVE_ALL 1 "register_operand" "0, 0, w, w")
975 (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
915d28fe
RS
976 UNSPEC_INSR))]
977 "TARGET_SVE"
978 "@
979 insr\t%0.<Vetype>, %<vwcore>2
61ee25b9
RS
980 insr\t%0.<Vetype>, %<Vetype>2
981 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
982 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
983 [(set_attr "movprfx" "*,*,yes,yes")]
915d28fe
RS
984)
985
986;; -------------------------------------------------------------------------
987;; ---- [INT] Linear series
988;; -------------------------------------------------------------------------
989;; Includes:
990;; - INDEX
991;; -------------------------------------------------------------------------
992
;; Build a linear series with INDEX from operands 1 and 2. The
;; alternatives cover immediate (Usi) operand 1 with register operand 2,
;; register operand 1 with immediate operand 2, and both in registers.
993(define_insn "vec_series<mode>"
994 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
995 (vec_series:SVE_I
996 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
997 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
998 "TARGET_SVE"
999 "@
1000 index\t%0.<Vetype>, #%1, %<vw>2
43cacb12
RS
1001 index\t%0.<Vetype>, %<vw>1, #%2
1002 index\t%0.<Vetype>, %<vw>1, %<vw>2"
1003)
1004
1005;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
1006;; of an INDEX instruction.
;; The insn condition and the output code both call
;; aarch64_check_zero_based_sve_index_immediate on operand 2. The output
;; code replaces operand 2 with that function's return value and prints
;; it as the immediate of the INDEX, with register operand 1 as the
;; other INDEX operand.
1007(define_insn "*vec_series<mode>_plus"
1008 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1009 (plus:SVE_I
1010 (vec_duplicate:SVE_I
1011 (match_operand:<VEL> 1 "register_operand" "r"))
1012 (match_operand:SVE_I 2 "immediate_operand")))]
1013 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
1014 {
1015 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
1016 return "index\t%0.<Vetype>, %<vw>1, #%2";
1017 }
1018)
1019
915d28fe
RS
1020;; -------------------------------------------------------------------------
1021;; ---- [PRED] Duplicate element
1022;; -------------------------------------------------------------------------
1023;; The patterns in this section are synthetic.
1024;; -------------------------------------------------------------------------
1025
1026;; Implement a predicate broadcast by shifting the low bit of the scalar
1027;; input into the top bit and using a WHILELO. An alternative would be to
1028;; duplicate the input and do a compare with zero.
;; Operand 1 is viewed as DImode and shifted left by 63 into a fresh
;; DImode temporary. That temporary is then passed, together with
;; const0_rtx, to while_ultdi<mode> to produce the predicate result.
1029(define_expand "vec_duplicate<mode>"
1030 [(set (match_operand:PRED_ALL 0 "register_operand")
1031 (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
9f4cbab8
RS
1032 "TARGET_SVE"
1033 {
915d28fe
RS
1034 rtx tmp = gen_reg_rtx (DImode);
1035 rtx op1 = gen_lowpart (DImode, operands[1]);
1036 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
1037 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
1038 DONE;
9f4cbab8
RS
1039 }
1040)
1041
915d28fe
RS
1042;; =========================================================================
1043;; == Vector decomposition
1044;; =========================================================================
9f4cbab8 1045
915d28fe
RS
1046;; -------------------------------------------------------------------------
1047;; ---- [INT,FP] Extract index
1048;; -------------------------------------------------------------------------
1049;; Includes:
1050;; - DUP (Advanced SIMD)
1051;; - DUP (SVE)
1052;; - EXT (SVE)
1053;; - ST1 (Advanced SIMD)
1054;; - UMOV (Advanced SIMD)
1055;; -------------------------------------------------------------------------
1056
;; Extract element operand 2 of vector operand 1 into scalar operand 0.
;; Three cases are handled:
;; - an index known to be the last element uses extract_last_<mode> with
;;   the predicate returned by aarch64_pfalse_reg;
;; - a non-constant index builds a predicate that is true only for the
;;   wanted element (an INDEX series compared against zero) and again
;;   uses extract_last_<mode>;
;; - any other constant index falls through and emits the vec_select
;;   pattern unchanged.
1057(define_expand "vec_extract<mode><Vel>"
1058 [(set (match_operand:<VEL> 0 "register_operand")
1059 (vec_select:<VEL>
1060 (match_operand:SVE_ALL 1 "register_operand")
1061 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
9f4cbab8
RS
1062 "TARGET_SVE"
1063 {
915d28fe
RS
1064 poly_int64 val;
1065 if (poly_int_rtx_p (operands[2], &val)
1066 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
1067 {
1068 /* The last element can be extracted with a LASTB and a false
1069 predicate. */
1070 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
1071 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
1072 DONE;
1073 }
1074 if (!CONST_INT_P (operands[2]))
1075 {
1076 /* Create an index with operand[2] as the base and -1 as the step.
1077 It will then be zero for the element we care about. */
1078 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
1079 index = force_reg (<VEL_INT>mode, index);
1080 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
1081 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
1082
1083 /* Get a predicate that is true for only that element. */
1084 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
1085 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
1086 rtx sel = gen_reg_rtx (<VPRED>mode);
1087 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
1088
1089 /* Select the element using LASTB. */
1090 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
1091 DONE;
1092 }
9f4cbab8
RS
1093 }
1094)
1095
915d28fe
RS
1096;; Extract element zero. This is a special case because we want to force
1097;; the registers to be the same for the second alternative, and then
1098;; split the instruction into nothing after RA.
;; Alternatives: UMOV of lane 0 to an "r" register, "#" when the source
;; is tied to a "w" destination, and ST1 of lane 0 to memory (Utv).
;; Operand 1 is printed through a <V128>mode REG. The split fires after
;; reload only when operand 0 is a register with the same register number
;; as operand 1, and emits nothing but a NOTE_INSN_DELETED.
1099(define_insn_and_split "*vec_extract<mode><Vel>_0"
1100 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
1101 (vec_select:<VEL>
1102 (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
1103 (parallel [(const_int 0)])))]
9f4cbab8 1104 "TARGET_SVE"
915d28fe
RS
1105 {
1106 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
1107 switch (which_alternative)
1108 {
1109 case 0:
1110 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
1111 case 1:
1112 return "#";
1113 case 2:
1114 return "st1\\t{%1.<Vetype>}[0], %0";
1115 default:
1116 gcc_unreachable ();
1117 }
1118 }
1119 "&& reload_completed
1120 && REG_P (operands[0])
1121 && REGNO (operands[0]) == REGNO (operands[1])"
1122 [(const_int 0)]
1123 {
1124 emit_note (NOTE_INSN_DELETED);
1125 DONE;
1126 }
1127 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
9f4cbab8
RS
1128)
1129
915d28fe
RS
1130;; Extract an element from the Advanced SIMD portion of the register.
1131;; We don't just reuse the aarch64-simd.md pattern because we don't
1132;; want any change in lane number on big-endian targets.
;; The condition restricts the byte offset of the selected element
;; (index * element size) to the range 1..15. Operand 1 is printed
;; through a <V128>mode REG. Alternatives: UMOV to an "r" register,
;; DUP to a "w" register, and ST1 of the lane to memory (Utv).
1133(define_insn "*vec_extract<mode><Vel>_v128"
1134 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
1135 (vec_select:<VEL>
1136 (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
1137 (parallel [(match_operand:SI 2 "const_int_operand")])))]
1138 "TARGET_SVE
1139 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
43cacb12 1140 {
915d28fe
RS
1141 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
1142 switch (which_alternative)
1143 {
1144 case 0:
1145 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
1146 case 1:
1147 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
1148 case 2:
1149 return "st1\\t{%1.<Vetype>}[%2], %0";
1150 default:
1151 gcc_unreachable ();
1152 }
43cacb12 1153 }
915d28fe 1154 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
43cacb12
RS
1155)
1156
915d28fe
RS
1157;; Extract an element in the range of DUP. This pattern allows the
1158;; source and destination to be different.
;; Matches when the byte offset of the selected element is in the range
;; 16..63. Operand 0 is replaced by a <MODE>mode REG with the same
;; register number before printing the indexed DUP.
1159(define_insn "*vec_extract<mode><Vel>_dup"
1160 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1161 (vec_select:<VEL>
1162 (match_operand:SVE_ALL 1 "register_operand" "w")
1163 (parallel [(match_operand:SI 2 "const_int_operand")])))]
1164 "TARGET_SVE
1165 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
1166 {
1167 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
1168 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
1169 }
43cacb12
RS
1170)
1171
915d28fe
RS
1172;; Extract an element outside the range of DUP. This pattern requires the
1173;; source and destination to be the same.
;; Matches when the byte offset of the selected element is 64 or more.
;; The "0" constraint ties the source to the destination. Before
;; printing, operand 0 is replaced by a <MODE>mode REG and operand 2 is
;; converted from an element index to a byte offset, which becomes the
;; EXT immediate.
1174(define_insn "*vec_extract<mode><Vel>_ext"
1175 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1176 (vec_select:<VEL>
1177 (match_operand:SVE_ALL 1 "register_operand" "0")
1178 (parallel [(match_operand:SI 2 "const_int_operand")])))]
1179 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
1180 {
1181 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
1182 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
1183 return "ext\t%0.b, %0.b, %0.b, #%2";
1184 }
43cacb12
RS
1185)
1186
915d28fe
RS
1187;; -------------------------------------------------------------------------
1188;; ---- [INT,FP] Extract active element
1189;; -------------------------------------------------------------------------
1190;; Includes:
1191;; - LASTB
1192;; -------------------------------------------------------------------------
1193
1194;; Extract the last active element of operand 1 into operand 0.
1195;; If no elements are active, extract the last inactive element instead.
;; Operand 1 is the predicate and operand 2 the vector. The two
;; alternatives differ only in the destination register class: an "r"
;; register (printed with %<vwcore>0) or a "w" register (%<Vetype>0).
1196(define_insn "extract_last_<mode>"
1197 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
1198 (unspec:<VEL>
1199 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1200 (match_operand:SVE_ALL 2 "register_operand" "w, w")]
1201 UNSPEC_LASTB))]
43cacb12 1202 "TARGET_SVE"
915d28fe
RS
1203 "@
1204 lastb\t%<vwcore>0, %1, %2.<Vetype>
1205 lastb\t%<Vetype>0, %1, %2.<Vetype>"
43cacb12
RS
1206)
1207
915d28fe
RS
1208;; -------------------------------------------------------------------------
1209;; ---- [PRED] Extract index
1210;; -------------------------------------------------------------------------
1211;; The patterns in this section are synthetic.
1212;; -------------------------------------------------------------------------
1213
1214;; Handle extractions from a predicate by converting to an integer vector
1215;; and extracting from there.
;; The predicate (operand 1) is first turned into a <MODE>mode vector
;; via aarch64_sve_dup<mode>_const, passing CONST1_RTX and CONST0_RTX
;; (presumably 1 for active lanes and 0 for inactive ones -- confirm
;; against that pattern). The integer vec_extract<mode><Vel> expander
;; then extracts element operand 2 from that temporary.
1216(define_expand "vec_extract<vpred><Vel>"
1217 [(match_operand:<VEL> 0 "register_operand")
1218 (match_operand:<VPRED> 1 "register_operand")
1219 (match_operand:SI 2 "nonmemory_operand")
1220 ;; Dummy operand to which we can attach the iterator.
1221 (reg:SVE_I V0_REGNUM)]
43cacb12 1222 "TARGET_SVE"
915d28fe
RS
1223 {
1224 rtx tmp = gen_reg_rtx (<MODE>mode);
1225 emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
1226 CONST1_RTX (<MODE>mode),
1227 CONST0_RTX (<MODE>mode)));
1228 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
1229 DONE;
1230 }
43cacb12
RS
1231)
1232
915d28fe
RS
1233;; =========================================================================
1234;; == Unary arithmetic
1235;; =========================================================================
1236
1237;; -------------------------------------------------------------------------
1238;; ---- [INT] General unary arithmetic corresponding to rtx codes
1239;; -------------------------------------------------------------------------
1240;; Includes:
1241;; - ABS
1242;; - CNT (= popcount)
1243;; - NEG
1244;; - NOT
1245;; -------------------------------------------------------------------------
1246
1247;; Unpredicated integer unary arithmetic.
;; Operand 2 is not supplied by the caller: the preparation code sets it
;; to aarch64_ptrue_reg (<VPRED>mode), so the expansion is the
;; SVE_INT_UNARY operation wrapped in UNSPEC_MERGE_PTRUE with that
;; predicate.
1248(define_expand "<optab><mode>2"
1249 [(set (match_operand:SVE_I 0 "register_operand")
1250 (unspec:SVE_I
1251 [(match_dup 2)
1252 (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
43cacb12
RS
1253 UNSPEC_MERGE_PTRUE))]
1254 "TARGET_SVE"
915d28fe
RS
1255 {
1256 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1257 }
43cacb12
RS
1258)
1259
915d28fe
RS
1260;; Integer unary arithmetic predicated with a PTRUE.
;; Operand 1 is the governing predicate and operand 2 the input vector.
;; The instruction is printed in its merging ("%1/m") form.
1261(define_insn "*<optab><mode>2"
1262 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1263 (unspec:SVE_I
1264 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1265 (SVE_INT_UNARY:SVE_I
1266 (match_operand:SVE_I 2 "register_operand" "w"))]
43cacb12
RS
1267 UNSPEC_MERGE_PTRUE))]
1268 "TARGET_SVE"
915d28fe 1269 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
43cacb12
RS
1270)
1271
915d28fe 1272;; -------------------------------------------------------------------------
d45b20a5 1273;; ---- [FP] General unary arithmetic corresponding to unspecs
915d28fe
RS
1274;; -------------------------------------------------------------------------
1275;; Includes:
1276;; - FABS
1277;; - FNEG
915d28fe
RS
1278;; - FRINTA
1279;; - FRINTI
1280;; - FRINTM
1281;; - FRINTN
1282;; - FRINTP
1283;; - FRINTX
1284;; - FRINTZ
d45b20a5 1285;; - FSQRT
915d28fe
RS
1286;; -------------------------------------------------------------------------
1287
d45b20a5
RS
1288;; Unpredicated floating-point unary operations.
;; Operand 2 is not supplied by the caller: the preparation code sets it
;; to aarch64_ptrue_reg (<VPRED>mode). The operation itself is one of
;; the SVE_COND_FP_UNARY unspecs applied to operand 1.
1289(define_expand "<optab><mode>2"
915d28fe
RS
1290 [(set (match_operand:SVE_F 0 "register_operand")
1291 (unspec:SVE_F
1292 [(match_dup 2)
d45b20a5
RS
1293 (match_operand:SVE_F 1 "register_operand")]
1294 SVE_COND_FP_UNARY))]
915d28fe
RS
1295 "TARGET_SVE"
1296 {
1297 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1298 }
1299)
1300
d45b20a5
RS
1301;; Predicated floating-point unary operations.
;; Operand 1 is the governing predicate and operand 2 the input vector.
;; The instruction is printed in its merging ("%1/m") form.
1302(define_insn "*<optab><mode>2"
915d28fe
RS
1303 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1304 (unspec:SVE_F
1305 [(match_operand:<VPRED> 1 "register_operand" "Upl")
d45b20a5
RS
1306 (match_operand:SVE_F 2 "register_operand" "w")]
1307 SVE_COND_FP_UNARY))]
915d28fe 1308 "TARGET_SVE"
d45b20a5 1309 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
915d28fe
RS
1310)
1311
1312;; -------------------------------------------------------------------------
1313;; ---- [PRED] Inverse
1314;; -------------------------------------------------------------------------
1315;; Includes:
1316;; - NOT
1317;; -------------------------------------------------------------------------
1318
1319;; Unpredicated predicate inverse.
;; The inverse is expressed as (and (not operand 1) operand 2), with
;; operand 2 set to aarch64_ptrue_reg (<MODE>mode) by the preparation
;; code.
1320(define_expand "one_cmpl<mode>2"
1321 [(set (match_operand:PRED_ALL 0 "register_operand")
1322 (and:PRED_ALL
1323 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
1324 (match_dup 2)))]
1325 "TARGET_SVE"
1326 {
1327 operands[2] = aarch64_ptrue_reg (<MODE>mode);
1328 }
1329)
1330
1331;; Predicated predicate inverse.
;; Note the operand numbering: operand 1 is the governing predicate
;; (printed as "%1/z") and operand 2 is the predicate being inverted.
1332(define_insn "*one_cmpl<mode>3"
1333 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1334 (and:PRED_ALL
1335 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1336 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1337 "TARGET_SVE"
1338 "not\t%0.b, %1/z, %2.b"
1339)
1340
1341;; =========================================================================
1342;; == Binary arithmetic
1343;; =========================================================================
1344
1345;; -------------------------------------------------------------------------
1346;; ---- [INT] General binary arithmetic corresponding to rtx codes
1347;; -------------------------------------------------------------------------
1348;; Includes merging patterns for:
1349;; - ADD
1350;; - AND
1351;; - EOR
1352;; - MUL
1353;; - ORR
1354;; - SMAX
1355;; - SMIN
1356;; - SUB
1357;; - UMAX
1358;; - UMIN
1359;; -------------------------------------------------------------------------
1360
1361;; Predicated integer operations with merging.
;; Operand 1 is the predicate, operands 2 and 3 are the inputs of the
;; SVE_INT_BINARY operation, and operand 4 (a register or zero) is the
;; other UNSPEC_SEL input. There is no preparation code, so the pattern
;; is emitted as written.
1362(define_expand "cond_<optab><mode>"
1363 [(set (match_operand:SVE_I 0 "register_operand")
1364 (unspec:SVE_I
1365 [(match_operand:<VPRED> 1 "register_operand")
1366 (SVE_INT_BINARY:SVE_I
1367 (match_operand:SVE_I 2 "register_operand")
1368 (match_operand:SVE_I 3 "register_operand"))
1369 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
1370 UNSPEC_SEL))]
1371 "TARGET_SVE"
1372)
1373
1374;; Predicated integer operations, merging with the first input.
;; The (match_dup 2) makes the merge value the same as the first input.
;; Alternative 0 ties operand 2 to the destination; alternative 1 allows
;; a distinct register and copies it in with MOVPRFX first.
1375(define_insn "*cond_<optab><mode>_2"
1376 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1377 (unspec:SVE_I
1378 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1379 (SVE_INT_BINARY:SVE_I
1380 (match_operand:SVE_I 2 "register_operand" "0, w")
1381 (match_operand:SVE_I 3 "register_operand" "w, w"))
1382 (match_dup 2)]
1383 UNSPEC_SEL))]
1384 "TARGET_SVE"
1385 "@
1386 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1387 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1388 [(set_attr "movprfx" "*,yes")]
1389)
1390
1391;; Predicated integer operations, merging with the second input.
;; The (match_dup 3) makes the merge value the same as the second input,
;; so the output uses <sve_int_op_rev> with operand 3 in the destination
;; and operand 2 as the other source. Alternative 1 copies operand 3
;; into the destination with MOVPRFX first.
1392(define_insn "*cond_<optab><mode>_3"
1393 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1394 (unspec:SVE_I
1395 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1396 (SVE_INT_BINARY:SVE_I
1397 (match_operand:SVE_I 2 "register_operand" "w, w")
1398 (match_operand:SVE_I 3 "register_operand" "0, w"))
1399 (match_dup 3)]
1400 UNSPEC_SEL))]
1401 "TARGET_SVE"
1402 "@
1403 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1404 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1405 [(set_attr "movprfx" "*,yes")]
1406)
1407
1408;; Predicated integer operations, merging with an independent value.
;; The condition rejects the cases where operand 4 equals operand 2 or
;; operand 3. Alternatives 0-2 merge with zero (Dz) using a zeroing
;; MOVPRFX. Alternative 3 ties operand 4 to the destination and uses a
;; merging MOVPRFX of operand 2. Alternative 4 ("#") is rewritten after
;; reload when operand 4 is a register other than operand 0: a
;; vcond_mask_<mode><vpred> is emitted into operand 0, and operands 2
;; and 4 are then both replaced by operand 0.
1409(define_insn_and_rewrite "*cond_<optab><mode>_any"
1410 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
1411 (unspec:SVE_I
1412 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
1413 (SVE_INT_BINARY:SVE_I
1414 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
1415 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
1416 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
1417 UNSPEC_SEL))]
43cacb12 1418 "TARGET_SVE
915d28fe
RS
1419 && !rtx_equal_p (operands[2], operands[4])
1420 && !rtx_equal_p (operands[3], operands[4])"
1421 "@
1422 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1423 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1424 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1425 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1426 #"
1427 "&& reload_completed
1428 && register_operand (operands[4], <MODE>mode)
1429 && !rtx_equal_p (operands[0], operands[4])"
43cacb12 1430 {
915d28fe
RS
1431 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1432 operands[4], operands[1]));
1433 operands[4] = operands[2] = operands[0];
43cacb12 1434 }
915d28fe 1435 [(set_attr "movprfx" "yes")]
43cacb12
RS
1436)
1437
915d28fe
RS
1438;; -------------------------------------------------------------------------
1439;; ---- [INT] Addition
1440;; -------------------------------------------------------------------------
1441;; Includes:
1442;; - ADD
1443;; - DECB
1444;; - DECD
1445;; - DECH
1446;; - DECW
1447;; - INCB
1448;; - INCD
1449;; - INCH
1450;; - INCW
1451;; - SUB
1452;; -------------------------------------------------------------------------
1453
43cacb12
RS
;; Unpredicated integer addition. Alternatives:
;; - vsa: ADD of an immediate, printed with %D2;
;; - vsn: SUB of an immediate, printed with %N2;
;; - vsi: text produced by aarch64_output_sve_inc_dec_immediate;
;; - w:   three-register ADD.
;; The first three tie operand 1 to the destination.
1454(define_insn "add<mode>3"
1455 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
1456 (plus:SVE_I
1457 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
1458 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
1459 "TARGET_SVE"
1460 "@
1461 add\t%0.<Vetype>, %0.<Vetype>, #%D2
1462 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
1463 * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
1464 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
1465)
1466
915d28fe
RS
1467;; Merging forms are handled through SVE_INT_BINARY.
1468
1469;; -------------------------------------------------------------------------
1470;; ---- [INT] Subtraction
1471;; -------------------------------------------------------------------------
1472;; Includes:
1473;; - SUB
1474;; - SUBR
1475;; -------------------------------------------------------------------------
1476
43cacb12
RS
;; Unpredicated integer subtraction. Alternative 0 is a three-register
;; SUB. Alternative 1 handles an immediate (vsa) first operand by tying
;; operand 2 to the destination and emitting SUBR with that immediate.
1477(define_insn "sub<mode>3"
1478 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
1479 (minus:SVE_I
1480 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
1481 (match_operand:SVE_I 2 "register_operand" "w, 0")))]
1482 "TARGET_SVE"
1483 "@
1484 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
1485 subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
1486)
1487
915d28fe
RS
1488;; Merging forms are handled through SVE_INT_BINARY.
1489
1490;; -------------------------------------------------------------------------
1491;; ---- [INT] Absolute difference
1492;; -------------------------------------------------------------------------
1493;; Includes:
1494;; - SABD
1495;; - UABD
1496;; -------------------------------------------------------------------------
1497
1498;; Unpredicated integer absolute difference.
;; Emits aarch64_<su>abd<mode>_3 on operands 1 and 2, governed by the
;; predicate returned by aarch64_ptrue_reg (<VPRED>mode), with the
;; result in operand 0.
1499(define_expand "<su>abd<mode>_3"
1500 [(use (match_operand:SVE_I 0 "register_operand"))
1501 (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
1502 (match_operand:SVE_I 2 "register_operand"))]
1503 "TARGET_SVE"
1504 {
1505 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
1506 emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
1507 operands[2]));
1508 DONE;
1509 }
1510)
1511
1512;; Predicated integer absolute difference.
;; The difference is described as USMAX (op2, op3) minus
;; <max_opp> (op2, op3) under predicate operand 1. Alternative 0 ties
;; operand 2 to the destination; alternative 1 copies it in with
;; MOVPRFX first.
1513(define_insn "aarch64_<su>abd<mode>_3"
1514 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1515 (unspec:SVE_I
1516 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1517 (minus:SVE_I
1518 (USMAX:SVE_I
1519 (match_operand:SVE_I 2 "register_operand" "0, w")
1520 (match_operand:SVE_I 3 "register_operand" "w, w"))
1521 (<max_opp>:SVE_I
1522 (match_dup 2)
1523 (match_dup 3)))]
1524 UNSPEC_MERGE_PTRUE))]
1525 "TARGET_SVE"
1526 "@
1527 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1528 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1529 [(set_attr "movprfx" "*,yes")]
1530)
1531
1532;; -------------------------------------------------------------------------
1533;; ---- [INT] Multiplication
1534;; -------------------------------------------------------------------------
1535;; Includes:
1536;; - MUL
1537;; -------------------------------------------------------------------------
1538
43cacb12
RS
1539;; Unpredicated multiplication.
;; Operand 3 is not supplied by the caller: the preparation code sets it
;; to aarch64_ptrue_reg (<VPRED>mode). Operand 2 may be anything
;; accepted by aarch64_sve_mul_operand.
1540(define_expand "mul<mode>3"
1541 [(set (match_operand:SVE_I 0 "register_operand")
1542 (unspec:SVE_I
1543 [(match_dup 3)
1544 (mult:SVE_I
1545 (match_operand:SVE_I 1 "register_operand")
1546 (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
1547 UNSPEC_MERGE_PTRUE))]
1548 "TARGET_SVE"
1549 {
16de3637 1550 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
1551 }
1552)
1553
1554;; Multiplication predicated with a PTRUE. We don't actually need the
1555;; predicate for the first alternative, but using Upa or X isn't likely
1556;; to gain much and would make the instruction seem less uniform to the
1557;; register allocator.
;; Alternative 0 (vsm immediate) outputs "#" and is split after reload,
;; whenever operand 3 is not a register, into a plain
;; (set op0 (mult op2 op3)) without the predicate. Alternatives 1 and 2
;; are the predicated register forms; alternative 2 copies operand 2
;; into the destination with MOVPRFX first.
26004f51 1558(define_insn_and_split "*mul<mode>3"
a08acce8 1559 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
43cacb12 1560 (unspec:SVE_I
a08acce8 1561 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
43cacb12 1562 (mult:SVE_I
a08acce8
RH
1563 (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
1564 (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
43cacb12
RS
1565 UNSPEC_MERGE_PTRUE))]
1566 "TARGET_SVE"
1567 "@
26004f51 1568 #
a08acce8
RH
1569 mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1570 movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
26004f51
RS
1571 ; Split the unpredicated form after reload, so that we don't have
1572 ; the unnecessary PTRUE.
1573 "&& reload_completed
1574 && !register_operand (operands[3], <MODE>mode)"
1575 [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
1576 ""
a08acce8 1577 [(set_attr "movprfx" "*,*,yes")]
43cacb12
RS
1578)
1579
26004f51
RS
1580;; Unpredicated multiplications by a constant (post-RA only).
1581;; These are generated by splitting a predicated instruction whose
1582;; predicate is unused.
;; Only available once reload_completed. Operand 1 is tied to the
;; destination and operand 2 (an aarch64_sve_mul_immediate) is printed
;; as the MUL immediate.
1583(define_insn "*post_ra_mul<mode>3"
1584 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1585 (mult:SVE_I
1586 (match_operand:SVE_I 1 "register_operand" "0")
1587 (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
1588 "TARGET_SVE && reload_completed"
1589 "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
1590)
1591
915d28fe 1592;; Merging forms are handled through SVE_INT_BINARY.
43cacb12 1593
915d28fe
RS
1594;; -------------------------------------------------------------------------
1595;; ---- [INT] Highpart multiplication
1596;; -------------------------------------------------------------------------
1597;; Includes:
1598;; - SMULH
1599;; - UMULH
1600;; -------------------------------------------------------------------------
43cacb12 1601
11e9443f
RS
1602;; Unpredicated highpart multiplication.
;; Operand 3 is not supplied by the caller: the preparation code sets it
;; to aarch64_ptrue_reg (<VPRED>mode). The multiplication itself is a
;; MUL_HIGHPART unspec of operands 1 and 2.
1603(define_expand "<su>mul<mode>3_highpart"
1604 [(set (match_operand:SVE_I 0 "register_operand")
1605 (unspec:SVE_I
1606 [(match_dup 3)
1607 (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
1608 (match_operand:SVE_I 2 "register_operand")]
1609 MUL_HIGHPART)]
1610 UNSPEC_MERGE_PTRUE))]
1611 "TARGET_SVE"
1612 {
16de3637 1613 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
11e9443f
RS
1614 }
1615)
1616
1617;; Predicated highpart multiplication.
;; Operand 1 is the governing predicate; operands 2 and 3 are the
;; multiplication inputs. Alternative 0 ties operand 2 to the
;; destination; alternative 1 copies it in with MOVPRFX first.
1618(define_insn "*<su>mul<mode>3_highpart"
a08acce8 1619 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
11e9443f 1620 (unspec:SVE_I
a08acce8
RH
1621 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1622 (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
1623 (match_operand:SVE_I 3 "register_operand" "w, w")]
11e9443f
RS
1624 MUL_HIGHPART)]
1625 UNSPEC_MERGE_PTRUE))]
1626 "TARGET_SVE"
a08acce8
RH
1627 "@
1628 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1629 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1630 [(set_attr "movprfx" "*,yes")]
11e9443f
RS
1631)
1632
915d28fe
RS
1633;; -------------------------------------------------------------------------
1634;; ---- [INT] Division
1635;; -------------------------------------------------------------------------
1636;; Includes:
1637;; - SDIV
1638;; - SDIVR
1639;; - UDIV
1640;; - UDIVR
1641;; -------------------------------------------------------------------------
1642
1643;; Unpredicated integer division.
c38f7319
RS
;; Operand 3 is not supplied by the caller: the preparation code sets it
;; to aarch64_ptrue_reg (<VPRED>mode). Both inputs (operands 1 and 2)
;; must be registers, and only SVE_SDI modes are handled.
1644(define_expand "<optab><mode>3"
1645 [(set (match_operand:SVE_SDI 0 "register_operand")
1646 (unspec:SVE_SDI
1647 [(match_dup 3)
1648 (SVE_INT_BINARY_SD:SVE_SDI
1649 (match_operand:SVE_SDI 1 "register_operand")
1650 (match_operand:SVE_SDI 2 "register_operand"))]
1651 UNSPEC_MERGE_PTRUE))]
1652 "TARGET_SVE"
1653 {
16de3637 1654 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
c38f7319
RS
1655 }
1656)
1657
915d28fe 1658;; Integer division predicated with a PTRUE.
;; Operand 1 is the governing predicate; operands 2 and 3 are the
;; dividend and divisor. Alternative 0 ties the dividend to the
;; destination. Alternative 1 ties the divisor to the destination and
;; uses the reversed ("r"-suffixed) mnemonic with operand 2 as the other
;; source. Alternative 2 copies the dividend in with MOVPRFX first.
;;
;; Operand 3 uses register_operand rather than aarch64_sve_mul_operand:
;; every alternative requires a "w" register (or a tie to operand 0) and
;; the expander only ever passes a register, so a predicate that also
;; admits multiply immediates would accept operands that no alternative
;; can handle directly.
c38f7319 1659(define_insn "*<optab><mode>3"
a08acce8 1660 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
c38f7319 1661 (unspec:SVE_SDI
a08acce8 1662 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
c38f7319 1663 (SVE_INT_BINARY_SD:SVE_SDI
a08acce8
RH
1664 (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
1665 (match_operand:SVE_SDI 3 "register_operand" "w, 0, w"))]
c38f7319
RS
1666 UNSPEC_MERGE_PTRUE))]
1667 "TARGET_SVE"
1668 "@
1669 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
a08acce8
RH
1670 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1671 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1672 [(set_attr "movprfx" "*,*,yes")]
c38f7319
RS
1673)
1674
915d28fe
RS
1675;; Predicated integer division with merging.
;; Operand 1 is the predicate, operands 2 and 3 are the inputs of the
;; SVE_INT_BINARY_SD operation, and operand 4 (a register or zero) is
;; the other UNSPEC_SEL input. There is no preparation code, so the
;; pattern is emitted as written.
1676(define_expand "cond_<optab><mode>"
1677 [(set (match_operand:SVE_SDI 0 "register_operand")
1678 (unspec:SVE_SDI
1679 [(match_operand:<VPRED> 1 "register_operand")
1680 (SVE_INT_BINARY_SD:SVE_SDI
1681 (match_operand:SVE_SDI 2 "register_operand")
1682 (match_operand:SVE_SDI 3 "register_operand"))
1683 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
1684 UNSPEC_SEL))]
43cacb12 1685 "TARGET_SVE"
43cacb12
RS
1686)
1687
915d28fe
RS
1688;; Predicated integer division, merging with the first input.
;; The third UNSPEC_SEL input is (match_dup 2), i.e. the same rtx as the
;; first input of the operation.
;; Alternatives:
;;   1. operand 2 is tied to the output: emit <sve_int_op> directly.
;;   2. output is an earlyclobber register: copy operand 2 into it with
;;      MOVPRFX first, then emit the same instruction.
1689(define_insn "*cond_<optab><mode>_2"
1690 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1691 (unspec:SVE_SDI
1692 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1693 (SVE_INT_BINARY_SD:SVE_SDI
1694 (match_operand:SVE_SDI 2 "register_operand" "0, w")
1695 (match_operand:SVE_SDI 3 "register_operand" "w, w"))
1696 (match_dup 2)]
1697 UNSPEC_SEL))]
43cacb12 1698 "TARGET_SVE"
915d28fe
RS
1699 "@
1700 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1701 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1702 [(set_attr "movprfx" "*,yes")]
1703)
1704
1705;; Predicated integer division, merging with the second input.
;; The third UNSPEC_SEL input is (match_dup 3), i.e. the same rtx as the
;; second input of the operation.  Because the destination has to hold
;; operand 3, the output uses <sve_int_op_rev> with operand 2 as the last
;; source.
;; Alternatives:
;;   1. operand 3 is tied to the output.
;;   2. output is an earlyclobber register: copy operand 3 into it with
;;      MOVPRFX first.
1706(define_insn "*cond_<optab><mode>_3"
1707 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1708 (unspec:SVE_SDI
1709 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1710 (SVE_INT_BINARY_SD:SVE_SDI
1711 (match_operand:SVE_SDI 2 "register_operand" "w, w")
1712 (match_operand:SVE_SDI 3 "register_operand" "0, w"))
1713 (match_dup 3)]
1714 UNSPEC_SEL))]
1715 "TARGET_SVE"
1716 "@
1717 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1718 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1719 [(set_attr "movprfx" "*,yes")]
1720)
1721
1722;; Predicated integer division, merging with an independent value.
;; The insn condition rejects the cases where operand 4 is rtx_equal_p to
;; operand 2 or operand 3.
;; Alternatives (all outputs are earlyclobber):
;;   1. operand 4 is "Dz" and operand 2 is tied to the output: predicated
;;      zeroing MOVPRFX (%1/z) of the output onto itself, then <sve_int_op>.
;;   2. operand 4 is "Dz" and operand 3 is tied to the output: as above but
;;      with <sve_int_op_rev> and operand 2 as the last source.
;;   3. operand 4 is "Dz", nothing tied: zeroing MOVPRFX from operand 2.
;;   4. operand 4 is tied to the output: merging MOVPRFX (%1/m) from
;;      operand 2, then <sve_int_op>.
;;   5. operand 4 is an unrelated register: output is "#".  After reload,
;;      when operand 4 is a register that differs from operand 0, the
;;      rewrite code emits gen_vcond_mask_<mode><vpred> (operands[0],
;;      operands[2], operands[4], operands[1]) and then makes both
;;      operands[4] and operands[2] refer to operands[0].
1723(define_insn_and_rewrite "*cond_<optab><mode>_any"
1724 [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
1725 (unspec:SVE_SDI
1726 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
1727 (SVE_INT_BINARY_SD:SVE_SDI
1728 (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
1729 (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
1730 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
1731 UNSPEC_SEL))]
1732 "TARGET_SVE
1733 && !rtx_equal_p (operands[2], operands[4])
1734 && !rtx_equal_p (operands[3], operands[4])"
1735 "@
1736 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1737 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1738 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1739 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1740 #"
1741 "&& reload_completed
1742 && register_operand (operands[4], <MODE>mode)
1743 && !rtx_equal_p (operands[0], operands[4])"
1744 {
1745 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1746 operands[4], operands[1]));
1747 operands[4] = operands[2] = operands[0];
1748 }
1749 [(set_attr "movprfx" "yes")]
43cacb12
RS
1750)
1751
915d28fe
RS
1752;; -------------------------------------------------------------------------
1753;; ---- [INT] Binary logical operations
1754;; -------------------------------------------------------------------------
1755;; Includes:
1756;; - AND
1757;; - EOR
1758;; - ORR
1759;; -------------------------------------------------------------------------
1760
1761;; Unpredicated integer binary logical operations.
43cacb12
RS
;; No predicate operand is involved.
;; Alternatives:
;;   1. operand 1 is tied to the output (the "%" marks operands 1 and 2 as
;;      commutative) and operand 2 satisfies the "vsl" constraint; it is
;;      printed as an immediate through the %C2 output modifier.
;;   2. all-register form.  The template always uses the ".d" suffix here
;;      rather than <Vetype>.
1762(define_insn "<optab><mode>3"
1763 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
1764 (LOGICAL:SVE_I
1765 (match_operand:SVE_I 1 "register_operand" "%0, w")
1766 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
1767 "TARGET_SVE"
1768 "@
1769 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
1770 <logical>\t%0.d, %1.d, %2.d"
1771)
1772
915d28fe
RS
1773;; Merging forms are handled through SVE_INT_BINARY.
1774
1775;; -------------------------------------------------------------------------
1776;; ---- [INT] Binary logical operations (inverted second input)
1777;; -------------------------------------------------------------------------
1778;; Includes:
1779;; - BIC
1780;; -------------------------------------------------------------------------
43cacb12
RS
1781
1782;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
1783;; this pattern even though the NOT instruction itself is predicated.
;; The RTL computes (~operand 1) & operand 2.  The output template lists
;; operand 2 before operand 1, so the inverted input is the last source of
;; the BIC instruction.  The ".d" suffix is used for every element mode.
1784(define_insn "bic<mode>3"
1785 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1786 (and:SVE_I
1787 (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
1788 (match_operand:SVE_I 2 "register_operand" "w")))]
1789 "TARGET_SVE"
1790 "bic\t%0.d, %2.d, %1.d"
1791)
1792
915d28fe
RS
1793;; -------------------------------------------------------------------------
1794;; ---- [INT] Shifts
1795;; -------------------------------------------------------------------------
1796;; Includes:
1797;; - ASR
1798;; - LSL
1799;; - LSR
1800;; -------------------------------------------------------------------------
43cacb12 1801
915d28fe
RS
1802;; Unpredicated shift by a scalar, which expands into one of the vector
1803;; shifts below.
;; Operand 2 is a scalar of mode <VEL>.  The expander turns it into a
;; vector shift amount and hands over to the "v<optab><mode>3" expander:
;;   - a CONST_INT is duplicated into a constant vector; that vector is
;;     kept as an immediate only if it satisfies the vector shift-operand
;;     predicate, otherwise it is forced into a register;
;;   - anything else is converted to <VEL>mode and broadcast into a fresh
;;     register with vec_duplicate.
1804(define_expand "<ASHIFT:optab><mode>3"
1805 [(set (match_operand:SVE_I 0 "register_operand")
1806 (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
1807 (match_operand:<VEL> 2 "general_operand")))]
43cacb12
RS
1808 "TARGET_SVE"
1809 {
915d28fe
RS
1810 rtx amount;
1811 if (CONST_INT_P (operands[2]))
1812 {
1813 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
/* Test the duplicated vector, which is what gets passed on below; the
   scalar CONST_INT in operands[2] is not a <MODE>mode operand.  */
1814 if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
1815 amount = force_reg (<MODE>mode, amount);
1816 }
1817 else
1818 {
1819 amount = gen_reg_rtx (<MODE>mode);
1820 emit_insn (gen_vec_duplicate<mode> (amount,
1821 convert_to_mode (<VEL>mode,
1822 operands[2], 0)));
1823 }
1824 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
1825 DONE;
43cacb12
RS
1826 }
1827)
1828
915d28fe 1829;; Unpredicated shift by a vector.
43cacb12
RS
;; Expander: operand 2 may be anything accepted by
;; aarch64_sve_<lr>shift_operand.  Operand 3 is not supplied by the
;; caller; the preparation statement sets it to
;; aarch64_ptrue_reg (<VPRED>mode) and it becomes the first element of the
;; UNSPEC_MERGE_PTRUE unspec wrapping the shift.
1830(define_expand "v<optab><mode>3"
1831 [(set (match_operand:SVE_I 0 "register_operand")
1832 (unspec:SVE_I
1833 [(match_dup 3)
1834 (ASHIFT:SVE_I
1835 (match_operand:SVE_I 1 "register_operand")
1836 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
1837 UNSPEC_MERGE_PTRUE))]
1838 "TARGET_SVE"
1839 {
16de3637 1840 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
1841 }
1842)
1843
915d28fe
RS
1844;; Shift by a vector, predicated with a PTRUE. We don't actually need
1845;; the predicate for the first alternative, but using Upa or X isn't
1846;; likely to gain much and would make the instruction seem less uniform
1847;; to the register allocator.
;; Alternatives:
;;   1. operand 3 satisfies the "D<lr>" constraint: output is "#".  After
;;      reload, when operand 3 is not a register, the insn is split into
;;      the plain (ASHIFT operand 2, operand 3) set with no unspec and no
;;      predicate.
;;   2. operand 2 is tied to the output: emit the predicated <shift>.
;;   3. output is an earlyclobber register: copy operand 2 into it with
;;      MOVPRFX, then emit the predicated <shift>.
;; The split has no preparation statements (the empty string).
26004f51 1848(define_insn_and_split "*v<optab><mode>3"
a08acce8 1849 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
43cacb12 1850 (unspec:SVE_I
a08acce8 1851 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
43cacb12 1852 (ASHIFT:SVE_I
a08acce8
RH
1853 (match_operand:SVE_I 2 "register_operand" "w, 0, w")
1854 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
43cacb12
RS
1855 UNSPEC_MERGE_PTRUE))]
1856 "TARGET_SVE"
1857 "@
26004f51 1858 #
a08acce8
RH
1859 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1860 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
26004f51
RS
1861 "&& reload_completed
1862 && !register_operand (operands[3], <MODE>mode)"
1863 [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
1864 ""
a08acce8 1865 [(set_attr "movprfx" "*,*,yes")]
43cacb12
RS
1866)
1867
26004f51
RS
1868;; Unpredicated shift operations by a constant (post-RA only).
1869;; These are generated by splitting a predicated instruction whose
1870;; predicate is unused.
;; Only recognized once reload_completed is true.  Operand 2 has no
;; constraint string; it must satisfy aarch64_simd_<lr>shift_imm and is
;; printed as "#%2".  The instruction takes no predicate operand.
1871(define_insn "*post_ra_v<optab><mode>3"
1872 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1873 (ASHIFT:SVE_I
1874 (match_operand:SVE_I 1 "register_operand" "w")
1875 (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
1876 "TARGET_SVE && reload_completed"
1877 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
1878)
1879
915d28fe
RS
1880;; -------------------------------------------------------------------------
1881;; ---- [INT] Maximum and minimum
1882;; -------------------------------------------------------------------------
1883;; Includes:
1884;; - SMAX
1885;; - SMIN
1886;; - UMAX
1887;; - UMIN
1888;; -------------------------------------------------------------------------
1889
1890;; Unpredicated integer MAX/MIN.
;; Expander: both inputs must be registers.  Operand 3 is not supplied by
;; the caller; the preparation statement sets it to
;; aarch64_ptrue_reg (<VPRED>mode) and it becomes the first element of the
;; UNSPEC_MERGE_PTRUE unspec wrapping the MAXMIN operation.
1891(define_expand "<su><maxmin><mode>3"
43cacb12 1892 [(set (match_operand:SVE_I 0 "register_operand")
915d28fe
RS
1893 (unspec:SVE_I
1894 [(match_dup 3)
1895 (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
1896 (match_operand:SVE_I 2 "register_operand"))]
1897 UNSPEC_MERGE_PTRUE))]
43cacb12
RS
1898 "TARGET_SVE"
1899 {
915d28fe 1900 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
1901 }
1902)
1903
915d28fe
RS
1904;; Integer MAX/MIN predicated with a PTRUE.
;; Operands 2 and 3 are marked commutative ("%").
;; Alternatives:
;;   1. operand 2 is tied to the output: emit <su><maxmin> directly.
;;   2. output is an earlyclobber register: copy operand 2 into it with
;;      MOVPRFX, then emit the same instruction.
1905(define_insn "*<su><maxmin><mode>3"
1906 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1907 (unspec:SVE_I
1908 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1909 (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
1910 (match_operand:SVE_I 3 "register_operand" "w, w"))]
1911 UNSPEC_MERGE_PTRUE))]
43cacb12 1912 "TARGET_SVE"
915d28fe
RS
1913 "@
1914 <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1915 movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1916 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
1917)
1918
915d28fe 1919;; Merging forms are handled through SVE_INT_BINARY.
43cacb12 1920
915d28fe
RS
1921;; -------------------------------------------------------------------------
1922;; ---- [FP] General binary arithmetic corresponding to rtx codes
1923;; -------------------------------------------------------------------------
1924;; Includes post-RA forms of:
1925;; - FADD
1926;; - FMUL
1927;; - FSUB
1928;; -------------------------------------------------------------------------
43cacb12 1929
915d28fe
RS
1930;; Unpredicated floating-point binary operations (post-RA only).
1931;; These are generated by splitting a predicated instruction whose
1932;; predicate is unused.
;; Only recognized once reload_completed is true.  Plain three-register
;; form: no predicate operand and no tie between the output and either
;; input.
1933(define_insn "*post_ra_<sve_fp_op><mode>3"
1934 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1935 (SVE_UNPRED_FP_BINARY:SVE_F
1936 (match_operand:SVE_F 1 "register_operand" "w")
1937 (match_operand:SVE_F 2 "register_operand" "w")))]
1938 "TARGET_SVE && reload_completed"
1939 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
1940
1941;; -------------------------------------------------------------------------
1942;; ---- [FP] General binary arithmetic corresponding to unspecs
1943;; -------------------------------------------------------------------------
1944;; Includes merging forms of:
1945;; - FADD
1946;; - FDIV
1947;; - FDIVR
1948;; - FMAXNM
1949;; - FMINNM
1950;; - FMUL
1951;; - FSUB
1952;; - FSUBR
1953;; -------------------------------------------------------------------------
1954
1955;; Predicated floating-point operations with merging.
;; Expander with no preparation statements: the RTL template is emitted
;; as written.  Operand 1 is the predicate, operands 2 and 3 are the
;; inputs of the inner SVE_COND_FP_BINARY unspec, and operand 4 (a
;; register or zero, per its predicate) is the third UNSPEC_SEL input --
;; presumably the value used for inactive lanes; confirm against
;; UNSPEC_SEL's definition.
1956(define_expand "cond_<optab><mode>"
1957 [(set (match_operand:SVE_F 0 "register_operand")
1958 (unspec:SVE_F
1959 [(match_operand:<VPRED> 1 "register_operand")
1960 (unspec:SVE_F
1961 [(match_operand:SVE_F 2 "register_operand")
1962 (match_operand:SVE_F 3 "register_operand")]
1963 SVE_COND_FP_BINARY)
1964 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
1965 UNSPEC_SEL))]
43cacb12 1966 "TARGET_SVE"
43cacb12
RS
1967)
1968
915d28fe
RS
1969;; Predicated floating-point operations, merging with the first input.
;; The third UNSPEC_SEL input is (match_dup 2), i.e. the same rtx as the
;; first input of the inner operation.
;; Alternatives:
;;   1. operand 2 is tied to the output: emit <sve_fp_op> directly.
;;   2. output is an earlyclobber register: copy operand 2 into it with
;;      MOVPRFX first, then emit the same instruction.
1970(define_insn "*cond_<optab><mode>_2"
1971 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
1972 (unspec:SVE_F
57d6f4d0 1973 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
915d28fe
RS
1974 (unspec:SVE_F
1975 [(match_operand:SVE_F 2 "register_operand" "0, w")
1976 (match_operand:SVE_F 3 "register_operand" "w, w")]
1977 SVE_COND_FP_BINARY)
1978 (match_dup 2)]
1979 UNSPEC_SEL))]
43cacb12
RS
1980 "TARGET_SVE"
1981 "@
915d28fe
RS
1982 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1983 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1984 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
1985)
1986
915d28fe
RS
1987;; Predicated floating-point operations, merging with the second input.
;; The third UNSPEC_SEL input is (match_dup 3), i.e. the same rtx as the
;; second input of the inner operation.  Because the destination has to
;; hold operand 3, the output uses <sve_fp_op_rev> with operand 2 as the
;; last source.
;; Alternatives:
;;   1. operand 3 is tied to the output.
;;   2. output is an earlyclobber register: copy operand 3 into it with
;;      MOVPRFX first.
1988(define_insn "*cond_<optab><mode>_3"
1989 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
1990 (unspec:SVE_F
57d6f4d0 1991 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
915d28fe
RS
1992 (unspec:SVE_F
1993 [(match_operand:SVE_F 2 "register_operand" "w, w")
1994 (match_operand:SVE_F 3 "register_operand" "0, w")]
1995 SVE_COND_FP_BINARY)
1996 (match_dup 3)]
1997 UNSPEC_SEL))]
43cacb12
RS
1998 "TARGET_SVE"
1999 "@
915d28fe
RS
2000 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2001 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
2002 [(set_attr "movprfx" "*,yes")]
cee99fa0
RS
2003)
2004
915d28fe
RS
2005;; Predicated floating-point operations, merging with an independent value.
;; The insn condition rejects the cases where operand 4 is rtx_equal_p to
;; operand 2 or operand 3.
;; Alternatives (all outputs are earlyclobber):
;;   1. operand 4 is "Dz" and operand 2 is tied to the output: predicated
;;      zeroing MOVPRFX (%1/z) of the output onto itself, then <sve_fp_op>.
;;   2. operand 4 is "Dz" and operand 3 is tied to the output: as above but
;;      with <sve_fp_op_rev> and operand 2 as the last source.
;;   3. operand 4 is "Dz", nothing tied: zeroing MOVPRFX from operand 2.
;;   4. operand 4 is tied to the output: merging MOVPRFX (%1/m) from
;;      operand 2, then <sve_fp_op>.
;;   5. operand 4 is an unrelated register: output is "#".  After reload,
;;      when operand 4 is a register that differs from operand 0, the
;;      rewrite code emits gen_vcond_mask_<mode><vpred> (operands[0],
;;      operands[2], operands[4], operands[1]) and then makes both
;;      operands[4] and operands[2] refer to operands[0].
2006(define_insn_and_rewrite "*cond_<optab><mode>_any"
2007 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
2008 (unspec:SVE_F
2009 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
2010 (unspec:SVE_F
2011 [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
2012 (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
2013 SVE_COND_FP_BINARY)
2014 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
2015 UNSPEC_SEL))]
2016 "TARGET_SVE
2017 && !rtx_equal_p (operands[2], operands[4])
2018 && !rtx_equal_p (operands[3], operands[4])"
cee99fa0 2019 "@
915d28fe
RS
2020 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2021 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2022 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2023 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2024 #"
2025 "&& reload_completed
2026 && register_operand (operands[4], <MODE>mode)
2027 && !rtx_equal_p (operands[0], operands[4])"
2028 {
2029 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2030 operands[4], operands[1]));
2031 operands[4] = operands[2] = operands[0];
2032 }
2033 [(set_attr "movprfx" "yes")]
cee99fa0
RS
2034)
2035
915d28fe
RS
2036;; -------------------------------------------------------------------------
2037;; ---- [FP] Addition
2038;; -------------------------------------------------------------------------
2039;; Includes:
2040;; - FADD
2041;; - FSUB
2042;; -------------------------------------------------------------------------
43cacb12 2043
915d28fe
RS
2044;; Unpredicated floating-point addition.
;; Expander: operand 2 may be anything accepted by
;; aarch64_sve_float_arith_with_sub_operand.  Operand 3 is not supplied by
;; the caller; the preparation statement sets it to
;; aarch64_ptrue_reg (<VPRED>mode) and it becomes the first element of the
;; UNSPEC_MERGE_PTRUE unspec wrapping the addition.
2045(define_expand "add<mode>3"
2046 [(set (match_operand:SVE_F 0 "register_operand")
2047 (unspec:SVE_F
2048 [(match_dup 3)
2049 (plus:SVE_F
2050 (match_operand:SVE_F 1 "register_operand")
2051 (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
f22d7973 2052 UNSPEC_MERGE_PTRUE))]
43cacb12 2053 "TARGET_SVE"
915d28fe
RS
2054 {
2055 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2056 }
43cacb12
RS
2057)
2058
915d28fe
RS
2059;; Floating-point addition predicated with a PTRUE.
;; Alternatives:
;;   1. operand 2 tied to the output, operand 3 matches "vsA": emitted as
;;      a predicated FADD with operand 3 as the immediate.
;;   2. operand 2 tied to the output, operand 3 matches "vsN": emitted as
;;      a predicated FSUB whose immediate is printed through the %N3
;;      output modifier.
;;   3. all registers: output is "#".  After reload, when operand 3 is a
;;      register, the insn is split into the plain (plus operand 2,
;;      operand 3) set with no unspec and no predicate.
2060(define_insn_and_split "*add<mode>3"
2061 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
2062 (unspec:SVE_F
2063 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2064 (plus:SVE_F
2065 (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
2066 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
2067 UNSPEC_MERGE_PTRUE))]
cee99fa0 2068 "TARGET_SVE"
915d28fe
RS
2069 "@
2070 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2071 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2072 #"
2073 ; Split the unpredicated form after reload, so that we don't have
2074 ; the unnecessary PTRUE.
2075 "&& reload_completed
2076 && register_operand (operands[3], <MODE>mode)"
2077 [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
cee99fa0
RS
2078)
2079
915d28fe 2080;; Merging forms are handled through SVE_COND_FP_BINARY.
cee99fa0 2081
915d28fe
RS
2082;; -------------------------------------------------------------------------
2083;; ---- [FP] Subtraction
2084;; -------------------------------------------------------------------------
2085;; Includes:
2086;; - FADD
2087;; - FSUB
2088;; - FSUBR
2089;; -------------------------------------------------------------------------
cee99fa0 2090
915d28fe
RS
2091;; Unpredicated floating-point subtraction.
;; Expander: here it is operand 1 (the minuend) that may be anything
;; accepted by aarch64_sve_float_arith_operand, while operand 2 must be a
;; register.  Operand 3 is not supplied by the caller; the preparation
;; statement sets it to aarch64_ptrue_reg (<VPRED>mode) and it becomes the
;; first element of the UNSPEC_MERGE_PTRUE unspec wrapping the subtraction.
2092(define_expand "sub<mode>3"
2093 [(set (match_operand:SVE_F 0 "register_operand")
2094 (unspec:SVE_F
2095 [(match_dup 3)
2096 (minus:SVE_F
2097 (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
2098 (match_operand:SVE_F 2 "register_operand"))]
2099 UNSPEC_MERGE_PTRUE))]
cee99fa0 2100 "TARGET_SVE"
915d28fe
RS
2101 {
2102 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2103 }
cee99fa0
RS
2104)
2105
915d28fe
RS
2106;; Floating-point subtraction predicated with a PTRUE.
;; The insn condition requires at least one of operands 2 and 3 to be a
;; register.
;; Alternatives:
;;   1. operand 2 tied to the output, operand 3 matches "vsA": predicated
;;      FSUB with operand 3 as the immediate.
;;   2. operand 2 tied to the output, operand 3 matches "vsN": predicated
;;      FADD whose immediate is printed through the %N3 output modifier.
;;   3. operand 3 tied to the output, operand 2 matches "vsA": predicated
;;      FSUBR with operand 2 as the immediate.
;;   4. all registers: output is "#".  After reload, when both operands 2
;;      and 3 are registers, the insn is split into the plain
;;      (minus operand 2, operand 3) set with no unspec and no predicate.
2107(define_insn_and_split "*sub<mode>3"
2108 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
2109 (unspec:SVE_F
2110 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
2111 (minus:SVE_F
2112 (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
2113 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
2114 UNSPEC_MERGE_PTRUE))]
2115 "TARGET_SVE
2116 && (register_operand (operands[2], <MODE>mode)
2117 || register_operand (operands[3], <MODE>mode))"
f22d7973 2118 "@
915d28fe
RS
2119 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2120 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2121 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
2122 #"
2123 ; Split the unpredicated form after reload, so that we don't have
2124 ; the unnecessary PTRUE.
2125 "&& reload_completed
2126 && register_operand (operands[2], <MODE>mode)
2127 && register_operand (operands[3], <MODE>mode)"
2128 [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
f22d7973
RS
2129)
2130
915d28fe 2131;; Merging forms are handled through SVE_COND_FP_BINARY.
43cacb12 2132
915d28fe
RS
2133;; -------------------------------------------------------------------------
2134;; ---- [FP] Absolute difference
2135;; -------------------------------------------------------------------------
2136;; Includes:
2137;; - FABD
2138;; -------------------------------------------------------------------------
2139
2140;; Predicated floating-point absolute difference.
;; Matches a (minus operand 2, operand 3) wrapped together with predicate
;; operand 1 in an UNSPEC_COND_FABS unspec.  There is a single alternative
;; in which operand 2 is tied to the output, so the %2 in the output
;; template names the same register as %0.
2141(define_insn "*fabd<mode>3"
2142 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2143 (unspec:SVE_F
43cacb12 2144 [(match_operand:<VPRED> 1 "register_operand" "Upl")
d45b20a5
RS
2145 (minus:SVE_F
2146 (match_operand:SVE_F 2 "register_operand" "0")
2147 (match_operand:SVE_F 3 "register_operand" "w"))]
2148 UNSPEC_COND_FABS))]
43cacb12 2149 "TARGET_SVE"
915d28fe 2150 "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
2151)
2152
915d28fe
RS
2153;; -------------------------------------------------------------------------
2154;; ---- [FP] Multiplication
2155;; -------------------------------------------------------------------------
2156;; Includes:
2157;; - FMUL
2158;; -------------------------------------------------------------------------
2159
2160;; Unpredicated floating-point multiplication.
;; Expander: operand 2 may be anything accepted by
;; aarch64_sve_float_mul_operand.  Operand 3 is not supplied by the
;; caller; the preparation statement sets it to
;; aarch64_ptrue_reg (<VPRED>mode) and it becomes the first element of the
;; UNSPEC_MERGE_PTRUE unspec wrapping the multiplication.
2161(define_expand "mul<mode>3"
2162 [(set (match_operand:SVE_F 0 "register_operand")
2163 (unspec:SVE_F
2164 [(match_dup 3)
2165 (mult:SVE_F
2166 (match_operand:SVE_F 1 "register_operand")
2167 (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
2168 UNSPEC_MERGE_PTRUE))]
43cacb12
RS
2169 "TARGET_SVE"
2170 {
915d28fe 2171 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
2172 }
2173)
2174
915d28fe
RS
2175;; Floating-point multiplication predicated with a PTRUE.
;; Alternatives:
;;   1. operand 2 tied to the output, operand 3 matches "vsM": predicated
;;      FMUL with operand 3 as the immediate.
;;   2. all registers: output is "#".  After reload, when operand 3 is a
;;      register, the insn is split into the plain (mult operand 2,
;;      operand 3) set with no unspec and no predicate.
2176(define_insn_and_split "*mul<mode>3"
2177 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2178 (unspec:SVE_F
2179 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2180 (mult:SVE_F
2181 (match_operand:SVE_F 2 "register_operand" "%0, w")
2182 (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
2183 UNSPEC_MERGE_PTRUE))]
43cacb12 2184 "TARGET_SVE"
915d28fe
RS
2185 "@
2186 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2187 #"
2188 ; Split the unpredicated form after reload, so that we don't have
2189 ; the unnecessary PTRUE.
2190 "&& reload_completed
2191 && register_operand (operands[3], <MODE>mode)"
2192 [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
43cacb12
RS
2193)
2194
915d28fe
RS
2195;; Merging forms are handled through SVE_COND_FP_BINARY.
2196
2197;; -------------------------------------------------------------------------
2198;; ---- [FP] Division
2199;; -------------------------------------------------------------------------
2200;; Includes:
2201;; - FDIV
2202;; - FDIVR
2203;; -------------------------------------------------------------------------
2204
2205;; Unpredicated floating-point division.
;; Expander: both inputs must be registers.  Operand 3 is not supplied by
;; the caller; the preparation statement sets it to
;; aarch64_ptrue_reg (<VPRED>mode) and it becomes the first element of the
;; UNSPEC_MERGE_PTRUE unspec wrapping the division.
2206(define_expand "div<mode>3"
2207 [(set (match_operand:SVE_F 0 "register_operand")
2208 (unspec:SVE_F
2209 [(match_dup 3)
2210 (div:SVE_F (match_operand:SVE_F 1 "register_operand")
2211 (match_operand:SVE_F 2 "register_operand"))]
2212 UNSPEC_MERGE_PTRUE))]
43cacb12
RS
2213 "TARGET_SVE"
2214 {
915d28fe 2215 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
2216 }
2217)
2218
915d28fe
RS
2219;; Floating-point division predicated with a PTRUE.
;; Alternatives:
;;   1. operand 2 (the dividend) is tied to the output: emit FDIV.
;;   2. operand 3 (the divisor) is tied to the output: emit FDIVR with
;;      operand 2 as the last source.
;;   3. output is an earlyclobber register: copy operand 2 into it with
;;      MOVPRFX, then emit FDIV.
2220(define_insn "*div<mode>3"
2221 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2222 (unspec:SVE_F
2223 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2224 (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
2225 (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
2226 UNSPEC_MERGE_PTRUE))]
43cacb12 2227 "TARGET_SVE"
915d28fe
RS
2228 "@
2229 fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2230 fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2231 movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2232 [(set_attr "movprfx" "*,*,yes")]
43cacb12
RS
2233)
2234
915d28fe
RS
2235;; Merging forms are handled through SVE_COND_FP_BINARY.
2236
2237;; -------------------------------------------------------------------------
2238;; ---- [FP] Binary logical operations
2239;; -------------------------------------------------------------------------
2240;; Includes:
2241;; - AND
2242;; - EOR
2243;; - ORR
2244;; -------------------------------------------------------------------------
2245
2246;; Binary logical operations on floating-point modes. We avoid subregs
2247;; by providing this, but we need to use UNSPECs since rtx logical ops
2248;; aren't defined for floating-point modes.
;; Single all-register alternative with no predicate operand.  The two
;; inputs are wrapped in a LOGICALF unspec and the output template always
;; uses the ".d" suffix rather than <Vetype>.
2249(define_insn "*<optab><mode>3"
2250 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2251 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
2252 (match_operand:SVE_F 2 "register_operand" "w")]
2253 LOGICALF))]
43cacb12 2254 "TARGET_SVE"
915d28fe 2255 "<logicalf_op>\t%0.d, %1.d, %2.d"
43cacb12
RS
2256)
2257
915d28fe
RS
2258;; -------------------------------------------------------------------------
2259;; ---- [FP] Sign copying
2260;; -------------------------------------------------------------------------
2261;; The patterns in this section are synthetic.
2262;; -------------------------------------------------------------------------
2263
;; Expands entirely through the C body (it ends in DONE, so the operand
;; list above is never emitted as RTL).  Working on the <V_INT_EQUIV>mode
;; views of the inputs, with "bits" being the element width minus one:
;;   sign    = operand 2 & (HOST_WIDE_INT_M1U << bits)
;;   mant    = operand 1 & ~(HOST_WIDE_INT_M1U << bits)
;;   int_res = sign | mant
;; and the result is moved to operand 0 as the <MODE>mode lowpart of
;; int_res.
2264(define_expand "copysign<mode>3"
2265 [(match_operand:SVE_F 0 "register_operand")
2266 (match_operand:SVE_F 1 "register_operand")
2267 (match_operand:SVE_F 2 "register_operand")]
43cacb12
RS
2268 "TARGET_SVE"
2269 {
915d28fe
RS
2270 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
2271 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
2272 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
2273 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
43cacb12 2274
915d28fe
RS
2275 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
2276 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
2277
2278 emit_insn (gen_and<v_int_equiv>3
2279 (sign, arg2,
2280 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
2281 HOST_WIDE_INT_M1U
2282 << bits)));
2283 emit_insn (gen_and<v_int_equiv>3
2284 (mant, arg1,
2285 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
2286 ~(HOST_WIDE_INT_M1U
2287 << bits))));
2288 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
2289 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
2290 DONE;
43cacb12
RS
2291 }
2292)
2293
915d28fe
RS
;; Expands entirely through the C body (it ends in DONE, so the operand
;; list above is never emitted as RTL).  Working on the <V_INT_EQUIV>mode
;; views of the inputs, with "bits" being the element width minus one:
;;   sign    = operand 2 & (HOST_WIDE_INT_M1U << bits)
;;   int_res = operand 1 ^ sign
;; and the result is moved to operand 0 as the <MODE>mode lowpart of
;; int_res.
2294(define_expand "xorsign<mode>3"
2295 [(match_operand:SVE_F 0 "register_operand")
2296 (match_operand:SVE_F 1 "register_operand")
2297 (match_operand:SVE_F 2 "register_operand")]
43cacb12
RS
2298 "TARGET_SVE"
2299 {
915d28fe
RS
2300 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
2301 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
2302 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
2303
2304 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
2305 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
2306
2307 emit_insn (gen_and<v_int_equiv>3
2308 (sign, arg2,
2309 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
2310 HOST_WIDE_INT_M1U
2311 << bits)));
2312 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
2313 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
2314 DONE;
43cacb12
RS
2315 }
2316)
2317
915d28fe
RS
2318;; -------------------------------------------------------------------------
2319;; ---- [FP] Maximum and minimum
2320;; -------------------------------------------------------------------------
2321;; Includes:
915d28fe 2322;; - FMAXNM
915d28fe
RS
2323;; - FMINNM
2324;; -------------------------------------------------------------------------
43cacb12 2325
214c42fa
RS
2326;; Unpredicated floating-point MAX/MIN (the rtx codes). These are more
2327;; relaxed than fmax/fmin, but we implement them in the same way.
;; Expander: the generated RTL is an SVE_COND_FP_MAXMIN_PUBLIC unspec whose
;; elements are the predicate followed by the two register inputs; there
;; is no inner rtx code.  Operand 3 is not supplied by the caller; the
;; preparation statement sets it to aarch64_ptrue_reg (<VPRED>mode).
2328(define_expand "<optab><mode>3"
43cacb12
RS
2329 [(set (match_operand:SVE_F 0 "register_operand")
2330 (unspec:SVE_F
2331 [(match_dup 3)
214c42fa
RS
2332 (match_operand:SVE_F 1 "register_operand")
2333 (match_operand:SVE_F 2 "register_operand")]
2334 SVE_COND_FP_MAXMIN_PUBLIC))]
43cacb12
RS
2335 "TARGET_SVE"
2336 {
16de3637 2337 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
2338 }
2339)
2340
214c42fa 2341;; Unpredicated fmax/fmin (the libm functions).
43cacb12
RS
;; Expander: generates the same RTL shape as the "<optab><mode>3"
;; floating-point MAX/MIN expander -- an SVE_COND_FP_MAXMIN_PUBLIC unspec
;; of the predicate and the two register inputs -- under the
;; <maxmin_uns> name.  Operand 3 is not supplied by the caller; the
;; preparation statement sets it to aarch64_ptrue_reg (<VPRED>mode).
2342(define_expand "<maxmin_uns><mode>3"
2343 [(set (match_operand:SVE_F 0 "register_operand")
2344 (unspec:SVE_F
2345 [(match_dup 3)
214c42fa
RS
2346 (match_operand:SVE_F 1 "register_operand")
2347 (match_operand:SVE_F 2 "register_operand")]
2348 SVE_COND_FP_MAXMIN_PUBLIC))]
43cacb12
RS
2349 "TARGET_SVE"
2350 {
16de3637 2351 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
2352 }
2353)
2354
214c42fa
RS
2355;; Predicated floating-point maximum/minimum.
;; Operands 2 and 3 are marked commutative ("%").
;; Alternatives:
;;   1. operand 2 is tied to the output: emit <sve_fp_op> directly.
;;   2. output is an earlyclobber register: copy operand 2 into it with
;;      MOVPRFX, then emit the same instruction.
2356(define_insn "*<optab><mode>3"
a08acce8 2357 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
43cacb12 2358 (unspec:SVE_F
a08acce8 2359 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
214c42fa
RS
2360 (match_operand:SVE_F 2 "register_operand" "%0, w")
2361 (match_operand:SVE_F 3 "register_operand" "w, w")]
2362 SVE_COND_FP_MAXMIN_PUBLIC))]
43cacb12 2363 "TARGET_SVE"
a08acce8 2364 "@
214c42fa
RS
2365 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2366 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
a08acce8 2367 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
2368)
2369
915d28fe
RS
2370;; Merging forms are handled through SVE_COND_FP_BINARY.
2371
2372;; -------------------------------------------------------------------------
2373;; ---- [PRED] Binary logical operations
2374;; -------------------------------------------------------------------------
2375;; Includes:
2376;; - AND
2377;; - ANDS
2378;; - EOR
2379;; - EORS
2380;; - ORR
2381;; - ORRS
2382;; -------------------------------------------------------------------------
2383
2384;; Predicate AND. We can reuse one of the inputs as the GP.
;; Two-input AND with no separate predicate operand in the RTL.  The
;; output template uses operand 1 twice: once as the zeroing governing
;; predicate (%1/z) and once as the first source.
2385(define_insn "and<mode>3"
2386 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
2387 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
2388 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
9d4ac06e 2389 "TARGET_SVE"
915d28fe 2390 "and\t%0.b, %1/z, %1.b, %2.b"
a08acce8 2391)
9d4ac06e 2392
915d28fe
RS
2393;; Unpredicated predicate EOR and ORR.
;; Expander: the LOGICAL_OR operation on operands 1 and 2 is ANDed with
;; operand 3, which is not supplied by the caller.  The preparation
;; statement sets operand 3 to aarch64_ptrue_reg (<MODE>mode) -- note
;; <MODE>, not <VPRED>, because the data operands are themselves
;; predicates here.
2394(define_expand "<optab><mode>3"
2395 [(set (match_operand:PRED_ALL 0 "register_operand")
2396 (and:PRED_ALL
2397 (LOGICAL_OR:PRED_ALL
2398 (match_operand:PRED_ALL 1 "register_operand")
2399 (match_operand:PRED_ALL 2 "register_operand"))
2400 (match_dup 3)))]
6c4fd4a9 2401 "TARGET_SVE"
915d28fe
RS
2402 {
2403 operands[3] = aarch64_ptrue_reg (<MODE>mode);
2404 }
a08acce8 2405)
6c4fd4a9 2406
915d28fe 2407;; Predicated predicate AND, EOR and ORR.
;; Operand numbering differs from RTL order: operands 2 and 3 are the
;; inputs of the LOGICAL operation and operand 1 is the predicate that the
;; result is ANDed with.  The output template prints operand 1 as the
;; zeroing governing predicate (%1/z).
34467289 2408(define_insn "@aarch64_pred_<optab><mode>_z"
915d28fe
RS
2409 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
2410 (and:PRED_ALL
2411 (LOGICAL:PRED_ALL
2412 (match_operand:PRED_ALL 2 "register_operand" "Upa")
2413 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
2414 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
2415 "TARGET_SVE"
2416 "<logical>\t%0.b, %1/z, %2.b, %3.b"
2417)
2418
2419;; Perform a logical operation on operands 2 and 3, using operand 1 as
34467289
RS
2420;; the GP. Store the result in operand 0 and set the flags in the same
2421;; way as for PTEST.
915d28fe
RS
;; Parallel of two sets:
;;   - CC_REGNUM (in CC_NZC mode) is set from an UNSPEC_PTEST of operand 1
;;     (a VNx16BI register), operand 4, operand 5 (which must satisfy
;;     aarch64_sve_ptrue_flag) and the logical result;
;;   - operand 0 is set to that same logical result.
;; The logical result is (LOGICAL operand 2, operand 3) ANDed with
;; (match_dup 4), so operand 4 appears both as the second UNSPEC_PTEST
;; element and as the AND mask in both sets.  Operand 4 has no mode,
;; predicate or constraint of its own.  Only operand 1 is printed as the
;; governing predicate in the output template.
2422(define_insn "*<optab><mode>3_cc"
2423 [(set (reg:CC_NZC CC_REGNUM)
2424 (unspec:CC_NZC
34467289
RS
2425 [(match_operand:VNx16BI 1 "register_operand" "Upa")
2426 (match_operand 4)
2427 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
915d28fe
RS
2428 (and:PRED_ALL
2429 (LOGICAL:PRED_ALL
2430 (match_operand:PRED_ALL 2 "register_operand" "Upa")
2431 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
34467289
RS
2432 (match_dup 4))]
2433 UNSPEC_PTEST))
915d28fe
RS
2434 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
2435 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
34467289 2436 (match_dup 4)))]
915d28fe
RS
2437 "TARGET_SVE"
2438 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
2439)
2440
2441;; -------------------------------------------------------------------------
2442;; ---- [PRED] Binary logical operations (inverted second input)
2443;; -------------------------------------------------------------------------
2444;; Includes:
2445;; - BIC
2446;; - ORN
2447;; -------------------------------------------------------------------------
2448
2449;; Predicated predicate BIC and ORN.
;; The RTL is (NLOGICAL (not operand 2), operand 3) ANDed with predicate
;; operand 1.  The output template lists operand 3 before operand 2, so
;; the inverted input is the last source of the instruction.
2450(define_insn "*<nlogical><mode>3"
2451 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
2452 (and:PRED_ALL
2453 (NLOGICAL:PRED_ALL
2454 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
2455 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
2456 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
2457 "TARGET_SVE"
2458 "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
2459)
2460
2461;; -------------------------------------------------------------------------
2462;; ---- [PRED] Binary logical operations (inverted result)
2463;; -------------------------------------------------------------------------
2464;; Includes:
2465;; - NAND
2466;; - NOR
2467;; -------------------------------------------------------------------------
2468
2469;; Predicated predicate NAND and NOR.
;; The RTL is (NLOGICAL (not operand 2), (not operand 3)) ANDed with
;; predicate operand 1, i.e. both inputs are inverted before the NLOGICAL
;; operation.  The instruction name comes from <logical_nn> and the
;; sources are printed in operand order.
2470(define_insn "*<logical_nn><mode>3"
2471 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
2472 (and:PRED_ALL
2473 (NLOGICAL:PRED_ALL
2474 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
2475 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
2476 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
2477 "TARGET_SVE"
2478 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
2479)
2480
2481;; =========================================================================
2482;; == Ternary arithmetic
2483;; =========================================================================
2484
2485;; -------------------------------------------------------------------------
2486;; ---- [INT] MLA and MAD
2487;; -------------------------------------------------------------------------
2488;; Includes:
2489;; - MAD
2490;; - MLA
2491;; -------------------------------------------------------------------------
2492
2493;; Predicated integer addition of product.
;; Computes operand 4 + (operand 2 * operand 3) under predicate operand 1.
;; The three alternatives differ in which input is tied to the destination:
;;   1. multiplication input operand 2 is tied to operand 0 -> MAD
;;   2. addend operand 4 is tied to operand 0               -> MLA
;;   3. nothing is tied; operand 0 is earlyclobber and a MOVPRFX copies
;;      the addend into it before the MLA (movprfx attribute "yes").
2494(define_insn "*madd<mode>"
2495 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
2496 (plus:SVE_I
2497 (unspec:SVE_I
2498 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2499 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
2500 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
2501 UNSPEC_MERGE_PTRUE)
2502 (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
a08acce8
RH
2503 "TARGET_SVE"
2504 "@
915d28fe
RS
2505 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2506 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2507 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
2508 [(set_attr "movprfx" "*,*,yes")]
a08acce8
RH
2509)
2510
915d28fe
RS
2511;; -------------------------------------------------------------------------
2512;; ---- [INT] MLS and MSB
2513;; -------------------------------------------------------------------------
2514;; Includes:
2515;; - MLS
2516;; - MSB
2517;; -------------------------------------------------------------------------
2518
2519;; Predicated integer subtraction of product.
;; Computes operand 4 - (operand 2 * operand 3) under predicate operand 1.
;; The three alternatives differ in which input is tied to the destination:
;;   1. multiplication input operand 2 is tied to operand 0 -> MSB
;;   2. minuend operand 4 is tied to operand 0              -> MLS
;;   3. nothing is tied; operand 0 is earlyclobber and a MOVPRFX copies
;;      operand 4 into it before the MLS (movprfx attribute "yes").
2520(define_insn "*msub<mode>3"
2521 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
2522 (minus:SVE_I
2523 (match_operand:SVE_I 4 "register_operand" "w, 0, w")
2524 (unspec:SVE_I
2525 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2526 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
2527 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
2528 UNSPEC_MERGE_PTRUE)))]
2529 "TARGET_SVE"
2530 "@
2531 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2532 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2533 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
2534 [(set_attr "movprfx" "*,*,yes")]
2535)
2536
2537;; -------------------------------------------------------------------------
2538;; ---- [INT] Dot product
2539;; -------------------------------------------------------------------------
2540;; Includes:
2541;; - SDOT
2542;; - UDOT
2543;; -------------------------------------------------------------------------
2544
2545;; Four-element integer dot-product with accumulation.
;; Operands 1 and 2 are the <VSI2QI> inputs and operand 3 is the accumulator
;; that the DOTPROD result is added to.  Alternative 1 ties the accumulator
;; to the destination; alternative 2 uses an earlyclobber destination and a
;; MOVPRFX of the accumulator (movprfx attribute "yes").
;; The tab after the mnemonic is written as "\t", matching every other
;; template in this file.
2546(define_insn "<sur>dot_prod<vsi2qi>"
a08acce8 2547 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
915d28fe
RS
2548 (plus:SVE_SDI
2549 (unspec:SVE_SDI
2550 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
2551 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
2552 DOTPROD)
2553 (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
a08acce8
RH
2554 "TARGET_SVE"
2555 "@
915d28fe
RS
2556 <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
2557 movprfx\t%0, %3\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
a08acce8
RH
2558 [(set_attr "movprfx" "*,yes")]
2559)
2560
915d28fe
RS
2561;; -------------------------------------------------------------------------
2562;; ---- [INT] Sum of absolute differences
2563;; -------------------------------------------------------------------------
2564;; The patterns in this section are synthetic.
2565;; -------------------------------------------------------------------------
2566
2567;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
2568;; operands 1 and 2. The sequence also has to perform a widening reduction of
2569;; the difference into a vector and accumulate that into operand 3 before
2570;; copying that into the result operand 0.
2571;; Perform that with a sequence of:
2572;; MOV ones.b, #1
2573;; [SU]ABD diff.b, p0/m, op1.b, op2.b
2574;; MOVPRFX op0, op3 // If necessary
2575;; UDOT op0.s, diff.b, ones.b
;; The C body does all the work and finishes with DONE.  It forces a vector
;; of ones (CONST1_RTX) into a register, emits the [SU]ABD of operands 1
;; and 2 into a fresh register, and then emits a UDOT of that difference
;; with the ones, using operand 3 as the accumulator and operand 0 as the
;; destination.
2576(define_expand "<sur>sad<vsi2qi>"
2577 [(use (match_operand:SVE_SDI 0 "register_operand"))
2578 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
2579 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
2580 (use (match_operand:SVE_SDI 3 "register_operand"))]
2581 "TARGET_SVE"
2582 {
2583 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
2584 rtx diff = gen_reg_rtx (<VSI2QI>mode);
2585 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
2586 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
2587 DONE;
2588 }
2589)
2590
2591;; -------------------------------------------------------------------------
2592;; ---- [FP] General ternary arithmetic corresponding to unspecs
2593;; -------------------------------------------------------------------------
2594;; Includes merging patterns for:
2595;; - FMAD
2596;; - FMLA
2597;; - FMLS
2598;; - FMSB
2599;; - FNMAD
2600;; - FNMLA
2601;; - FNMLS
2602;; - FNMSB
2603;; -------------------------------------------------------------------------
2604
0d80d083
RS
2605;; Unpredicated floating-point ternary operations.
;; Operand 4 is not supplied by the caller: the C body sets it to the
;; register returned by aarch64_ptrue_reg (<VPRED>mode), and (match_dup 4)
;; places it as the first element of the SVE_COND_FP_TERNARY unspec.
2606(define_expand "<optab><mode>4"
2607 [(set (match_operand:SVE_F 0 "register_operand")
2608 (unspec:SVE_F
2609 [(match_dup 4)
2610 (match_operand:SVE_F 1 "register_operand")
2611 (match_operand:SVE_F 2 "register_operand")
2612 (match_operand:SVE_F 3 "register_operand")]
2613 SVE_COND_FP_TERNARY))]
2614 "TARGET_SVE"
2615 {
2616 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2617 }
2618)
2619
2620;; Predicated floating-point ternary operations.
;; Operand 1 is the predicate (%1/m).  The three alternatives are:
;;   1. operand 4 is tied to the destination -> <sve_fmla_op> on %2, %3
;;   2. operand 2 is tied to the destination -> <sve_fmad_op> on %3, %4
;;   3. nothing is tied; operand 0 is earlyclobber and a MOVPRFX copies
;;      operand 4 into it before the <sve_fmla_op>.
2621(define_insn "*<optab><mode>4"
2622 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2623 (unspec:SVE_F
2624 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2625 (match_operand:SVE_F 2 "register_operand" "%w, 0, w")
2626 (match_operand:SVE_F 3 "register_operand" "w, w, w")
2627 (match_operand:SVE_F 4 "register_operand" "0, w, w")]
2628 SVE_COND_FP_TERNARY))]
2629 "TARGET_SVE"
2630 "@
2631 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2632 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2633 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
2634 [(set_attr "movprfx" "*,*,yes")]
2635)
2636
915d28fe
RS
2637;; Predicated floating-point ternary operations with merging.
;; Operand 1 is the predicate for both the inner SVE_COND_FP_TERNARY
;; operation (through match_dup 1) and the outer UNSPEC_SEL.  Operand 5 is
;; the fallback value and may be a register or zero.  The C body has no
;; DONE, so it only adjusts operands 2 and 3 before the pattern is
;; generated.
2638(define_expand "cond_<optab><mode>"
2639 [(set (match_operand:SVE_F 0 "register_operand")
2640 (unspec:SVE_F
2641 [(match_operand:<VPRED> 1 "register_operand")
2642 (unspec:SVE_F
0d80d083
RS
2643 [(match_dup 1)
2644 (match_operand:SVE_F 2 "register_operand")
915d28fe
RS
2645 (match_operand:SVE_F 3 "register_operand")
2646 (match_operand:SVE_F 4 "register_operand")]
2647 SVE_COND_FP_TERNARY)
2648 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
2649 UNSPEC_SEL))]
2650 "TARGET_SVE"
2651{
2652 /* Swap the multiplication operands if the fallback value is the
2653 second of the two. */
2654 if (rtx_equal_p (operands[3], operands[5]))
2655 std::swap (operands[2], operands[3]);
2656})
2657
2658;; Predicated floating-point ternary operations, merging with the
2659;; first input.
;; The fallback value of the UNSPEC_SEL is operand 2 itself (match_dup 2).
;; Alternative 1 ties operand 2 to the destination; alternative 2 uses an
;; earlyclobber destination and a MOVPRFX of operand 2.  Both alternatives
;; use <sve_fmad_op> with %3 and %4 as the remaining inputs.
2660(define_insn "*cond_<optab><mode>_2"
2661 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2662 (unspec:SVE_F
a08acce8 2663 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
915d28fe 2664 (unspec:SVE_F
0d80d083
RS
2665 [(match_dup 1)
2666 (match_operand:SVE_F 2 "register_operand" "0, w")
915d28fe
RS
2667 (match_operand:SVE_F 3 "register_operand" "w, w")
2668 (match_operand:SVE_F 4 "register_operand" "w, w")]
2669 SVE_COND_FP_TERNARY)
2670 (match_dup 2)]
a08acce8
RH
2671 UNSPEC_SEL))]
2672 "TARGET_SVE"
2673 "@
915d28fe
RS
2674 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2675 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
a08acce8
RH
2676 [(set_attr "movprfx" "*,yes")]
2677)
2678
915d28fe
RS
2679;; Predicated floating-point ternary operations, merging with the
2680;; third input.
;; The fallback value of the UNSPEC_SEL is operand 4 itself (match_dup 4).
;; Alternative 1 ties operand 4 to the destination; alternative 2 uses an
;; earlyclobber destination and a MOVPRFX of operand 4.  Both alternatives
;; use <sve_fmla_op> with %2 and %3 as the remaining inputs.
2681(define_insn "*cond_<optab><mode>_4"
2682 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2683 (unspec:SVE_F
a08acce8 2684 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
915d28fe 2685 (unspec:SVE_F
0d80d083
RS
2686 [(match_dup 1)
2687 (match_operand:SVE_F 2 "register_operand" "w, w")
915d28fe
RS
2688 (match_operand:SVE_F 3 "register_operand" "w, w")
2689 (match_operand:SVE_F 4 "register_operand" "0, w")]
2690 SVE_COND_FP_TERNARY)
2691 (match_dup 4)]
a08acce8
RH
2692 UNSPEC_SEL))]
2693 "TARGET_SVE"
2694 "@
915d28fe
RS
2695 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2696 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
a08acce8
RH
2697 [(set_attr "movprfx" "*,yes")]
2698)
2699
915d28fe
RS
2700;; Predicated floating-point ternary operations, merging with an
2701;; independent value.
;; The insn condition requires the fallback (operand 5) to differ from
;; operands 2, 3 and 4.  The alternatives, by the form of operand 5:
;;   1. zero (Dz): zeroing MOVPRFX (%1/z) of operand 4, then <sve_fmla_op>
;;   2. tied to the destination: merging MOVPRFX (%1/m) of operand 4,
;;      then <sve_fmla_op>
;;   3. any other register: output "#", to be rewritten.
;; The rewrite runs after reload when operand 5 is neither a constant nor
;; equal to operand 0.  It emits a vcond_mask that selects between operand 4
;; and operand 5 under predicate operand 1 into operand 0, and then makes
;; operands 4 and 5 both refer to operand 0.
f4fde1b3 2702(define_insn_and_rewrite "*cond_<optab><mode>_any"
915d28fe
RS
2703 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
2704 (unspec:SVE_F
2705 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2706 (unspec:SVE_F
0d80d083
RS
2707 [(match_dup 1)
2708 (match_operand:SVE_F 2 "register_operand" "w, w, w")
915d28fe
RS
2709 (match_operand:SVE_F 3 "register_operand" "w, w, w")
2710 (match_operand:SVE_F 4 "register_operand" "w, w, w")]
2711 SVE_COND_FP_TERNARY)
2712 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
0d2b3bca 2713 UNSPEC_SEL))]
f4fde1b3 2714 "TARGET_SVE
915d28fe
RS
2715 && !rtx_equal_p (operands[2], operands[5])
2716 && !rtx_equal_p (operands[3], operands[5])
2717 && !rtx_equal_p (operands[4], operands[5])"
32cf949c 2718 "@
915d28fe
RS
2719 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2720 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
32cf949c
RS
2721 #"
2722 "&& reload_completed
915d28fe
RS
2723 && !CONSTANT_P (operands[5])
2724 && !rtx_equal_p (operands[0], operands[5])"
f4fde1b3 2725 {
915d28fe
RS
2726 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
2727 operands[5], operands[1]));
2728 operands[5] = operands[4] = operands[0];
f4fde1b3 2729 }
32cf949c 2730 [(set_attr "movprfx" "yes")]
0d2b3bca
RS
2731)
2732
915d28fe
RS
2733;; =========================================================================
2734;; == Comparisons and selects
2735;; =========================================================================
2736
2737;; -------------------------------------------------------------------------
2738;; ---- [INT,FP] Select based on predicates
2739;; -------------------------------------------------------------------------
2740;; Includes merging patterns for:
2741;; - MOV
2742;; - SEL
2743;; -------------------------------------------------------------------------
2744
2745;; vcond_mask operand order: true, false, mask
2746;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
2747;; SEL operand order: mask, true, false
;; The mask is operand 3 even though it appears first inside the unspec;
;; operands 1 and 2 are the values selected for true and false lanes.
;; The template prints the mask first, without a /z or /m qualifier.
2748(define_insn "vcond_mask_<mode><vpred>"
2749 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2750 (unspec:SVE_ALL
2751 [(match_operand:<VPRED> 3 "register_operand" "Upa")
2752 (match_operand:SVE_ALL 1 "register_operand" "w")
2753 (match_operand:SVE_ALL 2 "register_operand" "w")]
2754 UNSPEC_SEL))]
2755 "TARGET_SVE"
2756 "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
2757)
2758
2759;; Selects between a duplicated immediate and zero.
;; Operand 2 must satisfy aarch64_sve_dup_immediate and operand 3 must
;; satisfy aarch64_simd_imm_zero; neither has a constraint string, so the
;; predicates alone decide what matches.  The result is emitted as a
;; zeroing predicated MOV of the immediate (%1/z, #%2).
2760(define_insn "aarch64_sve_dup<mode>_const"
2761 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2762 (unspec:SVE_I
2763 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2764 (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
2765 (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
2766 UNSPEC_SEL))]
43cacb12 2767 "TARGET_SVE"
915d28fe 2768 "mov\t%0.<Vetype>, %1/z, #%2"
43cacb12
RS
2769)
2770
915d28fe
RS
2771;; -------------------------------------------------------------------------
2772;; ---- [INT,FP] Compare and select
2773;; -------------------------------------------------------------------------
2774;; The patterns in this section are synthetic.
2775;; -------------------------------------------------------------------------
43cacb12 2776
915d28fe
RS
2777;; Integer (signed) vcond. Don't enforce an immediate range here, since it
2778;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Operand 3 is the comparison of operands 4 and 5; operands 1 and 2 are
;; the values selected.  All expansion is delegated to
;; aarch64_expand_sve_vcond, which receives the data mode, the comparison
;; mode (<V_INT_EQUIV>) and the operand array; the body then ends with DONE.
2779(define_expand "vcond<mode><v_int_equiv>"
2780 [(set (match_operand:SVE_ALL 0 "register_operand")
2781 (if_then_else:SVE_ALL
2782 (match_operator 3 "comparison_operator"
2783 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
2784 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
2785 (match_operand:SVE_ALL 1 "register_operand")
2786 (match_operand:SVE_ALL 2 "register_operand")))]
898f07b0
RS
2787 "TARGET_SVE"
2788 {
915d28fe
RS
2789 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
2790 DONE;
898f07b0
RS
2791 }
2792)
2793
915d28fe
RS
2794;; Integer vcondu. Don't enforce an immediate range here, since it
2795;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Same operand layout and same call to aarch64_expand_sve_vcond as the
;; signed "vcond" expander; only the pattern name differs.
2796(define_expand "vcondu<mode><v_int_equiv>"
2797 [(set (match_operand:SVE_ALL 0 "register_operand")
2798 (if_then_else:SVE_ALL
2799 (match_operator 3 "comparison_operator"
2800 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
2801 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
2802 (match_operand:SVE_ALL 1 "register_operand")
2803 (match_operand:SVE_ALL 2 "register_operand")))]
898f07b0 2804 "TARGET_SVE"
915d28fe
RS
2805 {
2806 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
2807 DONE;
2808 }
898f07b0
RS
2809)
2810
915d28fe
RS
2811;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
2812;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
;; Defined for SVE_SD data modes with a <V_FP_EQUIV> comparison.  The second
;; comparison operand (operand 5) may be a register or zero.  Expansion is
;; delegated to aarch64_expand_sve_vcond, after which the body ends with DONE.
2813(define_expand "vcond<mode><v_fp_equiv>"
2814 [(set (match_operand:SVE_SD 0 "register_operand")
2815 (if_then_else:SVE_SD
2816 (match_operator 3 "comparison_operator"
2817 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
2818 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
2819 (match_operand:SVE_SD 1 "register_operand")
2820 (match_operand:SVE_SD 2 "register_operand")))]
b781a135
RS
2821 "TARGET_SVE"
2822 {
915d28fe
RS
2823 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
2824 DONE;
b781a135
RS
2825 }
2826)
2827
915d28fe
RS
2828;; -------------------------------------------------------------------------
2829;; ---- [INT] Comparisons
2830;; -------------------------------------------------------------------------
2831;; Includes merging patterns for:
2832;; - CMPEQ
2833;; - CMPGE
2834;; - CMPGT
2835;; - CMPHI
2836;; - CMPHS
2837;; - CMPLE
2838;; - CMPLO
2839;; - CMPLS
2840;; - CMPLT
2841;; - CMPNE
2842;; -------------------------------------------------------------------------
b781a135 2843
915d28fe
RS
2844;; Signed integer comparisons. Don't enforce an immediate range here, since
2845;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
2846;; instead.
;; Operand 0 is the predicate result and operand 1 is the comparison
;; operator applied to operands 2 and 3.  The C body passes the destination,
;; the rtx code of operand 1 and the two inputs to
;; aarch64_expand_sve_vec_cmp_int, then ends with DONE.
2847(define_expand "vec_cmp<mode><vpred>"
2848 [(parallel
2849 [(set (match_operand:<VPRED> 0 "register_operand")
2850 (match_operator:<VPRED> 1 "comparison_operator"
2851 [(match_operand:SVE_I 2 "register_operand")
2852 (match_operand:SVE_I 3 "nonmemory_operand")]))
2853 (clobber (reg:CC_NZC CC_REGNUM))])]
b781a135 2854 "TARGET_SVE"
915d28fe
RS
2855 {
2856 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
2857 operands[2], operands[3]);
2858 DONE;
2859 }
b781a135
RS
2860)
2861
915d28fe
RS
2862;; Unsigned integer comparisons. Don't enforce an immediate range here, since
2863;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
2864;; instead.
;; Same operand layout and same call to aarch64_expand_sve_vec_cmp_int as
;; the signed "vec_cmp" expander; only the pattern name differs.
2865(define_expand "vec_cmpu<mode><vpred>"
2866 [(parallel
2867 [(set (match_operand:<VPRED> 0 "register_operand")
2868 (match_operator:<VPRED> 1 "comparison_operator"
2869 [(match_operand:SVE_I 2 "register_operand")
2870 (match_operand:SVE_I 3 "nonmemory_operand")]))
2871 (clobber (reg:CC_NZC CC_REGNUM))])]
43cacb12
RS
2872 "TARGET_SVE"
2873 {
915d28fe
RS
2874 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
2875 operands[2], operands[3]);
2876 DONE;
43cacb12
RS
2877 }
2878)
2879
915d28fe
RS
2880;; Integer comparisons predicated with a PTRUE.
;; Alternative 1 takes an immediate second input (constraint <sve_imm_con>,
;; printed as #%3); alternative 2 takes a register.  The predicate in
;; operand 1 is printed as %1/z.  The pattern also clobbers the CC register.
2881(define_insn "*cmp<cmp_op><mode>"
2882 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
2883 (unspec:<VPRED>
2884 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2885 (SVE_INT_CMP:<VPRED>
2886 (match_operand:SVE_I 2 "register_operand" "w, w")
2887 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
2888 UNSPEC_MERGE_PTRUE))
2889 (clobber (reg:CC_NZC CC_REGNUM))]
43cacb12
RS
2890 "TARGET_SVE"
2891 "@
915d28fe
RS
2892 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
2893 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
2894)
2895
915d28fe
RS
2896;; Integer comparisons predicated with a PTRUE in which both the flag and
2897;; predicate results are interesting.
;; Two sets in parallel: the CC register receives an UNSPEC_PTEST of the
;; comparison, and operand 0 receives the comparison result itself.
;; Operand 1 is the VNx16BI predicate printed in the template (%1/z).
;; Operand 4 has no predicate or constraint of its own and is repeated, via
;; match_dup, as the predicate of both copies of the comparison.  Operand 5
;; is the PTEST flag (aarch64_sve_ptrue_flag).  The assembly is the same as
;; for the plain comparison pattern.
2898(define_insn "*cmp<cmp_op><mode>_cc"
2899 [(set (reg:CC_NZC CC_REGNUM)
2900 (unspec:CC_NZC
34467289
RS
2901 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
2902 (match_operand 4)
2903 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
915d28fe 2904 (unspec:<VPRED>
34467289 2905 [(match_dup 4)
915d28fe
RS
2906 (SVE_INT_CMP:<VPRED>
2907 (match_operand:SVE_I 2 "register_operand" "w, w")
2908 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
2909 UNSPEC_MERGE_PTRUE)]
34467289 2910 UNSPEC_PTEST))
915d28fe
RS
2911 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
2912 (unspec:<VPRED>
34467289 2913 [(match_dup 4)
915d28fe
RS
2914 (SVE_INT_CMP:<VPRED>
2915 (match_dup 2)
2916 (match_dup 3))]
43cacb12
RS
2917 UNSPEC_MERGE_PTRUE))]
2918 "TARGET_SVE"
915d28fe
RS
2919 "@
2920 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
2921 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
2922)
2923
915d28fe
RS
2924;; Integer comparisons predicated with a PTRUE in which only the flags result
2925;; is interesting.
;; Only the CC register is set, from an UNSPEC_PTEST of the comparison.
;; The predicate destination still has to be written by the instruction,
;; so operand 0 is a match_scratch that the pattern clobbers.  Operands 1,
;; 4 and 5 play the same roles as in the "_cc" pattern: printed predicate,
;; unconstrained predicate repeated by match_dup, and PTEST flag.
2926(define_insn "*cmp<cmp_op><mode>_ptest"
2927 [(set (reg:CC_NZC CC_REGNUM)
2928 (unspec:CC_NZC
34467289
RS
2929 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
2930 (match_operand 4)
2931 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
915d28fe 2932 (unspec:<VPRED>
34467289 2933 [(match_dup 4)
915d28fe
RS
2934 (SVE_INT_CMP:<VPRED>
2935 (match_operand:SVE_I 2 "register_operand" "w, w")
2936 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
2937 UNSPEC_MERGE_PTRUE)]
34467289 2938 UNSPEC_PTEST))
915d28fe
RS
2939 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
2940 "TARGET_SVE"
43cacb12 2941 "@
915d28fe
RS
2942 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
2943 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
2944)
2945
915d28fe
RS
2946;; Predicated integer comparisons, formed by combining a PTRUE-predicated
2947;; comparison with an AND. Split the instruction into its preferred form
2948;; (below) at the earliest opportunity, in order to get rid of the
2949;; redundant operand 1.
;; Operand 1 has neither a predicate nor a constraint and does not appear
;; in the replacement pattern.  The output template is "#" and the split
;; condition is "&& 1", so the split applies whenever the insn condition
;; holds.  The replacement ANDs the bare comparison with operand 4 and
;; keeps the CC clobber.
2950(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
2951 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
2952 (and:<VPRED>
2953 (unspec:<VPRED>
2954 [(match_operand:<VPRED> 1)
2955 (SVE_INT_CMP:<VPRED>
2956 (match_operand:SVE_I 2 "register_operand" "w, w")
2957 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
2958 UNSPEC_MERGE_PTRUE)
2959 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
2960 (clobber (reg:CC_NZC CC_REGNUM))]
2961 "TARGET_SVE"
2962 "#"
2963 "&& 1"
2964 [(parallel
2965 [(set (match_dup 0)
2966 (and:<VPRED>
2967 (SVE_INT_CMP:<VPRED>
2968 (match_dup 2)
2969 (match_dup 3))
2970 (match_dup 4)))
2971 (clobber (reg:CC_NZC CC_REGNUM))])]
43cacb12
RS
2972)
2973
915d28fe 2974;; Predicated integer comparisons.
;; The comparison of operands 2 and 3 is ANDed with the predicate in
;; operand 1, which the template prints as %1/z.  Alternative 1 takes an
;; immediate second input (#%3); alternative 2 takes a register.  The
;; pattern also clobbers the CC register.
678faefc 2975(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
915d28fe
RS
2976 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
2977 (and:<VPRED>
2978 (SVE_INT_CMP:<VPRED>
2979 (match_operand:SVE_I 2 "register_operand" "w, w")
2980 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
2981 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
2982 (clobber (reg:CC_NZC CC_REGNUM))]
43cacb12
RS
2983 "TARGET_SVE"
2984 "@
915d28fe
RS
2985 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
2986 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
2987)
2988
915d28fe
RS
2989;; -------------------------------------------------------------------------
2990;; ---- [INT] While tests
2991;; -------------------------------------------------------------------------
2992;; Includes:
2993;; - WHILELO
2994;; -------------------------------------------------------------------------
740c1ed7 2995
915d28fe
RS
2996;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
2997;; with the comparison being unsigned.
;; Operands 1 and 2 are GPI scalars that may each be a register or zero
;; (constraint "rZ"); the template prints them with the <w> register-size
;; prefix.  The predicate result is operand 0.  The pattern also clobbers
;; the CC register.
0b1fe8cf 2998(define_insn "@while_ult<GPI:mode><PRED_ALL:mode>"
915d28fe
RS
2999 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3000 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
3001 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
3002 UNSPEC_WHILE_LO))
3003 (clobber (reg:CC_NZC CC_REGNUM))]
43cacb12 3004 "TARGET_SVE"
915d28fe 3005 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
43cacb12
RS
3006)
3007
915d28fe 3008;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
34467289
RS
3009;; Handle the case in which both results are useful. The GP operands
3010;; to the PTEST aren't needed, so we allow them to be anything.
915d28fe
RS
;; Two sets in parallel: the CC register receives an UNSPEC_PTEST of the
;; WHILELO result (with the flag fixed at SVE_KNOWN_PTRUE), and operand 0
;; receives the WHILELO result itself.  Operands 3 and 4 have neither a
;; predicate nor a constraint and are not printed.  The rewrite condition
;; holds whenever either of them is not already a constant; the C body
;; then replaces them with CONSTM1_RTX of VNx16BImode and of the
;; predicate mode respectively.
3011(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
3012 [(set (reg:CC_NZC CC_REGNUM)
3013 (unspec:CC_NZC
34467289
RS
3014 [(match_operand 3)
3015 (match_operand 4)
3016 (const_int SVE_KNOWN_PTRUE)
915d28fe 3017 (unspec:PRED_ALL
34467289
RS
3018 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
3019 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
915d28fe 3020 UNSPEC_WHILE_LO)]
34467289 3021 UNSPEC_PTEST))
915d28fe 3022 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
34467289
RS
3023 (unspec:PRED_ALL [(match_dup 1)
3024 (match_dup 2)]
915d28fe 3025 UNSPEC_WHILE_LO))]
43cacb12 3026 "TARGET_SVE"
34467289 3027 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
915d28fe
RS
3028 ;; Force the compiler to drop the unused predicate operand, so that we
3029 ;; don't have an unnecessary PTRUE.
34467289 3030 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
915d28fe 3031 {
34467289
RS
3032 operands[3] = CONSTM1_RTX (VNx16BImode);
3033 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
915d28fe 3034 }
43cacb12
RS
3035)
3036
915d28fe
RS
3037;; -------------------------------------------------------------------------
3038;; ---- [FP] Comparisons
3039;; -------------------------------------------------------------------------
3040;; Includes:
3041;; - FCMEQ
3042;; - FCMGE
3043;; - FCMGT
3044;; - FCMLE
3045;; - FCMLT
3046;; - FCMNE
3047;; - FCMUO
3048;; -------------------------------------------------------------------------
3049
3050;; Floating-point comparisons. All comparisons except FCMUO allow a zero
3051;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
3052;; with zero.
;; Floating-point counterpart of the integer "vec_cmp" expander; there is
;; no CC clobber here.  The C body passes the destination, the rtx code of
;; operand 1, the two inputs and a final argument of false to
;; aarch64_expand_sve_vec_cmp_float, then ends with DONE.
3053(define_expand "vec_cmp<mode><vpred>"
3054 [(set (match_operand:<VPRED> 0 "register_operand")
3055 (match_operator:<VPRED> 1 "comparison_operator"
3056 [(match_operand:SVE_F 2 "register_operand")
3057 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
43cacb12
RS
3058 "TARGET_SVE"
3059 {
915d28fe
RS
3060 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
3061 operands[2], operands[3], false);
3062 DONE;
43cacb12
RS
3063 }
3064)
3065
915d28fe
RS
3066;; Floating-point comparisons predicated with a PTRUE.
;; Alternative 1 matches a zero second input (constraint Dz) and prints the
;; literal #0.0; alternative 2 takes a register.  The predicate in
;; operand 1 is printed as %1/z.
3067(define_insn "*fcm<cmp_op><mode>"
3068 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
3069 (unspec:<VPRED>
3070 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3071 (SVE_FP_CMP:<VPRED>
3072 (match_operand:SVE_F 2 "register_operand" "w, w")
3073 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
43cacb12
RS
3074 UNSPEC_MERGE_PTRUE))]
3075 "TARGET_SVE"
3076 "@
915d28fe
RS
3077 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
3078 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
3079)
3080
915d28fe
RS
3081;; Same for unordered comparisons.
;; Unlike the other floating-point comparisons, both inputs must be
;; registers, so there is a single alternative and no #0.0 form.
3082(define_insn "*fcmuo<mode>"
3083 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
3084 (unspec:<VPRED>
3085 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3086 (unordered:<VPRED>
3087 (match_operand:SVE_F 2 "register_operand" "w")
3088 (match_operand:SVE_F 3 "register_operand" "w"))]
43cacb12
RS
3089 UNSPEC_MERGE_PTRUE))]
3090 "TARGET_SVE"
915d28fe 3091 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
3092)
3093
915d28fe
RS
3094;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
3095;; with another predicate P. This does not have the same trapping behavior
3096;; as predicating the comparison itself on P, but it's a legitimate fold,
3097;; since we can drop any potentially-trapping operations whose results
3098;; are not needed.
3099;;
3100;; Split the instruction into its preferred form (below) at the earliest
3101;; opportunity, in order to get rid of the redundant operand 1.
;; Operand 1 has neither a predicate nor a constraint and does not appear
;; in the replacement pattern.  The output template is "#" and the split
;; condition is "&& 1", so the split applies whenever the insn condition
;; holds.  The replacement ANDs the bare comparison with operand 4.
3102(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
3103 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
3104 (and:<VPRED>
3105 (unspec:<VPRED>
3106 [(match_operand:<VPRED> 1)
3107 (SVE_FP_CMP
3108 (match_operand:SVE_F 2 "register_operand" "w, w")
3109 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
3110 UNSPEC_MERGE_PTRUE)
3111 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
43cacb12 3112 "TARGET_SVE"
915d28fe
RS
3113 "#"
3114 "&& 1"
3115 [(set (match_dup 0)
3116 (and:<VPRED>
3117 (SVE_FP_CMP:<VPRED>
3118 (match_dup 2)
3119 (match_dup 3))
3120 (match_dup 4)))]
43cacb12
RS
3121)
3122
915d28fe
RS
3123;; Same for unordered comparisons.
;; Operand 1 has neither a predicate nor a constraint and does not appear
;; in the replacement pattern.  The split condition is "&& 1", so the
;; split applies whenever the insn condition holds.  The replacement ANDs
;; the bare unordered comparison with operand 4.  Both inputs must be
;; registers.
3124(define_insn_and_split "*fcmuo<mode>_and_combine"
3125 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
3126 (and:<VPRED>
3127 (unspec:<VPRED>
3128 [(match_operand:<VPRED> 1)
3129 (unordered
3130 (match_operand:SVE_F 2 "register_operand" "w")
3131 (match_operand:SVE_F 3 "register_operand" "w"))]
3132 UNSPEC_MERGE_PTRUE)
3133 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
43cacb12 3134 "TARGET_SVE"
915d28fe
RS
3135 "#"
3136 "&& 1"
3137 [(set (match_dup 0)
3138 (and:<VPRED>
3139 (unordered:<VPRED>
3140 (match_dup 2)
3141 (match_dup 3))
3142 (match_dup 4)))]
43cacb12
RS
3143)
3144
915d28fe
RS
3145;; Unpredicated floating-point comparisons, with the results ANDed with
3146;; another predicate. This is a valid fold for the same reasons as above.
;; The comparison of operands 2 and 3 is ANDed with the predicate in
;; operand 1, which the template prints as %1/z.  Alternative 1 matches a
;; zero second input (Dz) and prints #0.0; alternative 2 takes a register.
3147(define_insn "*fcm<cmp_op><mode>_and"
3148 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
3149 (and:<VPRED>
3150 (SVE_FP_CMP:<VPRED>
3151 (match_operand:SVE_F 2 "register_operand" "w, w")
3152 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
3153 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
43cacb12
RS
3154 "TARGET_SVE"
3155 "@
915d28fe
RS
3156 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
3157 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
3158)
3159
915d28fe
RS
3160;; Same for unordered comparisons.
;; The unordered comparison of operands 2 and 3 is ANDed with the predicate
;; in operand 1 (%1/z).  Both inputs must be registers.
3161(define_insn "*fcmuo<mode>_and"
3162 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
3163 (and:<VPRED>
3164 (unordered:<VPRED>
3165 (match_operand:SVE_F 2 "register_operand" "w")
3166 (match_operand:SVE_F 3 "register_operand" "w"))
3167 (match_operand:<VPRED> 1 "register_operand" "Upl")))]
43cacb12 3168 "TARGET_SVE"
915d28fe 3169 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
3170)
3171
915d28fe
RS
3172;; Predicated floating-point comparisons. We don't need a version
3173;; of this for unordered comparisons.
;; The comparison is represented as an SVE_COND_FP_CMP unspec of the
;; predicate (operand 1) and the two inputs, rather than as an rtx
;; comparison code.  Alternative 1 matches a zero second input (Dz) and
;; prints #0.0; alternative 2 takes a register.
3174(define_insn "*pred_fcm<cmp_op><mode>"
3175 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
3176 (unspec:<VPRED>
3177 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3178 (match_operand:SVE_F 2 "register_operand" "w, w")
3179 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
3180 SVE_COND_FP_CMP))]
43cacb12
RS
3181 "TARGET_SVE"
3182 "@
915d28fe
RS
3183 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
3184 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
43cacb12
RS
3185)
3186
915d28fe
RS
3187;; -------------------------------------------------------------------------
3188;; ---- [PRED] Test bits
3189;; -------------------------------------------------------------------------
3190;; Includes:
3191;; - PTEST
3192;; -------------------------------------------------------------------------
3193
3194;; Branch based on predicate equality or inequality.
;; The C body has no DONE, so the branch template is emitted with the
;; operands as the body leaves them.  The steps are:
;;   - force aarch64_ptrue_all (<data_bytes>) into a VNx16BI register and
;;     take its lowpart in <MODE>mode;
;;   - if operand 2 is the zero constant, test operand 1 directly;
;;     otherwise emit a predicated XOR of operands 1 and 2 into a new
;;     register and test that instead;
;;   - emit the PTEST, then replace operand 1 with the CC register and
;;     operand 2 with const0_rtx.
;; NOTE(review): the condition string is empty, whereas the other patterns
;; in this section use "TARGET_SVE" -- confirm that this is intentional.
3195(define_expand "cbranch<mode>4"
3196 [(set (pc)
3197 (if_then_else
3198 (match_operator 0 "aarch64_equality_operator"
3199 [(match_operand:PRED_ALL 1 "register_operand")
3200 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
3201 (label_ref (match_operand 3 ""))
3202 (pc)))]
3203 ""
43cacb12 3204 {
34467289
RS
3205 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
3206 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
3207 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
915d28fe
RS
3208 rtx pred;
3209 if (operands[2] == CONST0_RTX (<MODE>mode))
3210 pred = operands[1];
3211 else
3212 {
3213 pred = gen_reg_rtx (<MODE>mode);
34467289
RS
3214 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
3215 operands[2]));
915d28fe 3216 }
34467289 3217 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
915d28fe
RS
3218 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
3219 operands[2] = const0_rtx;
43cacb12
RS
3220 }
3221)
3222
34467289
RS
3223;; See "Description of UNSPEC_PTEST" above for details.
;; Only operand 0 (the VNx16BI predicate) and operand 3 (the predicate
;; being tested) are printed.  Operand 1 has no predicate or constraint
;; and operand 2 is the PTEST flag; neither appears in the assembly.
3224(define_insn "aarch64_ptest<mode>"
915d28fe 3225 [(set (reg:CC_NZC CC_REGNUM)
34467289
RS
3226 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
3227 (match_operand 1)
3228 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
3229 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
3230 UNSPEC_PTEST))]
43cacb12 3231 "TARGET_SVE"
34467289 3232 "ptest\t%0, %3.b"
43cacb12
RS
3233)
3234
915d28fe
RS
3235;; =========================================================================
3236;; == Reductions
3237;; =========================================================================
3238
3239;; -------------------------------------------------------------------------
3240;; ---- [INT,FP] Conditional reductions
3241;; -------------------------------------------------------------------------
3242;; Includes:
3243;; - CLASTB
3244;; -------------------------------------------------------------------------
3245
3246;; Set operand 0 to the last active element in operand 3, or to tied
3247;; operand 1 if no elements are active.
;; Operand 1 is tied to the destination in both alternatives.
;; Alternative 1 keeps the scalar in a general register (constraint "?r",
;; printed with the <vwcore> prefix); alternative 2 keeps it in a "w"
;; register (printed with the <Vetype> prefix).  Operand 2 is the
;; predicate and operand 3 the vector.
3248(define_insn "fold_extract_last_<mode>"
801790b3 3249 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
915d28fe
RS
3250 (unspec:<VEL>
3251 [(match_operand:<VEL> 1 "register_operand" "0, 0")
3252 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
3253 (match_operand:SVE_ALL 3 "register_operand" "w, w")]
3254 UNSPEC_CLASTB))]
3db85990 3255 "TARGET_SVE"
915d28fe
RS
3256 "@
3257 clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
801790b3 3258 clastb\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
3db85990
ST
3259)
3260
915d28fe
RS
3261;; -------------------------------------------------------------------------
3262;; ---- [INT] Tree reductions
3263;; -------------------------------------------------------------------------
3264;; Includes:
3265;; - ANDV
3266;; - EORV
3267;; - ORV
3268;; - SMAXV
3269;; - SMINV
3270;; - UADDV
3271;; - UMAXV
3272;; - UMINV
3273;; -------------------------------------------------------------------------
3274
3275;; Unpredicated integer add reduction.
;; Operand 1 is the vector to reduce and operand 0 receives the scalar
;; result.  The expander creates operand 2 itself with aarch64_ptrue_reg,
;; so the emitted RTL has the predicate-plus-vector shape that the
;; predicated *reduc_plus_scal_<mode> pattern matches.
3276(define_expand "reduc_plus_scal_<mode>"
3277 [(set (match_operand:<VEL> 0 "register_operand")
3278 (unspec:<VEL> [(match_dup 2)
3279 (match_operand:SVE_I 1 "register_operand")]
3280 UNSPEC_ADDV))]
43cacb12
RS
3281 "TARGET_SVE"
3282 {
16de3637 3283 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
3284 }
3285)
3286
915d28fe
RS
3287;; Predicated integer add reduction. The result is always 64 bits,
;; so operand 0 is printed with the d modifier regardless of the element
;; size of the input.  Operand 1 is the governing predicate and operand 2
;; the vector being reduced.
3288(define_insn "*reduc_plus_scal_<mode>"
3289 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3290 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
3291 (match_operand:SVE_I 2 "register_operand" "w")]
3292 UNSPEC_ADDV))]
43cacb12 3293 "TARGET_SVE"
915d28fe 3294 "uaddv\t%d0, %1, %2.<Vetype>"
43cacb12
RS
3295)
3296
b0760a40 3297;; Unpredicated integer reductions.
;; One expander per member of SVE_INT_REDUCTION.  Operand 1 is the vector
;; to reduce; operand 2 is filled in by the expander with
;; aarch64_ptrue_reg so that the predicated pattern can match.
915d28fe
RS
3298(define_expand "reduc_<optab>_scal_<mode>"
3299 [(set (match_operand:<VEL> 0 "register_operand")
3300 (unspec:<VEL> [(match_dup 2)
3301 (match_operand:SVE_I 1 "register_operand")]
b0760a40 3302 SVE_INT_REDUCTION))]
43cacb12 3303 "TARGET_SVE"
915d28fe
RS
3304 {
3305 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3306 }
43cacb12
RS
3307)
3308
b0760a40 3309;; Predicated integer reductions.
;; Operand 1 is the governing predicate and operand 2 the vector being
;; reduced.  The mnemonic comes from <sve_int_op>, and the scalar result
;; is printed with the same element-size prefix as the input elements.
915d28fe
RS
3310(define_insn "*reduc_<optab>_scal_<mode>"
3311 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3312 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
3313 (match_operand:SVE_I 2 "register_operand" "w")]
b0760a40 3314 SVE_INT_REDUCTION))]
43cacb12 3315 "TARGET_SVE"
b0760a40 3316 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
43cacb12
RS
3317)
3318
915d28fe
RS
3319;; -------------------------------------------------------------------------
3320;; ---- [FP] Tree reductions
3321;; -------------------------------------------------------------------------
3322;; Includes:
3323;; - FADDV
3324;; - FMAXNMV
3325;; - FMAXV
3326;; - FMINNMV
3327;; - FMINV
3328;; -------------------------------------------------------------------------
3329
b0760a40
RS
3330;; Unpredicated floating-point tree reductions.
;; One expander per member of SVE_FP_REDUCTION.  Operand 1 is the vector
;; to reduce; operand 2 is filled in by the expander with
;; aarch64_ptrue_reg so that the predicated pattern can match.
3331(define_expand "reduc_<optab>_scal_<mode>"
915d28fe
RS
3332 [(set (match_operand:<VEL> 0 "register_operand")
3333 (unspec:<VEL> [(match_dup 2)
3334 (match_operand:SVE_F 1 "register_operand")]
b0760a40 3335 SVE_FP_REDUCTION))]
43cacb12 3336 "TARGET_SVE"
915d28fe
RS
3337 {
3338 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3339 }
43cacb12
RS
3340)
3341
b0760a40
RS
3342;; Predicated floating-point tree reductions.
;; Operand 1 is the governing predicate and operand 2 the vector being
;; reduced.  The mnemonic comes from <sve_fp_op>, and the scalar result
;; is printed with the same element-size prefix as the input elements.
3343(define_insn "*reduc_<optab>_scal_<mode>"
915d28fe
RS
3344 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3345 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
3346 (match_operand:SVE_F 2 "register_operand" "w")]
b0760a40 3347 SVE_FP_REDUCTION))]
43cacb12 3348 "TARGET_SVE"
b0760a40 3349 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
43cacb12
RS
3350)
3351
915d28fe
RS
3352;; -------------------------------------------------------------------------
3353;; ---- [FP] Left-to-right reductions
3354;; -------------------------------------------------------------------------
3355;; Includes:
3356;; - FADDA
3357;; -------------------------------------------------------------------------
3358
3359;; Unpredicated in-order FP reductions.
;; Operand 1 is the scalar input and operand 2 the vector input.  The
;; expander supplies operand 3 with aarch64_ptrue_reg, giving RTL of the
;; same shape as the predicated mask_fold_left_plus_<mode> pattern.
3360(define_expand "fold_left_plus_<mode>"
3361 [(set (match_operand:<VEL> 0 "register_operand")
3362 (unspec:<VEL> [(match_dup 3)
3363 (match_operand:<VEL> 1 "register_operand")
3364 (match_operand:SVE_F 2 "register_operand")]
3365 UNSPEC_FADDA))]
43cacb12 3366 "TARGET_SVE"
915d28fe
RS
3367 {
3368 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3369 }
43cacb12
RS
3370)
3371
915d28fe
RS
3372;; Predicated in-order FP reductions.
;; Operand 3 is the governing predicate and operand 2 the vector input.
;; Scalar operand 1 is tied to the output register by its 0 constraint,
;; which is why the template prints register 0 as both destination and
;; first source.
3373(define_insn "mask_fold_left_plus_<mode>"
3374 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3375 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
3376 (match_operand:<VEL> 1 "register_operand" "0")
3377 (match_operand:SVE_F 2 "register_operand" "w")]
3378 UNSPEC_FADDA))]
43cacb12 3379 "TARGET_SVE"
915d28fe 3380 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
43cacb12
RS
3381)
3382
915d28fe
RS
3383;; =========================================================================
3384;; == Permutes
3385;; =========================================================================
3386
3387;; -------------------------------------------------------------------------
3388;; ---- [INT,FP] General permutes
3389;; -------------------------------------------------------------------------
3390;; Includes:
3391;; - TBL
3392;; -------------------------------------------------------------------------
3393
;; General permute: operand 0 is the result, operands 1 and 2 are the two
;; input vectors and operand 3 is the integer selector vector.  The
;; expander is only enabled when the number of elements in the mode is a
;; compile-time constant, and it delegates all code generation to
;; aarch64_expand_sve_vec_perm.
3394(define_expand "vec_perm<mode>"
3395 [(match_operand:SVE_ALL 0 "register_operand")
3396 (match_operand:SVE_ALL 1 "register_operand")
3397 (match_operand:SVE_ALL 2 "register_operand")
3398 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
3399 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
9bfb28ed 3400 {
915d28fe
RS
3401 aarch64_expand_sve_vec_perm (operands[0], operands[1],
3402 operands[2], operands[3]);
9bfb28ed
RS
3403 DONE;
3404 }
3405)
3406
915d28fe
RS
;; TBL on a single input vector.  Operand 1 is the data vector and
;; operand 2 is the integer vector of the equivalent mode that is printed
;; as the final (index) operand of the instruction.
3407(define_insn "*aarch64_sve_tbl<mode>"
3408 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
3409 (unspec:SVE_ALL
3410 [(match_operand:SVE_ALL 1 "register_operand" "w")
3411 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
3412 UNSPEC_TBL))]
43cacb12 3413 "TARGET_SVE"
915d28fe 3414 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
43cacb12
RS
3415)
3416
915d28fe
RS
3417;; -------------------------------------------------------------------------
3418;; ---- [INT,FP] Special-purpose unary permutes
3419;; -------------------------------------------------------------------------
3420;; Includes:
3421;; - DUP
3422;; - REV
3423;; - REVB
3424;; - REVH
3425;; - REVW
3426;; -------------------------------------------------------------------------
3427
3428;; Duplicate one element of a vector.
;; Operand 2 is the constant lane index.  The pattern only matches when
;; the byte offset of that lane (index times element size) is in the
;; range [0, 63]; the lane index itself is what gets printed.
3429(define_insn "*aarch64_sve_dup_lane<mode>"
3430 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
3431 (vec_duplicate:SVE_ALL
3432 (vec_select:<VEL>
3433 (match_operand:SVE_ALL 1 "register_operand" "w")
3434 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
3435 "TARGET_SVE
3436 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
3437 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
3438)
3439
3440;; Reverse the order of elements within a full vector.
;; Unpredicated: the only input is the vector in operand 1, and both
;; registers are printed with the element suffix of the mode.
3441(define_insn "@aarch64_sve_rev<mode>"
3442 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
3443 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
3444 UNSPEC_REV))]
9bfb28ed 3445 "TARGET_SVE"
915d28fe
RS
3446 "rev\t%0.<Vetype>, %1.<Vetype>")
3447
3448;; Reverse the order of elements within a 64-bit container.
;; The mnemonic is formed from rev plus <Vesize>, and the registers are
;; always printed as .d containers.  Operand 1 is a VNx2BI predicate
;; (one bit per 64-bit container) printed with /m; operand 2 is the input.
3449(define_insn "*aarch64_sve_rev64<mode>"
3450 [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
3451 (unspec:SVE_BHS
3452 [(match_operand:VNx2BI 1 "register_operand" "Upl")
3453 (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
3454 UNSPEC_REV64)]
3455 UNSPEC_MERGE_PTRUE))]
3456 "TARGET_SVE"
3457 "rev<Vesize>\t%0.d, %1/m, %2.d"
9bfb28ed
RS
3458)
3459
915d28fe
RS
3460;; Reverse the order of elements within a 32-bit container.
;; The mnemonic is formed from rev plus <Vesize>, and the registers are
;; always printed as .s containers.  Operand 1 is a VNx4BI predicate
;; (one bit per 32-bit container) printed with /m; operand 2 is the input.
3461(define_insn "*aarch64_sve_rev32<mode>"
3462 [(set (match_operand:SVE_BH 0 "register_operand" "=w")
3463 (unspec:SVE_BH
3464 [(match_operand:VNx4BI 1 "register_operand" "Upl")
3465 (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
3466 UNSPEC_REV32)]
3467 UNSPEC_MERGE_PTRUE))]
43cacb12 3468 "TARGET_SVE"
915d28fe 3469 "rev<Vesize>\t%0.s, %1/m, %2.s"
43cacb12
RS
3470)
3471
915d28fe
RS
3472;; Reverse the order of elements within a 16-bit container.
;; Only defined for VNx16QI, so the instruction is always REVB on .h
;; containers.  Operand 1 is a VNx8BI predicate (one bit per 16-bit
;; container) printed with /m; operand 2 is the input.
3473(define_insn "*aarch64_sve_rev16vnx16qi"
3474 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
3475 (unspec:VNx16QI
3476 [(match_operand:VNx8BI 1 "register_operand" "Upl")
3477 (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
3478 UNSPEC_REV16)]
3479 UNSPEC_MERGE_PTRUE))]
43cacb12 3480 "TARGET_SVE"
915d28fe 3481 "revb\t%0.h, %1/m, %2.h"
43cacb12
RS
3482)
3483
915d28fe
RS
3484;; -------------------------------------------------------------------------
3485;; ---- [INT,FP] Special-purpose binary permutes
3486;; -------------------------------------------------------------------------
3487;; Includes:
3488;; - TRN1
3489;; - TRN2
3490;; - UZP1
3491;; - UZP2
3492;; - ZIP1
3493;; - ZIP2
3494;; -------------------------------------------------------------------------
3495
3496;; Permutes that take half the elements from one vector and half the
3497;; elements from the other.
;; The PERMUTE iterator selects the instruction via <perm_insn>.  This
;; pattern is named (no leading *) so that generators exist for it; the
;; unpack expanders later in this file call the zip1 and zip2 ones.
3e2751ce 3498(define_insn "aarch64_sve_<perm_insn><mode>"
915d28fe
RS
3499 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
3500 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
3501 (match_operand:SVE_ALL 2 "register_operand" "w")]
3502 PERMUTE))]
9bfb28ed 3503 "TARGET_SVE"
3e2751ce 3504 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
915d28fe
RS
3505)
3506
3507;; Concatenate two vectors and extract a subvector. Note that the
3508;; immediate (third) operand is the lane index not the byte index.
;; The output code multiplies the lane index by the element size to get
;; the byte immediate that is printed, and the insn condition requires
;; that byte value to be in the range [0, 255].  Operand 1 is tied to the
;; destination, so the template prints register 0 as the first source.
3509(define_insn "*aarch64_sve_ext<mode>"
3510 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
3511 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
3512 (match_operand:SVE_ALL 2 "register_operand" "w")
3513 (match_operand:SI 3 "const_int_operand")]
3514 UNSPEC_EXT))]
3515 "TARGET_SVE
3516 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
9bfb28ed 3517 {
915d28fe
RS
3518 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
3519 return "ext\\t%0.b, %0.b, %2.b, #%3";
43cacb12
RS
3520 }
3521)
3522
915d28fe
RS
3523;; -------------------------------------------------------------------------
3524;; ---- [PRED] Special-purpose binary permutes
3525;; -------------------------------------------------------------------------
3526;; Includes:
3527;; - TRN1
3528;; - TRN2
3529;; - UZP1
3530;; - UZP2
3531;; - ZIP1
3532;; - ZIP2
3533;; -------------------------------------------------------------------------
3534
3535;; Permutes that take half the elements from one vector and half the
3536;; elements from the other.
;; Predicate-register counterpart of the vector permute pattern: all
;; three operands are PRED_ALL predicates using the Upa constraint, and
;; the PERMUTE iterator selects the instruction via <perm_insn>.
3e2751ce 3537(define_insn "*aarch64_sve_<perm_insn><mode>"
915d28fe
RS
3538 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3539 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
3540 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
3541 PERMUTE))]
43cacb12 3542 "TARGET_SVE"
3e2751ce 3543 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
43cacb12
RS
3544)
3545
915d28fe
RS
3546;; =========================================================================
3547;; == Conversions
3548;; =========================================================================
3549
3550;; -------------------------------------------------------------------------
3551;; ---- [INT<-INT] Packs
3552;; -------------------------------------------------------------------------
3553;; Includes:
3554;; - UZP1
3555;; -------------------------------------------------------------------------
3556
43cacb12
RS
3557;; Integer pack. Use UZP1 on the narrower type, which discards
3558;; the high part of each wide element.
;; Operands 1 and 2 are the two wide (<VWIDE>) inputs and operand 0 is the
;; narrow result; all three registers are printed with the narrow
;; element suffix.
3559(define_insn "vec_pack_trunc_<Vwide>"
3560 [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
3561 (unspec:SVE_BHSI
3562 [(match_operand:<VWIDE> 1 "register_operand" "w")
3563 (match_operand:<VWIDE> 2 "register_operand" "w")]
3564 UNSPEC_PACK))]
3565 "TARGET_SVE"
3566 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3567)
3568
915d28fe
RS
3569;; -------------------------------------------------------------------------
3570;; ---- [INT<-INT] Unpacks
3571;; -------------------------------------------------------------------------
3572;; Includes:
3573;; - SUNPKHI
3574;; - SUNPKLO
3575;; - UUNPKHI
3576;; - UUNPKLO
3577;; -------------------------------------------------------------------------
3578
3579;; Unpack the low or high half of a vector, where "high" refers to
3580;; the low-numbered lanes for big-endian and the high-numbered lanes
3581;; for little-endian.
;; The expander emits exactly one instruction: it picks the unpkhi or
;; unpklo generator of matching signedness according to <hi_lanes_optab>.
3582(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
3583 [(match_operand:<VWIDE> 0 "register_operand")
3584 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
43cacb12
RS
3585 "TARGET_SVE"
3586 {
915d28fe
RS
3587 emit_insn ((<hi_lanes_optab>
3588 ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
3589 : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
3590 (operands[0], operands[1]));
3591 DONE;
3592 }
3593)
3594
;; The unpack instructions themselves, one per member of UNPACK.  The
;; destination is printed with the wide element suffix and the source
;; with the narrow one.  Named so that the vec_unpack expander for
;; SVE_BHSI modes can call the generators.
3595(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
3596 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3597 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
3598 UNPACK))]
3599 "TARGET_SVE"
3600 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
3601)
3602
3603;; -------------------------------------------------------------------------
3604;; ---- [INT<-FP] Conversions
3605;; -------------------------------------------------------------------------
3606;; Includes:
3607;; - FCVTZS
3608;; - FCVTZU
3609;; -------------------------------------------------------------------------
3610
3611;; Unpredicated conversion of floats to integers of the same size (HF to HI,
3612;; SF to SI or DF to DI).
;; Operand 1 is the FP input.  The expander supplies operand 2 with
;; aarch64_ptrue_reg and wraps the conversion in UNSPEC_MERGE_PTRUE, the
;; form matched by the predicated conversion patterns in this section.
3613(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
3614 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3615 (unspec:<V_INT_EQUIV>
3616 [(match_dup 2)
3617 (FIXUORS:<V_INT_EQUIV>
3618 (match_operand:SVE_F 1 "register_operand"))]
3619 UNSPEC_MERGE_PTRUE))]
3620 "TARGET_SVE"
3621 {
3622 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
3623 }
3624)
3625
915d28fe
RS
3626;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
;; The source (operand 2) is VNx8HF and is always printed with a .h
;; suffix; the destination suffix follows the integer mode.  Operand 1
;; is the governing predicate for the destination mode, printed with /m.
;; The pattern name uses vnx8hf for the source mode, in line with the
;; vnx4sf and vnx2df patterns that follow.
3627(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
3628 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
3629 (unspec:SVE_HSDI
3630 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3631 (FIXUORS:SVE_HSDI
3632 (match_operand:VNx8HF 2 "register_operand" "w"))]
3633 UNSPEC_MERGE_PTRUE))]
3634 "TARGET_SVE"
3635 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
3636)
3637
3638;; Conversion of SF to DI or SI, predicated with a PTRUE.
;; The source (operand 2) is VNx4SF and is always printed with a .s
;; suffix; the destination suffix follows the integer mode.  Operand 1
;; is the governing predicate for the destination mode, printed with /m.
3639(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
3640 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
3641 (unspec:SVE_SDI
3642 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3643 (FIXUORS:SVE_SDI
3644 (match_operand:VNx4SF 2 "register_operand" "w"))]
3645 UNSPEC_MERGE_PTRUE))]
3646 "TARGET_SVE"
3647 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
3648)
3649
3650;; Conversion of DF to DI or SI, predicated with a PTRUE.
;; The source (operand 2) is VNx2DF and is always printed with a .d
;; suffix.  Unlike the HF and SF patterns, the predicate (operand 1) is
;; always VNx2BI, i.e. it follows the source mode rather than the
;; destination mode.
3651(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
3652 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
3653 (unspec:SVE_SDI
3654 [(match_operand:VNx2BI 1 "register_operand" "Upl")
3655 (FIXUORS:SVE_SDI
3656 (match_operand:VNx2DF 2 "register_operand" "w"))]
3657 UNSPEC_MERGE_PTRUE))]
3658 "TARGET_SVE"
3659 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
3660)
3661
3662;; -------------------------------------------------------------------------
3663;; ---- [INT<-FP] Packs
3664;; -------------------------------------------------------------------------
3665;; The patterns in this section are synthetic.
3666;; -------------------------------------------------------------------------
3667
43cacb12
RS
3668;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Expands to three sets.  Each VNx2DF input (operands 1 and 2) is
;; converted into its own fresh VNx4SI temporary (operands 4 and 5) under
;; the VNx2BI predicate created in operand 3; the two temporaries are then
;; combined into operand 0 with UNSPEC_UZP1.
3669(define_expand "vec_pack_<su>fix_trunc_vnx2df"
3670 [(set (match_dup 4)
3671 (unspec:VNx4SI
3672 [(match_dup 3)
3673 (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
3674 UNSPEC_MERGE_PTRUE))
3675 (set (match_dup 5)
3676 (unspec:VNx4SI
3677 [(match_dup 3)
3678 (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
3679 UNSPEC_MERGE_PTRUE))
3680 (set (match_operand:VNx4SI 0 "register_operand")
3681 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
3682 "TARGET_SVE"
3683 {
16de3637 3684 operands[3] = aarch64_ptrue_reg (VNx2BImode);
43cacb12
RS
3685 operands[4] = gen_reg_rtx (VNx4SImode);
3686 operands[5] = gen_reg_rtx (VNx4SImode);
3687 }
3688)
f1739b48 3689
915d28fe
RS
3690;; -------------------------------------------------------------------------
3691;; ---- [INT<-FP] Unpacks
3692;; -------------------------------------------------------------------------
3693;; No patterns here yet!
3694;; -------------------------------------------------------------------------
9d4ac06e 3695
915d28fe
RS
3696;; -------------------------------------------------------------------------
3697;; ---- [FP<-INT] Conversions
3698;; -------------------------------------------------------------------------
3699;; Includes:
3700;; - SCVTF
3701;; - UCVTF
3702;; -------------------------------------------------------------------------
a08acce8 3703
915d28fe
RS
3704;; Unpredicated conversion of integers to floats of the same size
3705;; (HI to HF, SI to SF or DI to DF).
;; Operand 1 is the integer input.  The expander supplies operand 2 with
;; aarch64_ptrue_reg and wraps the conversion in UNSPEC_MERGE_PTRUE, the
;; form matched by the predicated conversion patterns in this section.
3706(define_expand "<optab><v_int_equiv><mode>2"
3707 [(set (match_operand:SVE_F 0 "register_operand")
a08acce8 3708 (unspec:SVE_F
915d28fe
RS
3709 [(match_dup 2)
3710 (FLOATUORS:SVE_F
3711 (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
3712 UNSPEC_MERGE_PTRUE))]
a08acce8 3713 "TARGET_SVE"
f4fde1b3 3714 {
915d28fe 3715 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
f4fde1b3 3716 }
b41d1f6e
RS
3717)
3718
915d28fe
RS
3719;; Conversion of DI, SI or HI to the same number of HFs, predicated
3720;; with a PTRUE.
;; The destination is VNx8HF and is always printed with a .h suffix; the
;; source suffix follows the integer mode.  Operand 1 is the governing
;; predicate for the source mode, printed with /m.
3721(define_insn "*<optab><mode>vnx8hf2"
3722 [(set (match_operand:VNx8HF 0 "register_operand" "=w")
3723 (unspec:VNx8HF
3724 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3725 (FLOATUORS:VNx8HF
3726 (match_operand:SVE_HSDI 2 "register_operand" "w"))]
3727 UNSPEC_MERGE_PTRUE))]
3728 "TARGET_SVE"
3729 "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
b41d1f6e
RS
3730)
3731
915d28fe
RS
3732;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
;; The destination is VNx4SF and is always printed with a .s suffix; the
;; source suffix follows the integer mode.  Operand 1 is the governing
;; predicate for the source mode, printed with /m.
3733(define_insn "*<optab><mode>vnx4sf2"
3734 [(set (match_operand:VNx4SF 0 "register_operand" "=w")
3735 (unspec:VNx4SF
3736 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3737 (FLOATUORS:VNx4SF
3738 (match_operand:SVE_SDI 2 "register_operand" "w"))]
3739 UNSPEC_MERGE_PTRUE))]
f1739b48 3740 "TARGET_SVE"
915d28fe 3741 "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
f1739b48 3742)
6c9c7b73 3743
915d28fe
RS
3744;; Conversion of DI or SI to DF, predicated with a PTRUE.
;; The destination is VNx2DF and is always printed with a .d suffix, and
;; the predicate (operand 1) is always VNx2BI whatever the source mode.
;; The pattern is named (no leading *) because the
;; vec_unpack<su_optab>_float_<perm_hilo>_vnx4si expander calls its
;; generator directly.
3745(define_insn "aarch64_sve_<optab><mode>vnx2df2"
3746 [(set (match_operand:VNx2DF 0 "register_operand" "=w")
3747 (unspec:VNx2DF
3748 [(match_operand:VNx2BI 1 "register_operand" "Upl")
3749 (FLOATUORS:VNx2DF
3750 (match_operand:SVE_SDI 2 "register_operand" "w"))]
3751 UNSPEC_MERGE_PTRUE))]
6c9c7b73 3752 "TARGET_SVE"
915d28fe
RS
3753 "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
3754)
6c9c7b73 3755
915d28fe
RS
3756;; -------------------------------------------------------------------------
3757;; ---- [FP<-INT] Packs
3758;; -------------------------------------------------------------------------
3759;; No patterns here yet!
3760;; -------------------------------------------------------------------------
6c9c7b73 3761
915d28fe
RS
3762;; -------------------------------------------------------------------------
3763;; ---- [FP<-INT] Unpacks
3764;; -------------------------------------------------------------------------
3765;; The patterns in this section are synthetic.
3766;; -------------------------------------------------------------------------
3767
3768;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
3769;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
3770;; unpacked VNx4SI to VNx2DF.
;; In the code below the unpack step is done by a ZIP1 or ZIP2 (chosen by
;; <hi_lanes_optab>) of operand 1 with itself into a VNx4SI temporary,
;; which is then converted under a VNx2BI predicate from
;; aarch64_ptrue_reg.
3771(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
3772 [(match_operand:VNx2DF 0 "register_operand")
3773 (FLOATUORS:VNx2DF
3774 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
3775 UNPACK_UNSIGNED))]
3776 "TARGET_SVE"
3777 {
3778 /* Use ZIP to do the unpack, since we don't care about the upper halves
3779 and since it has the nice property of not needing any subregs.
3780 If using UUNPK* turns out to be preferable, we could model it as
3781 a ZIP whose first operand is zero. */
3782 rtx temp = gen_reg_rtx (VNx4SImode);
3783 emit_insn ((<hi_lanes_optab>
3784 ? gen_aarch64_sve_zip2vnx4si
3785 : gen_aarch64_sve_zip1vnx4si)
3786 (temp, operands[1], operands[1]));
3787 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
3788 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
3789 ptrue, temp));
6c9c7b73
AM
3790 DONE;
3791 }
3792)
3793
915d28fe
RS
3794;; -------------------------------------------------------------------------
3795;; ---- [FP<-FP] Packs
3796;; -------------------------------------------------------------------------
3797;; Includes:
3798;; - FCVT
3799;; -------------------------------------------------------------------------
3800
3801;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
3802;; the results into a single vector.
;; Expands to three sets.  Each wide input (operands 1 and 2) is
;; float-converted into its own fresh narrow-mode temporary (operands 4
;; and 5) under the wide-mode predicate created in operand 3; the two
;; temporaries are then combined into operand 0 with UNSPEC_UZP1.
3803(define_expand "vec_pack_trunc_<Vwide>"
3804 [(set (match_dup 4)
3805 (unspec:SVE_HSF
3806 [(match_dup 3)
3807 (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
3808 UNSPEC_FLOAT_CONVERT)]
3809 UNSPEC_MERGE_PTRUE))
3810 (set (match_dup 5)
3811 (unspec:SVE_HSF
3812 [(match_dup 3)
3813 (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
3814 UNSPEC_FLOAT_CONVERT)]
3815 UNSPEC_MERGE_PTRUE))
3816 (set (match_operand:SVE_HSF 0 "register_operand")
3817 (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
6c9c7b73
AM
3818 "TARGET_SVE"
3819 {
915d28fe
RS
3820 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
3821 operands[4] = gen_reg_rtx (<MODE>mode);
3822 operands[5] = gen_reg_rtx (<MODE>mode);
6c9c7b73
AM
3823 }
3824)
9feeafd7 3825
915d28fe
RS
3826;; Conversion of DFs to the same number of SFs, or SFs to the same number
3827;; of HFs.
;; Operand 2 is the wide input, printed with the wide element suffix;
;; operand 0 is printed with the narrow one.  The predicate (operand 1)
;; has the mode that goes with the wide input and is printed with /m.
3828(define_insn "*trunc<Vwide><mode>2"
3829 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
3830 (unspec:SVE_HSF
3831 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
3832 (unspec:SVE_HSF
3833 [(match_operand:<VWIDE> 2 "register_operand" "w")]
3834 UNSPEC_FLOAT_CONVERT)]
3835 UNSPEC_MERGE_PTRUE))]
9feeafd7 3836 "TARGET_SVE"
915d28fe 3837 "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
9feeafd7 3838)
a9fad8fe 3839
915d28fe
RS
3840;; -------------------------------------------------------------------------
3841;; ---- [FP<-FP] Unpacks
3842;; -------------------------------------------------------------------------
3843;; Includes:
3844;; - FCVT
3845;; -------------------------------------------------------------------------
3846
3847;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
3848;; First unpack the source without conversion, then float-convert the
3849;; unpacked source.
;; In the code below the unpack step is done by a ZIP1 or ZIP2 (chosen by
;; <hi_lanes_optab>) of operand 1 with itself into a temporary of the
;; narrow mode, which is then passed to the aarch64_sve_extend pattern
;; together with a wide-mode predicate from aarch64_ptrue_reg.
3850(define_expand "vec_unpacks_<perm_hilo>_<mode>"
3851 [(match_operand:<VWIDE> 0 "register_operand")
3852 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
3853 UNPACK_UNSIGNED)]
a9fad8fe
AM
3854 "TARGET_SVE"
3855 {
915d28fe
RS
3856 /* Use ZIP to do the unpack, since we don't care about the upper halves
3857 and since it has the nice property of not needing any subregs.
3858 If using UUNPK* turns out to be preferable, we could model it as
3859 a ZIP whose first operand is zero. */
3860 rtx temp = gen_reg_rtx (<MODE>mode);
3861 emit_insn ((<hi_lanes_optab>
3862 ? gen_aarch64_sve_zip2<mode>
3863 : gen_aarch64_sve_zip1<mode>)
3864 (temp, operands[1], operands[1]));
3865 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
3866 emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
3867 ptrue, temp));
a9fad8fe
AM
3868 DONE;
3869 }
3870)
3871
915d28fe
RS
3872;; Conversion of SFs to the same number of DFs, or HFs to the same number
3873;; of SFs.
;; Operand 2 is the narrow input, printed with the narrow element suffix;
;; operand 0 is printed with the wide one.  The predicate (operand 1) has
;; the mode that goes with the wide result and is printed with /m.  The
;; pattern is named (no leading *) because the
;; vec_unpacks_<perm_hilo>_<mode> expander calls its generator directly.
3874(define_insn "aarch64_sve_extend<mode><Vwide>2"
3875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3876 (unspec:<VWIDE>
3877 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
3878 (unspec:<VWIDE>
3879 [(match_operand:SVE_HSF 2 "register_operand" "w")]
3880 UNSPEC_FLOAT_CONVERT)]
a9fad8fe
AM
3881 UNSPEC_MERGE_PTRUE))]
3882 "TARGET_SVE"
915d28fe 3883 "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
a9fad8fe
AM
3884)
3885
915d28fe
RS
3886;; -------------------------------------------------------------------------
3887;; ---- [PRED<-PRED] Packs
3888;; -------------------------------------------------------------------------
3889;; Includes:
3890;; - UZP1
3891;; -------------------------------------------------------------------------
a9fad8fe 3892
915d28fe
RS
3893;; Predicate pack. Use UZP1 on the narrower type, which discards
3894;; the high part of each wide element.
;; Operands 1 and 2 are the two wide (<VWIDE>) predicate inputs and
;; operand 0 is the narrow result; all three registers are printed with
;; the narrow element suffix.
3895(define_insn "vec_pack_trunc_<Vwide>"
3896 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
3897 (unspec:PRED_BHS
3898 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
3899 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
3900 UNSPEC_PACK))]
a9fad8fe 3901 "TARGET_SVE"
915d28fe 3902 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
a9fad8fe 3903)
3a0afad0 3904
915d28fe
RS
3905;; -------------------------------------------------------------------------
3906;; ---- [PRED<-PRED] Unpacks
3907;; -------------------------------------------------------------------------
3908;; Includes:
3909;; - PUNPKHI
3910;; - PUNPKLO
3911;; -------------------------------------------------------------------------
3912
3913;; Unpack the low or high half of a predicate, where "high" refers to
3914;; the low-numbered lanes for big-endian and the high-numbered lanes
3915;; for little-endian.
;; The expander emits exactly one instruction: it picks the punpkhi or
;; punpklo generator according to <hi_lanes_optab>.  The generator names
;; carry no signedness, so the signed and unsigned variants of this
;; expander both map onto the same PUNPK patterns.
3916(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
3917 [(match_operand:<VWIDE> 0 "register_operand")
3918 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
3919 UNPACK)]
3a0afad0
PK
3920 "TARGET_SVE"
3921 {
915d28fe
RS
3922 emit_insn ((<hi_lanes_optab>
3923 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
3924 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
3925 (operands[0], operands[1]));
3a0afad0
PK
3926 DONE;
3927 }
3928)
915d28fe
RS
3929
;; The predicate unpack instructions.  Only the UNPACK_UNSIGNED codes are
;; iterated over, so there is one pattern per half (hi or lo).  The
;; template always prints the destination with .h and the source with .b,
;; whatever the predicate mode.  Named so that the predicate vec_unpack
;; expander can call the generators.
3930(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
3931 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
3932 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
3933 UNPACK_UNSIGNED))]
3934 "TARGET_SVE"
3935 "punpk<perm_hilo>\t%0.h, %1.b"
3936)