1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Note on the handling of big-endian SVE
22 ;; --------------------------------------
24 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
25 ;; same way as movdi or movti would: the first byte of memory goes
26 ;; into the most significant byte of the register and the last byte
27 ;; of memory goes into the least significant byte of the register.
28 ;; This is the most natural ordering for Advanced SIMD and matches
29 ;; the ABI layout for 64-bit and 128-bit vector types.
31 ;; As a result, the order of bytes within the register is what GCC
32 ;; expects for a big-endian target, and subreg offsets therefore work
33 ;; as expected, with the first element in memory having subreg offset 0
34 ;; and the last element in memory having the subreg offset associated
35 ;; with a big-endian lowpart. However, this ordering also means that
36 ;; GCC's lane numbering does not match the architecture's numbering:
37 ;; GCC always treats the element at the lowest address in memory
38 ;; (subreg offset 0) as element 0, while the architecture treats
39 ;; the least significant end of the register as element 0.
41 ;; The situation for SVE is different. We want the layout of the
42 ;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
43 ;; logically, a mov<mode> load must be indistinguishable from a
44 ;; maskload<mode> whose mask is all true. We therefore need the
45 ;; register layout to match LD1 rather than LDR. The ABI layout of
46 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
48 ;; As a result, the architecture lane numbering matches GCC's lane
49 ;; numbering, with element 0 always being the first in memory.
52 ;; - Applying a subreg offset to a register does not give the element
53 ;; that GCC expects: the first element in memory has the subreg offset
54 ;; associated with a big-endian lowpart while the last element in memory
55 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
57 ;; - We cannot use LDR and STR for spill slots that might be accessed
58 ;; via subregs, since although the elements have the order GCC expects,
59 ;; the order of the bytes within the elements is different. We instead
60 ;; access spill slots via LD1 and ST1, using secondary reloads to
61 ;; reserve a predicate register.
65 (define_expand "mov<mode>"
;; NOTE(review): the embedded original numbering skips lines (68-69, 73, 76,
;; 78-80, 82, 85-87, 90, 92-95), so parts of this expander are missing from
;; this extract — verify against the full file before relying on it.
66 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
67 (match_operand:SVE_ALL 1 "general_operand"))]
70 /* Use the predicated load and store patterns where possible.
71 This is required for big-endian targets (see the comment at the
72 head of the file) and increases the addressing choices for
74 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
75 && can_create_pseudo_p ())
;; Memory moves go through the predicated LD1/ST1 expansion path.
77 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
81 if (CONSTANT_P (operands[1]))
;; Constants are legitimized via the generic SVE immediate-move helper.
83 aarch64_expand_mov_immediate (operands[0], operands[1],
84 gen_vec_duplicate<mode>);
88 /* Optimize subregs on big-endian targets: we can use REV[BHW]
89 instead of going through memory. */
91 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
96 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
97 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
98 ;; for details. We use a special predicate for operand 2 to reduce
99 ;; the number of patterns.
;; NOTE(review): original lines 102, 105, 107, 109-110 and the closing lines
;; are missing from this extract — the unspec wrapper and output template are
;; not visible here.
100 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
101 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
103 [(match_operand:VNx16BI 1 "register_operand" "Upl")
104 (match_operand 2 "aarch64_any_register_operand" "w")]
106 "TARGET_SVE && BYTES_BIG_ENDIAN"
;; Split only after RA, once hard registers are known.
108 "&& reload_completed"
111 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
116 ;; Unpredicated moves (little-endian). Only allow memory operations
117 ;; during and after RA; before RA we want the predicated load and
118 ;; store patterns to be used instead.
;; NOTE(review): original lines 122-123 and 127-130 (part of the condition
;; and the output template alternatives) are missing from this extract.
119 (define_insn "*aarch64_sve_mov<mode>_le"
120 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
121 (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
124 && ((lra_in_progress || reload_completed)
125 || (register_operand (operands[0], <MODE>mode)
126 && nonmemory_operand (operands[1], <MODE>mode)))"
;; Final alternative: emit an SVE move-immediate for Dn constants.
131 * return aarch64_output_sve_mov_immediate (operands[1]);"
134 ;; Unpredicated moves (big-endian). Memory accesses require secondary
;; NOTE(review): original lines 135 (rest of this comment), 140-141 and 143
;; are missing from this extract; memory alternatives are handled by the
;; aarch64_sve_reload_be secondary reload below.
136 (define_insn "*aarch64_sve_mov<mode>_be"
137 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
138 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
139 "TARGET_SVE && BYTES_BIG_ENDIAN"
142 * return aarch64_output_sve_mov_immediate (operands[1]);"
145 ;; Handle big-endian memory reloads. We use byte PTRUE for all modes
146 ;; to try to encourage reuse.
;; NOTE(review): original lines 148, 150, 153, 156, 162 and 165+ are missing
;; from this extract (including operand 1 of the parallel).
147 (define_expand "aarch64_sve_reload_be"
149 [(set (match_operand 0)
151 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
152 "TARGET_SVE && BYTES_BIG_ENDIAN"
154 /* Create a PTRUE. */
155 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
157 /* Refer to the PTRUE in the appropriate mode for this move. */
158 machine_mode mode = GET_MODE (operands[0]);
159 machine_mode pred_mode
160 = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
;; Reinterpret the all-ones VNx16BI predicate in the element-sized
;; predicate mode so the same register can serve every vector mode.
161 rtx pred = gen_lowpart (pred_mode, operands[2]);
163 /* Emit a predicated load or store. */
164 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
169 ;; A predicated load or store for which the predicate is known to be
170 ;; all-true. Note that this pattern is generated directly by
171 ;; aarch64_emit_sve_pred_move, so changes to this pattern will
172 ;; need changes there as well.
;; NOTE(review): original lines 175, 179, 182-183 and 189-190 are missing
;; from this extract (including the unspec wrapper and first alternative).
173 (define_insn_and_split "@aarch64_pred_mov<mode>"
174 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
176 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
177 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
178 UNSPEC_MERGE_PTRUE))]
;; At most one of the two vector operands may be memory.
180 && (register_operand (operands[0], <MODE>mode)
181 || register_operand (operands[2], <MODE>mode))"
184 ld1<Vesize>\t%0.<Vetype>, %1/z, %2
185 st1<Vesize>\t%2.<Vetype>, %1, %0"
;; Register-to-register case: drop the predicate and split to a plain move.
186 "&& register_operand (operands[0], <MODE>mode)
187 && register_operand (operands[2], <MODE>mode)"
188 [(set (match_dup 0) (match_dup 2))]
;; Misaligned moves: SVE LD1/ST1 have no extra alignment requirement,
;; so this simply forwards to the normal move expander.
191 (define_expand "movmisalign<mode>"
192 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
193 (match_operand:SVE_ALL 1 "general_operand"))]
;; NOTE(review): original lines 194-195 and 198+ are missing from this extract.
196 /* Equivalent to a normal move for our purposes. */
197 emit_move_insn (operands[0], operands[1]);
;; Predicated load: inactive lanes are zeroed (/z qualifier on LD1).
;; NOTE(review): original lines 204, 207-208 and 210-211 are missing from
;; this extract (unspec wrapper, unspec code and insn condition).
202 (define_insn "maskload<mode><vpred>"
203 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
205 [(match_operand:<VPRED> 2 "register_operand" "Upl")
206 (match_operand:SVE_ALL 1 "memory_operand" "m")]
209 "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
;; Predicated store: only active lanes are written back to memory.
;; NOTE(review): original lines 216-218 and 220-221 are missing from this
;; extract (remaining unspec operands, unspec code and insn condition).
212 (define_insn "maskstore<mode><vpred>"
213 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
214 (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
215 (match_operand:SVE_ALL 1 "register_operand" "w")
219 "st1<Vesize>\t%1.<Vetype>, %2, %0"
222 ;; Unpredicated gather loads.
223 (define_expand "gather_load<mode>"
224 [(set (match_operand:SVE_SD 0 "register_operand")
227 (match_operand:DI 1 "aarch64_reg_or_zero")
228 (match_operand:<V_INT_EQUIV> 2 "register_operand")
229 (match_operand:DI 3 "const_int_operand")
230 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
235 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
239 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
240 ;; unsigned extension and false for signed extension.
241 (define_insn "mask_gather_load<mode>"
242 [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
244 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
245 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
246 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
247 (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
248 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
253 ld1w\t%0.s, %5/z, [%2.s]
254 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
255 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
256 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
257 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
260 ;; Predicated gather loads for 64-bit elements. The value of operand 3
261 ;; doesn't matter in this case.
262 (define_insn "mask_gather_load<mode>"
263 [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
265 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
266 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
267 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
268 (match_operand:DI 3 "const_int_operand")
269 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
274 ld1d\t%0.d, %5/z, [%2.d]
275 ld1d\t%0.d, %5/z, [%1, %2.d]
276 ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
279 ;; Unpredicated scatter store.
280 (define_expand "scatter_store<mode>"
281 [(set (mem:BLK (scratch))
284 (match_operand:DI 0 "aarch64_reg_or_zero")
285 (match_operand:<V_INT_EQUIV> 1 "register_operand")
286 (match_operand:DI 2 "const_int_operand")
287 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
288 (match_operand:SVE_SD 4 "register_operand")]
289 UNSPEC_ST1_SCATTER))]
292 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
296 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
297 ;; unsigned extension and false for signed extension.
298 (define_insn "mask_scatter_store<mode>"
299 [(set (mem:BLK (scratch))
301 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
302 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
303 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
304 (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
305 (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
306 (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
307 UNSPEC_ST1_SCATTER))]
310 st1w\t%4.s, %5, [%1.s]
311 st1w\t%4.s, %5, [%0, %1.s, sxtw]
312 st1w\t%4.s, %5, [%0, %1.s, uxtw]
313 st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
314 st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
317 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
318 ;; doesn't matter in this case.
319 (define_insn "mask_scatter_store<mode>"
320 [(set (mem:BLK (scratch))
322 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
323 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
324 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
325 (match_operand:DI 2 "const_int_operand")
326 (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
327 (match_operand:SVE_D 4 "register_operand" "w, w, w")]
328 UNSPEC_ST1_SCATTER))]
331 st1d\t%4.d, %5, [%1.d]
332 st1d\t%4.d, %5, [%0, %1.d]
333 st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
336 ;; SVE structure moves.
337 (define_expand "mov<mode>"
338 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
339 (match_operand:SVE_STRUCT 1 "general_operand"))]
342 /* Big-endian loads and stores need to be done via LD1 and ST1;
343 see the comment at the head of the file for details. */
344 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
347 gcc_assert (can_create_pseudo_p ());
348 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
352 if (CONSTANT_P (operands[1]))
354 aarch64_expand_mov_immediate (operands[0], operands[1]);
360 ;; Unpredicated structure moves (little-endian).
361 (define_insn "*aarch64_sve_mov<mode>_le"
362 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
363 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
364 "TARGET_SVE && !BYTES_BIG_ENDIAN"
366 [(set_attr "length" "<insn_length>")]
369 ;; Unpredicated structure moves (big-endian). Memory accesses require
370 ;; secondary reloads.
;; Fixed: this pattern was named "*aarch64_sve_mov<mode>_le", which collided
;; with the little-endian structure pattern above and contradicted the
;; BYTES_BIG_ENDIAN condition and the comment; it must be the "_be" variant.
;; NOTE(review): original lines 375 and 377-378 (output template "#" and the
;; closing of the insn) are missing from this extract.
371 (define_insn "*aarch64_sve_mov<mode>_be"
372 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
373 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
374 "TARGET_SVE && BYTES_BIG_ENDIAN"
376 [(set_attr "length" "<insn_length>")]
379 ;; Split unpredicated structure moves into pieces. This is the same
380 ;; for both big-endian and little-endian code, although it only needs
381 ;; to handle memory operands for little-endian code.
383 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
384 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
385 "TARGET_SVE && reload_completed"
388 rtx dest = operands[0];
389 rtx src = operands[1];
390 if (REG_P (dest) && REG_P (src))
391 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
393 for (unsigned int i = 0; i < <vector_count>; ++i)
395 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
396 i * BYTES_PER_SVE_VECTOR);
397 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
398 i * BYTES_PER_SVE_VECTOR);
399 emit_insn (gen_rtx_SET (subdest, subsrc));
405 ;; Predicated structure moves. This works for both endiannesses but in
406 ;; practice is only useful for big-endian.
407 (define_insn_and_split "@aarch64_pred_mov<mode>"
408 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
410 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
411 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
412 UNSPEC_MERGE_PTRUE))]
414 && (register_operand (operands[0], <MODE>mode)
415 || register_operand (operands[2], <MODE>mode))"
417 "&& reload_completed"
420 for (unsigned int i = 0; i < <vector_count>; ++i)
422 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
424 i * BYTES_PER_SVE_VECTOR);
425 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
427 i * BYTES_PER_SVE_VECTOR);
428 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
432 [(set_attr "length" "<insn_length>")]
435 (define_expand "mov<mode>"
436 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
437 (match_operand:PRED_ALL 1 "general_operand"))]
440 if (GET_CODE (operands[0]) == MEM)
441 operands[1] = force_reg (<MODE>mode, operands[1]);
445 (define_insn "*aarch64_sve_mov<mode>"
446 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
447 (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
449 && (register_operand (operands[0], <MODE>mode)
450 || register_operand (operands[1], <MODE>mode))"
456 * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
459 ;; Handle extractions from a predicate by converting to an integer vector
460 ;; and extracting from there.
461 (define_expand "vec_extract<vpred><Vel>"
462 [(match_operand:<VEL> 0 "register_operand")
463 (match_operand:<VPRED> 1 "register_operand")
464 (match_operand:SI 2 "nonmemory_operand")
465 ;; Dummy operand to which we can attach the iterator.
466 (reg:SVE_I V0_REGNUM)]
469 rtx tmp = gen_reg_rtx (<MODE>mode);
470 emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
471 CONST1_RTX (<MODE>mode),
472 CONST0_RTX (<MODE>mode)));
473 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
478 (define_expand "vec_extract<mode><Vel>"
479 [(set (match_operand:<VEL> 0 "register_operand")
481 (match_operand:SVE_ALL 1 "register_operand")
482 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
486 if (poly_int_rtx_p (operands[2], &val)
487 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
489 /* The last element can be extracted with a LASTB and a false
491 rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
492 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
495 if (!CONST_INT_P (operands[2]))
497 /* Create an index with operand[2] as the base and -1 as the step.
498 It will then be zero for the element we care about. */
499 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
500 index = force_reg (<VEL_INT>mode, index);
501 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
502 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
504 /* Get a predicate that is true for only that element. */
505 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
506 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
507 rtx sel = gen_reg_rtx (<VPRED>mode);
508 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
510 /* Select the element using LASTB. */
511 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
517 ;; Extract element zero. This is a special case because we want to force
518 ;; the registers to be the same for the second alternative, and then
519 ;; split the instruction into nothing after RA.
520 (define_insn_and_split "*vec_extract<mode><Vel>_0"
521 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
523 (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
524 (parallel [(const_int 0)])))]
527 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
528 switch (which_alternative)
531 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
535 return "st1\\t{%1.<Vetype>}[0], %0";
541 && REG_P (operands[0])
542 && REGNO (operands[0]) == REGNO (operands[1])"
545 emit_note (NOTE_INSN_DELETED);
548 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
551 ;; Extract an element from the Advanced SIMD portion of the register.
552 ;; We don't just reuse the aarch64-simd.md pattern because we don't
553 ;; want any change in lane number on big-endian targets.
554 (define_insn "*vec_extract<mode><Vel>_v128"
555 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
557 (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
558 (parallel [(match_operand:SI 2 "const_int_operand")])))]
560 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
562 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
563 switch (which_alternative)
566 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
568 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
570 return "st1\\t{%1.<Vetype>}[%2], %0";
575 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
578 ;; Extract an element in the range of DUP. This pattern allows the
579 ;; source and destination to be different.
580 (define_insn "*vec_extract<mode><Vel>_dup"
581 [(set (match_operand:<VEL> 0 "register_operand" "=w")
583 (match_operand:SVE_ALL 1 "register_operand" "w")
584 (parallel [(match_operand:SI 2 "const_int_operand")])))]
586 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
588 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
589 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
593 ;; Extract an element outside the range of DUP. This pattern requires the
594 ;; source and destination to be the same.
595 (define_insn "*vec_extract<mode><Vel>_ext"
596 [(set (match_operand:<VEL> 0 "register_operand" "=w")
598 (match_operand:SVE_ALL 1 "register_operand" "0")
599 (parallel [(match_operand:SI 2 "const_int_operand")])))]
600 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
602 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
603 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
604 return "ext\t%0.b, %0.b, %0.b, #%2";
608 ;; Extract the last active element of operand 1 into operand 0.
609 ;; If no elements are active, extract the last inactive element instead.
610 (define_insn "extract_last_<mode>"
611 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
613 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
614 (match_operand:SVE_ALL 2 "register_operand" "w, w")]
618 lastb\t%<vwcore>0, %1, %2.<Vetype>
619 lastb\t%<Vetype>0, %1, %2.<Vetype>"
622 (define_expand "vec_duplicate<mode>"
624 [(set (match_operand:SVE_ALL 0 "register_operand")
625 (vec_duplicate:SVE_ALL
626 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
627 (clobber (scratch:<VPRED>))])]
630 if (MEM_P (operands[1]))
632 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
633 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
634 CONST0_RTX (<MODE>mode)));
640 ;; Accept memory operands for the benefit of combine, and also in case
641 ;; the scalar input gets spilled to memory during RA. We want to split
642 ;; the load at the first opportunity in order to allow the PTRUE to be
643 ;; optimized with surrounding code.
644 (define_insn_and_split "*vec_duplicate<mode>_reg"
645 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
646 (vec_duplicate:SVE_ALL
647 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
648 (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
651 mov\t%0.<Vetype>, %<vwcore>1
652 mov\t%0.<Vetype>, %<Vetype>1
654 "&& MEM_P (operands[1])"
657 if (GET_CODE (operands[2]) == SCRATCH)
658 operands[2] = gen_reg_rtx (<VPRED>mode);
659 emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
660 emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
661 CONST0_RTX (<MODE>mode)));
664 [(set_attr "length" "4,4,8")]
667 ;; This is used for vec_duplicate<mode>s from memory, but can also
668 ;; be used by combine to optimize selects of a vec_duplicate<mode>
;; NOTE(review): original lines 669, 672, 677-678 and 680-681 are missing
;; from this extract (end of comment, unspec wrapper, unspec code and insn
;; condition).
670 (define_insn "sve_ld1r<mode>"
671 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
673 [(match_operand:<VPRED> 1 "register_operand" "Upl")
674 (vec_duplicate:SVE_ALL
675 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
;; Operand 3 pins the inactive-lane value to zero, matching LD1R's /z form.
676 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
679 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
682 ;; Load 128 bits from memory and duplicate to fill a vector. Since there
683 ;; are so few operations on 128-bit "elements", we don't define a VNx1TI
684 ;; and simply use vectors of bytes instead.
685 (define_insn "*sve_ld1rq<Vesize>"
686 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
688 [(match_operand:<VPRED> 1 "register_operand" "Upl")
689 (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
692 "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
695 ;; Implement a predicate broadcast by shifting the low bit of the scalar
696 ;; input into the top bit and using a WHILELO. An alternative would be to
697 ;; duplicate the input and do a compare with zero.
698 (define_expand "vec_duplicate<mode>"
699 [(set (match_operand:PRED_ALL 0 "register_operand")
700 (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
703 rtx tmp = gen_reg_rtx (DImode);
704 rtx op1 = gen_lowpart (DImode, operands[1]);
705 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
706 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
711 (define_insn "vec_series<mode>"
712 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
714 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
715 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
718 index\t%0.<Vetype>, #%1, %<vw>2
719 index\t%0.<Vetype>, %<vw>1, #%2
720 index\t%0.<Vetype>, %<vw>1, %<vw>2"
723 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
724 ;; of an INDEX instruction.
725 (define_insn "*vec_series<mode>_plus"
726 [(set (match_operand:SVE_I 0 "register_operand" "=w")
729 (match_operand:<VEL> 1 "register_operand" "r"))
730 (match_operand:SVE_I 2 "immediate_operand")))]
731 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
733 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
734 return "index\t%0.<Vetype>, %<vw>1, #%2";
738 ;; Unpredicated LD[234].
739 (define_expand "vec_load_lanes<mode><vsingle>"
740 [(set (match_operand:SVE_STRUCT 0 "register_operand")
743 (match_operand:SVE_STRUCT 1 "memory_operand")]
747 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
751 ;; Predicated LD[234].
752 (define_insn "vec_mask_load_lanes<mode><vsingle>"
753 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
755 [(match_operand:<VPRED> 2 "register_operand" "Upl")
756 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
759 "ld<vector_count><Vesize>\t%0, %2/z, %1"
762 ;; Unpredicated ST[234]. This is always a full update, so the dependence
763 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
764 ;; There doesn't seem to be any obvious benefit to treating the all-true
765 ;; case differently though. In particular, it's very unlikely that we'll
766 ;; only find out during RTL that a store_lanes is dead.
767 (define_expand "vec_store_lanes<mode><vsingle>"
768 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
771 (match_operand:SVE_STRUCT 1 "register_operand")
776 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
780 ;; Predicated ST[234].
781 (define_insn "vec_mask_store_lanes<mode><vsingle>"
782 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
784 [(match_operand:<VPRED> 2 "register_operand" "Upl")
785 (match_operand:SVE_STRUCT 1 "register_operand" "w")
789 "st<vector_count><Vesize>\t%1, %2, %0"
792 (define_expand "vec_perm<mode>"
793 [(match_operand:SVE_ALL 0 "register_operand")
794 (match_operand:SVE_ALL 1 "register_operand")
795 (match_operand:SVE_ALL 2 "register_operand")
796 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
797 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
799 aarch64_expand_sve_vec_perm (operands[0], operands[1],
800 operands[2], operands[3]);
805 (define_insn "*aarch64_sve_tbl<mode>"
806 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
808 [(match_operand:SVE_ALL 1 "register_operand" "w")
809 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
812 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
815 (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
816 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
817 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
818 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
821 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
824 (define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
825 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
826 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
827 (match_operand:SVE_ALL 2 "register_operand" "w")]
830 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
833 (define_insn "*aarch64_sve_rev64<mode>"
834 [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
836 [(match_operand:VNx2BI 1 "register_operand" "Upl")
837 (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
839 UNSPEC_MERGE_PTRUE))]
841 "rev<Vesize>\t%0.d, %1/m, %2.d"
844 (define_insn "*aarch64_sve_rev32<mode>"
845 [(set (match_operand:SVE_BH 0 "register_operand" "=w")
847 [(match_operand:VNx4BI 1 "register_operand" "Upl")
848 (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
850 UNSPEC_MERGE_PTRUE))]
852 "rev<Vesize>\t%0.s, %1/m, %2.s"
855 (define_insn "*aarch64_sve_rev16vnx16qi"
856 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
858 [(match_operand:VNx8BI 1 "register_operand" "Upl")
859 (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
861 UNSPEC_MERGE_PTRUE))]
863 "revb\t%0.h, %1/m, %2.h"
866 (define_insn "@aarch64_sve_rev<mode>"
867 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
868 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
871 "rev\t%0.<Vetype>, %1.<Vetype>")
873 (define_insn "*aarch64_sve_dup_lane<mode>"
874 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
875 (vec_duplicate:SVE_ALL
877 (match_operand:SVE_ALL 1 "register_operand" "w")
878 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
880 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
881 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
884 ;; Note that the immediate (third) operand is the lane index not
886 (define_insn "*aarch64_sve_ext<mode>"
887 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
888 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
889 (match_operand:SVE_ALL 2 "register_operand" "w")
890 (match_operand:SI 3 "const_int_operand")]
893 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
895 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
896 return "ext\\t%0.b, %0.b, %2.b, #%3";
900 (define_insn "add<mode>3"
901 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
903 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
904 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
907 add\t%0.<Vetype>, %0.<Vetype>, #%D2
908 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
909 * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
910 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
913 (define_insn "sub<mode>3"
914 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
916 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
917 (match_operand:SVE_I 2 "register_operand" "w, 0")))]
920 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
921 subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
924 ;; Unpredicated multiplication.
925 (define_expand "mul<mode>3"
926 [(set (match_operand:SVE_I 0 "register_operand")
930 (match_operand:SVE_I 1 "register_operand")
931 (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
932 UNSPEC_MERGE_PTRUE))]
935 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
939 ;; Multiplication predicated with a PTRUE. We don't actually need the
940 ;; predicate for the first alternative, but using Upa or X isn't likely
941 ;; to gain much and would make the instruction seem less uniform to the
942 ;; register allocator.
;; Predicated integer multiplication.  Alternative 1 allows an immediate
;; multiplier (vsm); the "%0" marker makes operands 2 and 3 commutative.
;; The ?&w alternative emits MOVPRFX for an untied destination.
943 (define_insn_and_split "*mul<mode>3"
944 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
946 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
948 (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
949 (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
950 UNSPEC_MERGE_PTRUE))]
954 mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
955 movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
956 ; Split the unpredicated form after reload, so that we don't have
957 ; the unnecessary PTRUE.
959 && !register_operand (operands[3], <MODE>mode)"
960 [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
962 [(set_attr "movprfx" "*,*,yes")]
965 ;; Unpredicated multiplications by a constant (post-RA only).
966 ;; These are generated by splitting a predicated instruction whose
967 ;; predicate is unused.
;; Immediate form of MUL; the destination is tied to operand 1 ("0").
968 (define_insn "*post_ra_mul<mode>3"
969 [(set (match_operand:SVE_I 0 "register_operand" "=w")
971 (match_operand:SVE_I 1 "register_operand" "0")
972 (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
973 "TARGET_SVE && reload_completed"
974 "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
;; Predicated multiply-add: operand 4 plus operand 2 * operand 3.
;; MAD ties the destination to a multiplicand (alt 0); MLA ties it to the
;; addend (alt 1); the last alternative uses MOVPRFX to copy the addend.
977 (define_insn "*madd<mode>"
978 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
981 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
982 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
983 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
985 (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
988 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
989 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
990 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
991 [(set_attr "movprfx" "*,*,yes")]
;; Predicated multiply-subtract: operand 2 * operand 3 subtracted from
;; operand 4.  MSB ties a multiplicand, MLS ties the minuend, mirroring
;; the MAD/MLA pair above.
994 (define_insn "*msub<mode>3"
995 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
997 (match_operand:SVE_I 4 "register_operand" "w, 0, w")
999 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1000 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
1001 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
1002 UNSPEC_MERGE_PTRUE)))]
1005 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
1006 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
1007 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
1008 [(set_attr "movprfx" "*,*,yes")]
1011 ;; Unpredicated highpart multiplication.
;; The expander wraps the operation in UNSPEC_MERGE_PTRUE with an
;; all-true predicate created below (operand 3).
1012 (define_expand "<su>mul<mode>3_highpart"
1013 [(set (match_operand:SVE_I 0 "register_operand")
1016 (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
1017 (match_operand:SVE_I 2 "register_operand")]
1019 UNSPEC_MERGE_PTRUE))]
1022 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1026 ;; Predicated highpart multiplication.
;; SMULH/UMULH via <su>; "%0" makes the multiplicands commutative and
;; the second alternative uses MOVPRFX for an untied destination.
1027 (define_insn "*<su>mul<mode>3_highpart"
1028 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1030 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1031 (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
1032 (match_operand:SVE_I 3 "register_operand" "w, w")]
1034 UNSPEC_MERGE_PTRUE))]
1037 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1038 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1039 [(set_attr "movprfx" "*,yes")]
1042 ;; Unpredicated division.
;; Only 32-bit and 64-bit element modes (SVE_SDI): SVE has no narrower
;; integer divide.
1043 (define_expand "<optab><mode>3"
1044 [(set (match_operand:SVE_SDI 0 "register_operand")
1047 (SVE_INT_BINARY_SD:SVE_SDI
1048 (match_operand:SVE_SDI 1 "register_operand")
1049 (match_operand:SVE_SDI 2 "register_operand"))]
1050 UNSPEC_MERGE_PTRUE))]
1053 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1057 ;; Division predicated with a PTRUE.
;; Alternative 1 ties the divisor and emits the reversed form
;; (<sve_int_op>r); the last alternative uses MOVPRFX.
1058 (define_insn "*<optab><mode>3"
1059 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
1061 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1062 (SVE_INT_BINARY_SD:SVE_SDI
1063 (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
1064 (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))]
1065 UNSPEC_MERGE_PTRUE))]
1068 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1069 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1070 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1071 [(set_attr "movprfx" "*,*,yes")]
1074 ;; Unpredicated NEG, NOT and POPCOUNT.
;; The expander supplies an all-true predicate as operand 2.
1075 (define_expand "<optab><mode>2"
1076 [(set (match_operand:SVE_I 0 "register_operand")
1079 (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
1080 UNSPEC_MERGE_PTRUE))]
1083 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1087 ;; NEG, NOT and POPCOUNT predicated with a PTRUE.
1088 (define_insn "*<optab><mode>2"
1089 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1091 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1092 (SVE_INT_UNARY:SVE_I
1093 (match_operand:SVE_I 2 "register_operand" "w"))]
1094 UNSPEC_MERGE_PTRUE))]
1096 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1099 ;; Vector AND, ORR and XOR.
;; Alternative 0 takes an immediate (vsl, printed via %C2); the register
;; form is element-size agnostic, hence the .d suffixes.
1100 (define_insn "<optab><mode>3"
1101 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
1103 (match_operand:SVE_I 1 "register_operand" "%0, w")
1104 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
1107 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
1108 <logical>\t%0.d, %1.d, %2.d"
1111 ;; Vector AND, ORR and XOR on floating-point modes.  We avoid subregs
1112 ;; by providing this, but we need to use UNSPECs since rtx logical ops
1113 ;; aren't defined for floating-point modes.
1114 (define_insn "*<optab><mode>3"
1115 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1116 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
1117 (match_operand:SVE_F 2 "register_operand" "w")]
1120 "<logicalf_op>\t%0.d, %1.d, %2.d"
1123 ;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
1124 ;; this pattern even though the NOT instruction itself is predicated.
;; AND-NOT: note the operand swap in the template — BIC computes
;; %2 AND NOT %1 here.
1125 (define_insn "bic<mode>3"
1126 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1128 (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
1129 (match_operand:SVE_I 2 "register_operand" "w")))]
1131 "bic\t%0.d, %2.d, %1.d"
1134 ;; Predicate AND.  We can reuse one of the inputs as the GP.
;; Operand 1 doubles as the governing predicate (%1/z), making the
;; zeroing AND equivalent to the plain AND of the two inputs.
1135 (define_insn "and<mode>3"
1136 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1137 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
1138 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
1140 "and\t%0.b, %1/z, %1.b, %2.b"
1143 ;; Unpredicated predicate ORR and XOR.
;; Unlike AND, these need an explicit all-true GP (operand 3, created
;; below).
1144 (define_expand "<optab><mode>3"
1145 [(set (match_operand:PRED_ALL 0 "register_operand")
1147 (LOGICAL_OR:PRED_ALL
1148 (match_operand:PRED_ALL 1 "register_operand")
1149 (match_operand:PRED_ALL 2 "register_operand"))
1153 operands[3] = aarch64_ptrue_reg (<MODE>mode);
1157 ;; Predicated predicate ORR and XOR.
;; Zeroing form: the result is ANDed with GP operand 1 (%1/z).
1158 (define_insn "pred_<optab><mode>3"
1159 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1162 (match_operand:PRED_ALL 2 "register_operand" "Upa")
1163 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1164 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1166 "<logical>\t%0.b, %1/z, %2.b, %3.b"
1169 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
1170 ;; the GP (which is known to be a PTRUE).  Store the result in operand 0
1171 ;; and set the flags in the same way as for PTEST.  The (and ...) in the
1172 ;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
1173 ;; value is structurally equivalent to rhs of the second set.
1174 (define_insn "*<optab><mode>3_cc"
1175 [(set (reg:CC CC_REGNUM)
1177 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
1180 (match_operand:PRED_ALL 2 "register_operand" "Upa")
1181 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1185 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1186 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
1189 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
1192 ;; Unpredicated predicate inverse.
1193 (define_expand "one_cmpl<mode>2"
1194 [(set (match_operand:PRED_ALL 0 "register_operand")
1196 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
1200 operands[2] = aarch64_ptrue_reg (<MODE>mode);
1204 ;; Predicated predicate inverse.
1205 (define_insn "*one_cmpl<mode>3"
1206 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1208 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1209 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1211 "not\t%0.b, %1/z, %2.b"
1214 ;; Predicated predicate BIC and ORN.
;; The inverted input (operand 2) is printed last in the template, since
;; BIC/ORN invert their final source operand.
1215 (define_insn "*<nlogical><mode>3"
1216 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1219 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1220 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1221 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1223 "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
1226 ;; Predicated predicate NAND and NOR.
;; Both inputs appear inverted in the RTL.
1227 (define_insn "*<logical_nn><mode>3"
1228 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1231 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1232 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
1233 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1235 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
1238 ;; Unpredicated LSL, LSR and ASR by a vector.
1239 (define_expand "v<optab><mode>3"
1240 [(set (match_operand:SVE_I 0 "register_operand")
1244 (match_operand:SVE_I 1 "register_operand")
1245 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
1246 UNSPEC_MERGE_PTRUE))]
1249 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1253 ;; LSL, LSR and ASR by a vector, predicated with a PTRUE.  We don't
1254 ;; actually need the predicate for the first alternative, but using Upa
1255 ;; or X isn't likely to gain much and would make the instruction seem
1256 ;; less uniform to the register allocator.
;; Alternative 0 allows an immediate shift amount (D<lr>); like *mul<mode>3,
;; the immediate form is split post-reload into the unpredicated pattern.
1257 (define_insn_and_split "*v<optab><mode>3"
1258 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
1260 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1262 (match_operand:SVE_I 2 "register_operand" "w, 0, w")
1263 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
1264 UNSPEC_MERGE_PTRUE))]
1268 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1269 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1270 "&& reload_completed
1271 && !register_operand (operands[3], <MODE>mode)"
1272 [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
1274 [(set_attr "movprfx" "*,*,yes")]
1277 ;; Unpredicated shift operations by a constant (post-RA only).
1278 ;; These are generated by splitting a predicated instruction whose
1279 ;; predicate is unused.
1280 (define_insn "*post_ra_v<optab><mode>3"
1281 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1283 (match_operand:SVE_I 1 "register_operand" "w")
1284 (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
1285 "TARGET_SVE && reload_completed"
1286 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
1289 ;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; Constant amounts are duplicated into a vector (and forced into a
;; register when out of range for the immediate form); variable scalar
;; amounts are converted to element mode and broadcast.
1291 (define_expand "<ASHIFT:optab><mode>3"
1292 [(set (match_operand:SVE_I 0 "register_operand")
1293 (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
1294 (match_operand:<VEL> 2 "general_operand")))]
1298 if (CONST_INT_P (operands[2]))
1300 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
1301 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
1302 amount = force_reg (<MODE>mode, amount);
1306 amount = gen_reg_rtx (<MODE>mode);
1307 emit_insn (gen_vec_duplicate<mode> (amount,
1308 convert_to_mode (<VEL>mode,
1311 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
1316 ;; Test all bits of operand 1.  Operand 0 is a GP that is known to hold PTRUE.
1318 ;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
1319 ;; is a PTRUE even if the optimizers haven't yet been able to propagate
1320 ;; the constant.  We would use a separate unspec code for PTESTs involving
1321 ;; GPs that might not be PTRUEs.
;; Sets the condition-code register; consumed e.g. by cbranch<mode>4 below.
1322 (define_insn "ptest_ptrue<mode>"
1323 [(set (reg:CC CC_REGNUM)
1325 (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
1326 (match_operand:PRED_ALL 1 "register_operand" "Upa")]
1333 ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
1334 ;; with the comparison being unsigned.
;; WHILELO: build a predicate from an unsigned counted-loop bound.
;; The flag results are clobbered here; the _cc variant below keeps them.
1335 (define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
1336 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1337 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
1338 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
1340 (clobber (reg:CC CC_REGNUM))]
1342 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
1345 ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
1346 ;; Handle the case in which both results are useful.  The GP operand
1347 ;; to the PTEST isn't needed, so we allow it to be anything.
;; define_insn_and_rewrite: operand 1 is rewritten to constant all-ones
;; below rather than being matched against a real register.
1348 (define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
1349 [(set (reg:CC CC_REGNUM)
1351 (unspec:SI [(match_operand:PRED_ALL 1)
1353 [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
1354 (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
1358 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1359 (unspec:PRED_ALL [(match_dup 2)
1363 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
1364 ;; Force the compiler to drop the unused predicate operand, so that we
1365 ;; don't have an unnecessary PTRUE.
1366 "&& !CONSTANT_P (operands[1])"
1368 operands[1] = CONSTM1_RTX (<MODE>mode);
1372 ;; Integer comparisons predicated with a PTRUE.
;; Alternative 0 compares against an immediate (<sve_imm_con>); the
;; flags are clobbered, so only the predicate result is usable here.
1373 (define_insn "*cmp<cmp_op><mode>"
1374 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1376 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1377 (SVE_INT_CMP:<VPRED>
1378 (match_operand:SVE_I 2 "register_operand" "w, w")
1379 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1380 UNSPEC_MERGE_PTRUE))
1381 (clobber (reg:CC CC_REGNUM))]
1384 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1385 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1388 ;; Integer comparisons predicated with a PTRUE in which only the flags result
;; The predicate result is dead, so operand 0 is just a scratch.
1390 (define_insn "*cmp<cmp_op><mode>_ptest"
1391 [(set (reg:CC CC_REGNUM)
1394 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1397 (SVE_INT_CMP:<VPRED>
1398 (match_operand:SVE_I 2 "register_operand" "w, w")
1399 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1400 UNSPEC_MERGE_PTRUE)]
1403 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
1406 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1407 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1410 ;; Integer comparisons predicated with a PTRUE in which both the flag and
1411 ;; predicate results are interesting.
1412 (define_insn "*cmp<cmp_op><mode>_cc"
1413 [(set (reg:CC CC_REGNUM)
1416 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1419 (SVE_INT_CMP:<VPRED>
1420 (match_operand:SVE_I 2 "register_operand" "w, w")
1421 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1422 UNSPEC_MERGE_PTRUE)]
1425 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1428 (SVE_INT_CMP:<VPRED>
1431 UNSPEC_MERGE_PTRUE))]
1434 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1435 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1438 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
1439 ;; comparison with an AND.  Split the instruction into its preferred form
1440 ;; (below) at the earliest opportunity, in order to get rid of the
1441 ;; redundant operand 1.
1442 (define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
1443 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1446 [(match_operand:<VPRED> 1)
1447 (SVE_INT_CMP:<VPRED>
1448 (match_operand:SVE_I 2 "register_operand" "w, w")
1449 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1451 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
1452 (clobber (reg:CC CC_REGNUM))]
1459 (SVE_INT_CMP:<VPRED>
1463 (clobber (reg:CC CC_REGNUM))])]
1466 ;; Predicated integer comparisons.
;; The comparison result is ANDed directly with GP operand 1.
1467 (define_insn "*pred_cmp<cmp_op><mode>"
1468 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1470 (SVE_INT_CMP:<VPRED>
1471 (match_operand:SVE_I 2 "register_operand" "w, w")
1472 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
1473 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
1474 (clobber (reg:CC CC_REGNUM))]
1477 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1478 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1481 ;; Floating-point comparisons predicated with a PTRUE.
;; Alternative 0 compares against +0.0 (Dz).
1482 (define_insn "*fcm<cmp_op><mode>"
1483 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1485 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1487 (match_operand:SVE_F 2 "register_operand" "w, w")
1488 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
1489 UNSPEC_MERGE_PTRUE))]
1492 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1493 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
;; Unordered comparison (FCMUO); no zero-immediate alternative exists
;; for this form.
1496 (define_insn "*fcmuo<mode>"
1497 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1499 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1501 (match_operand:SVE_F 2 "register_operand" "w")
1502 (match_operand:SVE_F 3 "register_operand" "w"))]
1503 UNSPEC_MERGE_PTRUE))]
1505 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1508 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
1509 ;; with another predicate P.  This does not have the same trapping behavior
1510 ;; as predicating the comparison itself on P, but it's a legitimate fold,
1511 ;; since we can drop any potentially-trapping operations whose results
1514 ;; Split the instruction into its preferred form (below) at the earliest
1515 ;; opportunity, in order to get rid of the redundant operand 1.
1516 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
1517 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1520 [(match_operand:<VPRED> 1)
1522 (match_operand:SVE_F 2 "register_operand" "w, w")
1523 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
1525 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
;; FCMUO counterpart of the combine pattern above.
1537 (define_insn_and_split "*fcmuo<mode>_and_combine"
1538 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1541 [(match_operand:<VPRED> 1)
1543 (match_operand:SVE_F 2 "register_operand" "w")
1544 (match_operand:SVE_F 3 "register_operand" "w"))]
1546 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
1558 ;; Unpredicated floating-point comparisons, with the results ANDed
1559 ;; with another predicate.  This is a valid fold for the same reasons
;; Target form of the combine split above: operand 1 is the GP.
1561 (define_insn "*fcm<cmp_op><mode>_and"
1562 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1565 (match_operand:SVE_F 2 "register_operand" "w, w")
1566 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
1567 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
1570 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1571 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
;; FCMUO counterpart of the pattern above.
1574 (define_insn "*fcmuo<mode>_and"
1575 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1578 (match_operand:SVE_F 2 "register_operand" "w")
1579 (match_operand:SVE_F 3 "register_operand" "w"))
1580 (match_operand:<VPRED> 1 "register_operand" "Upl")))]
1582 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1585 ;; Predicated floating-point comparisons.  We don't need a version
1586 ;; of this for unordered comparisons.
1587 (define_insn "*pred_fcm<cmp_op><mode>"
1588 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1590 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1591 (match_operand:SVE_F 2 "register_operand" "w, w")
1592 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
1596 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1597 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1600 ;; vcond_mask operand order: true, false, mask
1601 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
1602 ;; SEL operand order: mask, true, false
;; Per-lane select between two vector registers under predicate operand 3.
1603 (define_insn "vcond_mask_<mode><vpred>"
1604 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1606 [(match_operand:<VPRED> 3 "register_operand" "Upa")
1607 (match_operand:SVE_ALL 1 "register_operand" "w")
1608 (match_operand:SVE_ALL 2 "register_operand" "w")]
1611 "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
1614 ;; Selects between a duplicated immediate and zero.
;; MOV (immediate, zeroing): active lanes get #%2, inactive lanes zero.
1615 (define_insn "aarch64_sve_dup<mode>_const"
1616 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1618 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1619 (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
1620 (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
1623 "mov\t%0.<Vetype>, %1/z, #%2"
1626 ;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
1627 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1628 (define_expand "vcond<mode><v_int_equiv>"
1629 [(set (match_operand:SVE_ALL 0 "register_operand")
1630 (if_then_else:SVE_ALL
1631 (match_operator 3 "comparison_operator"
1632 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1633 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1634 (match_operand:SVE_ALL 1 "register_operand")
1635 (match_operand:SVE_ALL 2 "register_operand")))]
1638 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1643 ;; Integer vcondu.  Don't enforce an immediate range here, since it
1644 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Same expansion path as vcond; the unsigned/signed distinction is
;; carried by the comparison operator in operand 3.
1645 (define_expand "vcondu<mode><v_int_equiv>"
1646 [(set (match_operand:SVE_ALL 0 "register_operand")
1647 (if_then_else:SVE_ALL
1648 (match_operator 3 "comparison_operator"
1649 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1650 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1651 (match_operand:SVE_ALL 1 "register_operand")
1652 (match_operand:SVE_ALL 2 "register_operand")))]
1655 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1660 ;; Floating-point vcond.  All comparisons except FCMUO allow a zero
1661 ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
;; Note: restricted to SVE_SD (32/64-bit element) modes.
1663 (define_expand "vcond<mode><v_fp_equiv>"
1664 [(set (match_operand:SVE_SD 0 "register_operand")
1665 (if_then_else:SVE_SD
1666 (match_operator 3 "comparison_operator"
1667 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
1668 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
1669 (match_operand:SVE_SD 1 "register_operand")
1670 (match_operand:SVE_SD 2 "register_operand")))]
1673 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
1678 ;; Signed integer comparisons.  Don't enforce an immediate range here, since
1679 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
1681 (define_expand "vec_cmp<mode><vpred>"
1683 [(set (match_operand:<VPRED> 0 "register_operand")
1684 (match_operator:<VPRED> 1 "comparison_operator"
1685 [(match_operand:SVE_I 2 "register_operand")
1686 (match_operand:SVE_I 3 "nonmemory_operand")]))
1687 (clobber (reg:CC CC_REGNUM))])]
1690 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1691 operands[2], operands[3]);
1696 ;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
1697 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; Shares the expansion helper with the signed form above; the code in
;; operand 1 distinguishes the two.
1699 (define_expand "vec_cmpu<mode><vpred>"
1701 [(set (match_operand:<VPRED> 0 "register_operand")
1702 (match_operator:<VPRED> 1 "comparison_operator"
1703 [(match_operand:SVE_I 2 "register_operand")
1704 (match_operand:SVE_I 3 "nonmemory_operand")]))
1705 (clobber (reg:CC CC_REGNUM))])]
1708 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1709 operands[2], operands[3]);
1714 ;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
1715 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; The final "false" argument selects the non-inverted expansion.
1717 (define_expand "vec_cmp<mode><vpred>"
1718 [(set (match_operand:<VPRED> 0 "register_operand")
1719 (match_operator:<VPRED> 1 "comparison_operator"
1720 [(match_operand:SVE_F 2 "register_operand")
1721 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
1724 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
1725 operands[2], operands[3], false);
1730 ;; Branch based on predicate equality or inequality.
;; operand1 != operand2 is computed as EOR of the two predicates followed
;; by a PTEST; comparison against zero reuses operand 1 directly.  The
;; operands are rewritten into a CC-register comparison for the branch.
1731 (define_expand "cbranch<mode>4"
1734 (match_operator 0 "aarch64_equality_operator"
1735 [(match_operand:PRED_ALL 1 "register_operand")
1736 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
1737 (label_ref (match_operand 3 ""))
1741 rtx ptrue = aarch64_ptrue_reg (<MODE>mode);
1743 if (operands[2] == CONST0_RTX (<MODE>mode))
1747 pred = gen_reg_rtx (<MODE>mode);
1748 emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
1751 emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
1752 operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
1753 operands[2] = const0_rtx;
1757 ;; Unpredicated integer MIN/MAX.
1758 (define_expand "<su><maxmin><mode>3"
1759 [(set (match_operand:SVE_I 0 "register_operand")
1762 (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
1763 (match_operand:SVE_I 2 "register_operand"))]
1764 UNSPEC_MERGE_PTRUE))]
1767 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1771 ;; Integer MIN/MAX predicated with a PTRUE.
;; "%0" makes the operands commutative; the second alternative uses
;; MOVPRFX for an untied destination.
1772 (define_insn "*<su><maxmin><mode>3"
1773 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1775 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1776 (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
1777 (match_operand:SVE_I 3 "register_operand" "w, w"))]
1778 UNSPEC_MERGE_PTRUE))]
1781 <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1782 movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1783 [(set_attr "movprfx" "*,yes")]
1786 ;; Unpredicated floating-point MIN/MAX.
1787 (define_expand "<su><maxmin><mode>3"
1788 [(set (match_operand:SVE_F 0 "register_operand")
1791 (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
1792 (match_operand:SVE_F 2 "register_operand"))]
1793 UNSPEC_MERGE_PTRUE))]
1796 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1800 ;; Floating-point MIN/MAX predicated with a PTRUE.
;; Emits the NaN-propagating FMAXNM/FMINNM forms.
1801 (define_insn "*<su><maxmin><mode>3"
1802 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
1804 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1805 (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w")
1806 (match_operand:SVE_F 3 "register_operand" "w, w"))]
1807 UNSPEC_MERGE_PTRUE))]
1810 f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1811 movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1812 [(set_attr "movprfx" "*,yes")]
1815 ;; Unpredicated fmin/fmax.
;; Unlike FMAXMIN above, these use an unspec (see <maxmin_uns_op>).
1816 (define_expand "<maxmin_uns><mode>3"
1817 [(set (match_operand:SVE_F 0 "register_operand")
1820 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
1821 (match_operand:SVE_F 2 "register_operand")]
1823 UNSPEC_MERGE_PTRUE))]
1826 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1830 ;; fmin/fmax predicated with a PTRUE.
1831 (define_insn "*<maxmin_uns><mode>3"
1832 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
1834 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1835 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w")
1836 (match_operand:SVE_F 3 "register_operand" "w, w")]
1838 UNSPEC_MERGE_PTRUE))]
1841 <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1842 movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1843 [(set_attr "movprfx" "*,yes")]
1846 ;; Predicated integer operations with select.
;; cond_<optab> optabs: operand 4 supplies the value of inactive lanes.
1847 (define_expand "cond_<optab><mode>"
1848 [(set (match_operand:SVE_I 0 "register_operand")
1850 [(match_operand:<VPRED> 1 "register_operand")
1851 (SVE_INT_BINARY:SVE_I
1852 (match_operand:SVE_I 2 "register_operand")
1853 (match_operand:SVE_I 3 "register_operand"))
1854 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
;; As above, but for operations that only exist for 32/64-bit elements.
1859 (define_expand "cond_<optab><mode>"
1860 [(set (match_operand:SVE_SDI 0 "register_operand")
1862 [(match_operand:<VPRED> 1 "register_operand")
1863 (SVE_INT_BINARY_SD:SVE_SDI
1864 (match_operand:SVE_SDI 2 "register_operand")
1865 (match_operand:SVE_SDI 3 "register_operand"))
1866 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
1871 ;; Predicated integer operations with select matching the first operand.
1872 (define_insn "*cond_<optab><mode>_2"
1873 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1875 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1876 (SVE_INT_BINARY:SVE_I
1877 (match_operand:SVE_I 2 "register_operand" "0, w")
1878 (match_operand:SVE_I 3 "register_operand" "w, w"))
1883 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1884 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1885 [(set_attr "movprfx" "*,yes")]
;; SVE_SDI counterpart of *cond_<optab><mode>_2.
1888 (define_insn "*cond_<optab><mode>_2"
1889 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1891 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1892 (SVE_INT_BINARY_SD:SVE_SDI
1893 (match_operand:SVE_SDI 2 "register_operand" "0, w")
1894 (match_operand:SVE_SDI 3 "register_operand" "w, w"))
1899 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1900 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1901 [(set_attr "movprfx" "*,yes")]
1904 ;; Predicated integer operations with select matching the second operand.
;; Uses the reversed instruction form (<sve_int_op_rev>) so the tied
;; input can be the second operand.
1905 (define_insn "*cond_<optab><mode>_3"
1906 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1908 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1909 (SVE_INT_BINARY:SVE_I
1910 (match_operand:SVE_I 2 "register_operand" "w, w")
1911 (match_operand:SVE_I 3 "register_operand" "0, w"))
1916 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1917 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1918 [(set_attr "movprfx" "*,yes")]
;; SVE_SDI counterpart of *cond_<optab><mode>_3.
1921 (define_insn "*cond_<optab><mode>_3"
1922 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1924 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1925 (SVE_INT_BINARY_SD:SVE_SDI
1926 (match_operand:SVE_SDI 2 "register_operand" "w, w")
1927 (match_operand:SVE_SDI 3 "register_operand" "0, w"))
1932 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1933 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1934 [(set_attr "movprfx" "*,yes")]
1937 ;; Predicated integer binary operations in which the values of inactive
1938 ;; lanes are distinct from the other inputs.
;; When operand 4 is a register distinct from operand 0, the rewrite
;; below first emits a SEL so that the tied-operand forms can be used.
1939 (define_insn_and_rewrite "*cond_<optab><mode>_any"
1940 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
1942 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
1943 (SVE_INT_BINARY:SVE_I
1944 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
1945 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
1946 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
1949 && !rtx_equal_p (operands[2], operands[4])
1950 && !rtx_equal_p (operands[3], operands[4])"
1952 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1953 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1954 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1955 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1957 "&& reload_completed
1958 && register_operand (operands[4], <MODE>mode)
1959 && !rtx_equal_p (operands[0], operands[4])"
1961 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1962 operands[4], operands[1]));
1963 operands[4] = operands[2] = operands[0];
1965 [(set_attr "movprfx" "yes")]
;; SVE_SDI counterpart of *cond_<optab><mode>_any.
1968 (define_insn_and_rewrite "*cond_<optab><mode>_any"
1969 [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
1971 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
1972 (SVE_INT_BINARY_SD:SVE_SDI
1973 (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
1974 (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
1975 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
1978 && !rtx_equal_p (operands[2], operands[4])
1979 && !rtx_equal_p (operands[3], operands[4])"
1981 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1982 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1983 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1984 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1986 "&& reload_completed
1987 && register_operand (operands[4], <MODE>mode)
1988 && !rtx_equal_p (operands[0], operands[4])"
1990 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1991 operands[4], operands[1]));
1992 operands[4] = operands[2] = operands[0];
1994 [(set_attr "movprfx" "yes")]
1997 ;; Set operand 0 to the last active element in operand 3, or to tied
1998 ;; operand 1 if no elements are active.
;; CLASTB: alternative 0 returns in a general register (<vwcore>),
;; alternative 1 in a SIMD/FP register (<vw>).
1999 (define_insn "fold_extract_last_<mode>"
2000 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
2002 [(match_operand:<VEL> 1 "register_operand" "0, 0")
2003 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
2004 (match_operand:SVE_ALL 3 "register_operand" "w, w")]
2008 clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
2009 clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
2012 ;; Unpredicated integer add reduction.
2013 (define_expand "reduc_plus_scal_<mode>"
2014 [(set (match_operand:<VEL> 0 "register_operand")
2015 (unspec:<VEL> [(match_dup 2)
2016 (match_operand:SVE_I 1 "register_operand")]
2020 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2024 ;; Predicated integer add reduction.  The result is always 64-bits.
2025 (define_insn "*reduc_plus_scal_<mode>"
2026 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2027 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2028 (match_operand:SVE_I 2 "register_operand" "w")]
2031 "uaddv\t%d0, %1, %2.<Vetype>"
2034 ;; Unpredicated floating-point add reduction.
2035 (define_expand "reduc_plus_scal_<mode>"
2036 [(set (match_operand:<VEL> 0 "register_operand")
2037 (unspec:<VEL> [(match_dup 2)
2038 (match_operand:SVE_F 1 "register_operand")]
2042 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2046 ;; Predicated floating-point add reduction.
2047 (define_insn "*reduc_plus_scal_<mode>"
2048 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2049 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2050 (match_operand:SVE_F 2 "register_operand" "w")]
2053 "faddv\t%<Vetype>0, %1, %2.<Vetype>"
2056 ;; Unpredicated integer MIN/MAX reduction.
2057 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2058 [(set (match_operand:<VEL> 0 "register_operand")
2059 (unspec:<VEL> [(match_dup 2)
2060 (match_operand:SVE_I 1 "register_operand")]
2064 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2068 ;; Predicated integer MIN/MAX reduction.
2069 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
2070 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2071 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2072 (match_operand:SVE_I 2 "register_operand" "w")]
2075 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
2078 ;; Unpredicated floating-point MIN/MAX reduction.
2079 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2080 [(set (match_operand:<VEL> 0 "register_operand")
2081 (unspec:<VEL> [(match_dup 2)
2082 (match_operand:SVE_F 1 "register_operand")]
2086 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2090 ;; Predicated floating-point MIN/MAX reduction.
2091 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
2092 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2093 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2094 (match_operand:SVE_F 2 "register_operand" "w")]
2097 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
2100 (define_expand "reduc_<optab>_scal_<mode>"
2101 [(set (match_operand:<VEL> 0 "register_operand")
2102 (unspec:<VEL> [(match_dup 2)
2103 (match_operand:SVE_I 1 "register_operand")]
2107 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2111 (define_insn "*reduc_<optab>_scal_<mode>"
2112 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2113 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2114 (match_operand:SVE_I 2 "register_operand" "w")]
2117 "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
2120 ;; Unpredicated in-order FP reductions.
2121 (define_expand "fold_left_plus_<mode>"
2122 [(set (match_operand:<VEL> 0 "register_operand")
2123 (unspec:<VEL> [(match_dup 3)
2124 (match_operand:<VEL> 1 "register_operand")
2125 (match_operand:SVE_F 2 "register_operand")]
2129 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2133 ;; In-order FP reductions predicated with PTRUE.
2134 (define_insn "mask_fold_left_plus_<mode>"
2135 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2136 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
2137 (match_operand:<VEL> 1 "register_operand" "0")
2138 (match_operand:SVE_F 2 "register_operand" "w")]
2141 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
2144 ;; Predicated form of the above in-order reduction.
2145 (define_insn "*pred_fold_left_plus_<mode>"
2146 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2148 [(match_operand:<VEL> 1 "register_operand" "0")
2150 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2151 (match_operand:SVE_F 3 "register_operand" "w")
2152 (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
2156 "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
2159 ;; Unpredicated floating-point addition.
2160 (define_expand "add<mode>3"
2161 [(set (match_operand:SVE_F 0 "register_operand")
2165 (match_operand:SVE_F 1 "register_operand")
2166 (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
2167 UNSPEC_MERGE_PTRUE))]
2170 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2174 ;; Floating-point addition predicated with a PTRUE.
2175 (define_insn_and_split "*add<mode>3"
2176 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
2178 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2180 (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
2181 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
2182 UNSPEC_MERGE_PTRUE))]
2185 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2186 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2188 ; Split the unpredicated form after reload, so that we don't have
2189 ; the unnecessary PTRUE.
2190 "&& reload_completed
2191 && register_operand (operands[3], <MODE>mode)"
2192 [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
2195 ;; Unpredicated floating-point subtraction.
2196 (define_expand "sub<mode>3"
2197 [(set (match_operand:SVE_F 0 "register_operand")
2201 (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
2202 (match_operand:SVE_F 2 "register_operand"))]
2203 UNSPEC_MERGE_PTRUE))]
2206 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2210 ;; Floating-point subtraction predicated with a PTRUE.
2211 (define_insn_and_split "*sub<mode>3"
2212 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
2214 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
2216 (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
2217 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
2218 UNSPEC_MERGE_PTRUE))]
2220 && (register_operand (operands[2], <MODE>mode)
2221 || register_operand (operands[3], <MODE>mode))"
2223 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2224 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2225 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
2227 ; Split the unpredicated form after reload, so that we don't have
2228 ; the unnecessary PTRUE.
2229 "&& reload_completed
2230 && register_operand (operands[2], <MODE>mode)
2231 && register_operand (operands[3], <MODE>mode)"
2232 [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
2235 ;; Unpredicated floating-point multiplication.
2236 (define_expand "mul<mode>3"
2237 [(set (match_operand:SVE_F 0 "register_operand")
2241 (match_operand:SVE_F 1 "register_operand")
2242 (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
2243 UNSPEC_MERGE_PTRUE))]
2246 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2250 ;; Floating-point multiplication predicated with a PTRUE.
2251 (define_insn_and_split "*mul<mode>3"
2252 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2254 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2256 (match_operand:SVE_F 2 "register_operand" "%0, w")
2257 (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
2258 UNSPEC_MERGE_PTRUE))]
2261 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2263 ; Split the unpredicated form after reload, so that we don't have
2264 ; the unnecessary PTRUE.
2265 "&& reload_completed
2266 && register_operand (operands[3], <MODE>mode)"
2267 [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
2270 ;; Unpredicated floating-point binary operations (post-RA only).
2271 ;; These are generated by splitting a predicated instruction whose
2272 ;; predicate is unused.
2273 (define_insn "*post_ra_<sve_fp_op><mode>3"
;; Matches a plain (unpredicated) FP binary rtx rather than an
;; UNSPEC_MERGE_PTRUE wrapper; such rtl is only created by the
;; "&& reload_completed" splitters of the predicated patterns above.
2274 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2275 (SVE_UNPRED_FP_BINARY:SVE_F
2276 (match_operand:SVE_F 1 "register_operand" "w")
2277 (match_operand:SVE_F 2 "register_operand" "w")))]
;; reload_completed in the condition stops this form from being matched
;; before register allocation, where the predicated forms must be used.
2278 "TARGET_SVE && reload_completed"
;; Three-operand unpredicated form of <sve_fp_op> on <Vetype> elements.
2279 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
2281 ;; Unpredicated fma (%0 = (%1 * %2) + %3).
2282 (define_expand "fma<mode>4"
2283 [(set (match_operand:SVE_F 0 "register_operand")
2286 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2287 (match_operand:SVE_F 2 "register_operand")
2288 (match_operand:SVE_F 3 "register_operand"))]
2289 UNSPEC_MERGE_PTRUE))]
2292 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2296 ;; fma predicated with a PTRUE.
2297 (define_insn "*fma<mode>4"
2298 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2300 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2301 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
2302 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2303 (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
2304 UNSPEC_MERGE_PTRUE))]
2307 fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2308 fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2309 movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2310 [(set_attr "movprfx" "*,*,yes")]
2313 ;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
2314 (define_expand "fnma<mode>4"
2315 [(set (match_operand:SVE_F 0 "register_operand")
2318 (fma:SVE_F (neg:SVE_F
2319 (match_operand:SVE_F 1 "register_operand"))
2320 (match_operand:SVE_F 2 "register_operand")
2321 (match_operand:SVE_F 3 "register_operand"))]
2322 UNSPEC_MERGE_PTRUE))]
2325 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2329 ;; fnma predicated with a PTRUE.
2330 (define_insn "*fnma<mode>4"
2331 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2333 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2334 (fma:SVE_F (neg:SVE_F
2335 (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
2336 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2337 (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
2338 UNSPEC_MERGE_PTRUE))]
2341 fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2342 fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2343 movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2344 [(set_attr "movprfx" "*,*,yes")]
2347 ;; Unpredicated fms (%0 = (%1 * %2) - %3).
2348 (define_expand "fms<mode>4"
2349 [(set (match_operand:SVE_F 0 "register_operand")
2352 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2353 (match_operand:SVE_F 2 "register_operand")
2355 (match_operand:SVE_F 3 "register_operand")))]
2356 UNSPEC_MERGE_PTRUE))]
2359 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2363 ;; fms predicated with a PTRUE.
2364 (define_insn "*fms<mode>4"
2365 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2367 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2368 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
2369 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2371 (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
2372 UNSPEC_MERGE_PTRUE))]
2375 fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2376 fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2377 movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2378 [(set_attr "movprfx" "*,*,yes")]
2381 ;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
2382 (define_expand "fnms<mode>4"
2383 [(set (match_operand:SVE_F 0 "register_operand")
2386 (fma:SVE_F (neg:SVE_F
2387 (match_operand:SVE_F 1 "register_operand"))
2388 (match_operand:SVE_F 2 "register_operand")
2390 (match_operand:SVE_F 3 "register_operand")))]
2391 UNSPEC_MERGE_PTRUE))]
2394 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2398 ;; fnms predicated with a PTRUE.
2399 (define_insn "*fnms<mode>4"
2400 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2402 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2403 (fma:SVE_F (neg:SVE_F
2404 (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
2405 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2407 (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
2408 UNSPEC_MERGE_PTRUE))]
2411 fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2412 fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2413 movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2414 [(set_attr "movprfx" "*,*,yes")]
2417 ;; Unpredicated floating-point division.
2418 (define_expand "div<mode>3"
2419 [(set (match_operand:SVE_F 0 "register_operand")
2422 (div:SVE_F (match_operand:SVE_F 1 "register_operand")
2423 (match_operand:SVE_F 2 "register_operand"))]
2424 UNSPEC_MERGE_PTRUE))]
2427 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2431 ;; Floating-point division predicated with a PTRUE.
2432 (define_insn "*div<mode>3"
2433 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2435 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2436 (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
2437 (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
2438 UNSPEC_MERGE_PTRUE))]
2441 fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2442 fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2443 movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2444 [(set_attr "movprfx" "*,*,yes")]
2447 ;; Unpredicated FNEG, FABS and FSQRT.
2448 (define_expand "<optab><mode>2"
2449 [(set (match_operand:SVE_F 0 "register_operand")
2452 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
2453 UNSPEC_MERGE_PTRUE))]
2456 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2460 ;; FNEG, FABS and FSQRT predicated with a PTRUE.
2461 (define_insn "*<optab><mode>2"
2462 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2464 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2465 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
2466 UNSPEC_MERGE_PTRUE))]
2468 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2471 (define_insn "*fabd<mode>3"
2472 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2474 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2477 (match_operand:SVE_F 2 "register_operand" "0")
2478 (match_operand:SVE_F 3 "register_operand" "w")))]
2479 UNSPEC_MERGE_PTRUE))]
2481 "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
2484 ;; Unpredicated FRINTy.
2485 (define_expand "<frint_pattern><mode>2"
2486 [(set (match_operand:SVE_F 0 "register_operand")
2489 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
2491 UNSPEC_MERGE_PTRUE))]
2494 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2498 ;; FRINTy predicated with a PTRUE.
2499 (define_insn "*<frint_pattern><mode>2"
2500 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2502 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2503 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
2505 UNSPEC_MERGE_PTRUE))]
2507 "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2510 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
2511 ;; SF to SI or DF to DI).
2512 (define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
2513 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2514 (unspec:<V_INT_EQUIV>
2516 (FIXUORS:<V_INT_EQUIV>
2517 (match_operand:SVE_F 1 "register_operand"))]
2518 UNSPEC_MERGE_PTRUE))]
2521 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2525 ;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
;; Renamed from "*v16hsf" to "*vnx8hf" to match the VNx8HF source mode
;; and the naming of the sibling patterns *<fix_trunc_optab>vnx4sf<mode>2
;; and *<fix_trunc_optab>vnx2df<mode>2 below.  The name is internal
;; (starred), so no gen_* caller is affected.
(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
	(unspec:SVE_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FIXUORS:SVE_HSDI
	     (match_operand:VNx8HF 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  ;; FCVTZS/FCVTZU from .h elements; the destination element size comes
  ;; from <Vetype> of the matched SVE_HSDI mode.
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
)
2537 ;; Conversion of SF to DI or SI, predicated with a PTRUE.
2538 (define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
2539 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2541 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2543 (match_operand:VNx4SF 2 "register_operand" "w"))]
2544 UNSPEC_MERGE_PTRUE))]
2546 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
2549 ;; Conversion of DF to DI or SI, predicated with a PTRUE.
2550 (define_insn "*<fix_trunc_optab>vnx2df<mode>2"
2551 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2553 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2555 (match_operand:VNx2DF 2 "register_operand" "w"))]
2556 UNSPEC_MERGE_PTRUE))]
2558 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
2561 ;; Unpredicated conversion of integers to floats of the same size
2562 ;; (HI to HF, SI to SF or DI to DF).
2563 (define_expand "<optab><v_int_equiv><mode>2"
2564 [(set (match_operand:SVE_F 0 "register_operand")
2568 (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
2569 UNSPEC_MERGE_PTRUE))]
2572 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2576 ;; Conversion of DI, SI or HI to the same number of HFs, predicated
2578 (define_insn "*<optab><mode>vnx8hf2"
2579 [(set (match_operand:VNx8HF 0 "register_operand" "=w")
2581 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2583 (match_operand:SVE_HSDI 2 "register_operand" "w"))]
2584 UNSPEC_MERGE_PTRUE))]
2586 "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
2589 ;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
2590 (define_insn "*<optab><mode>vnx4sf2"
2591 [(set (match_operand:VNx4SF 0 "register_operand" "=w")
2593 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2595 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2596 UNSPEC_MERGE_PTRUE))]
2598 "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
2601 ;; Conversion of DI or SI to DF, predicated with a PTRUE.
2602 (define_insn "aarch64_sve_<optab><mode>vnx2df2"
2603 [(set (match_operand:VNx2DF 0 "register_operand" "=w")
2605 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2607 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2608 UNSPEC_MERGE_PTRUE))]
2610 "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
2613 ;; Conversion of DFs to the same number of SFs, or SFs to the same number
2615 (define_insn "*trunc<Vwide><mode>2"
2616 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
2618 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2620 [(match_operand:<VWIDE> 2 "register_operand" "w")]
2621 UNSPEC_FLOAT_CONVERT)]
2622 UNSPEC_MERGE_PTRUE))]
2624 "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
2627 ;; Conversion of SFs to the same number of DFs, or HFs to the same number
2629 (define_insn "aarch64_sve_extend<mode><Vwide>2"
2630 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2632 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2634 [(match_operand:SVE_HSF 2 "register_operand" "w")]
2635 UNSPEC_FLOAT_CONVERT)]
2636 UNSPEC_MERGE_PTRUE))]
2638 "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
2641 ;; Unpack the low or high half of a predicate, where "high" refers to
2642 ;; the low-numbered lanes for big-endian and the high-numbered lanes
2643 ;; for little-endian.
2644 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
2645 [(match_operand:<VWIDE> 0 "register_operand")
2646 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
2650 emit_insn ((<hi_lanes_optab>
2651 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
2652 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
2653 (operands[0], operands[1]));
2658 ;; PUNPKHI and PUNPKLO.
2659 (define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
2660 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
2661 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
2664 "punpk<perm_hilo>\t%0.h, %1.b"
2667 ;; Unpack the low or high half of a vector, where "high" refers to
2668 ;; the low-numbered lanes for big-endian and the high-numbered lanes
2669 ;; for little-endian.
2670 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
2671 [(match_operand:<VWIDE> 0 "register_operand")
2672 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
2675 emit_insn ((<hi_lanes_optab>
2676 ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
2677 : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
2678 (operands[0], operands[1]));
2683 ;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
2684 (define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
2685 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2686 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
2689 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
2692 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
2693 ;; First unpack the source without conversion, then float-convert the
2695 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
2696 [(match_operand:<VWIDE> 0 "register_operand")
2697 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
2701 /* Use ZIP to do the unpack, since we don't care about the upper halves
2702 and since it has the nice property of not needing any subregs.
2703 If using UUNPK* turns out to be preferable, we could model it as
2704 a ZIP whose first operand is zero. */
2705 rtx temp = gen_reg_rtx (<MODE>mode);
2706 emit_insn ((<hi_lanes_optab>
2707 ? gen_aarch64_sve_zip2<mode>
2708 : gen_aarch64_sve_zip1<mode>)
2709 (temp, operands[1], operands[1]));
2710 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
2711 emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
2717 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
2718 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
2719 ;; unpacked VNx4SI to VNx2DF.
2720 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
2721 [(match_operand:VNx2DF 0 "register_operand")
2723 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
2727 /* Use ZIP to do the unpack, since we don't care about the upper halves
2728 and since it has the nice property of not needing any subregs.
2729 If using UUNPK* turns out to be preferable, we could model it as
2730 a ZIP whose first operand is zero. */
2731 rtx temp = gen_reg_rtx (VNx4SImode);
2732 emit_insn ((<hi_lanes_optab>
2733 ? gen_aarch64_sve_zip2vnx4si
2734 : gen_aarch64_sve_zip1vnx4si)
2735 (temp, operands[1], operands[1]));
2736 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
2737 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
2743 ;; Predicate pack. Use UZP1 on the narrower type, which discards
2744 ;; the high part of each wide element.
2745 (define_insn "vec_pack_trunc_<Vwide>"
2746 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
2748 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
2749 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
2752 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2755 ;; Integer pack. Use UZP1 on the narrower type, which discards
2756 ;; the high part of each wide element.
2757 (define_insn "vec_pack_trunc_<Vwide>"
2758 [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
2760 [(match_operand:<VWIDE> 1 "register_operand" "w")
2761 (match_operand:<VWIDE> 2 "register_operand" "w")]
2764 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2767 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
2768 ;; the results into a single vector.
2769 (define_expand "vec_pack_trunc_<Vwide>"
2773 (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
2774 UNSPEC_FLOAT_CONVERT)]
2775 UNSPEC_MERGE_PTRUE))
2779 (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
2780 UNSPEC_FLOAT_CONVERT)]
2781 UNSPEC_MERGE_PTRUE))
2782 (set (match_operand:SVE_HSF 0 "register_operand")
2783 (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2786 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
2787 operands[4] = gen_reg_rtx (<MODE>mode);
2788 operands[5] = gen_reg_rtx (<MODE>mode);
2792 ;; Convert two vectors of DF to SI and pack the results into a single vector.
2793 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
2797 (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
2798 UNSPEC_MERGE_PTRUE))
2802 (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
2803 UNSPEC_MERGE_PTRUE))
2804 (set (match_operand:VNx4SI 0 "register_operand")
2805 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2808 operands[3] = aarch64_ptrue_reg (VNx2BImode);
2809 operands[4] = gen_reg_rtx (VNx4SImode);
2810 operands[5] = gen_reg_rtx (VNx4SImode);
2814 ;; Predicated floating-point operations with select.
2815 (define_expand "cond_<optab><mode>"
2816 [(set (match_operand:SVE_F 0 "register_operand")
2818 [(match_operand:<VPRED> 1 "register_operand")
2820 [(match_operand:SVE_F 2 "register_operand")
2821 (match_operand:SVE_F 3 "register_operand")]
2823 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
2828 ;; Predicated floating-point operations with select matching first operand.
2829 (define_insn "*cond_<optab><mode>_2"
2830 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2832 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2834 [(match_operand:SVE_F 2 "register_operand" "0, w")
2835 (match_operand:SVE_F 3 "register_operand" "w, w")]
2841 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2842 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2843 [(set_attr "movprfx" "*,yes")]
2846 ;; Predicated floating-point operations with select matching second operand.
2847 (define_insn "*cond_<optab><mode>_3"
2848 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2850 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2852 [(match_operand:SVE_F 2 "register_operand" "w, w")
2853 (match_operand:SVE_F 3 "register_operand" "0, w")]
2859 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2860 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
2861 [(set_attr "movprfx" "*,yes")]
2864 ;; Predicated floating-point binary operations in which the values of
2865 ;; inactive lanes are distinct from the other inputs.
2866 (define_insn_and_rewrite "*cond_<optab><mode>_any"
2867 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
2869 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
2871 [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
2872 (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
2874 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
2877 && !rtx_equal_p (operands[2], operands[4])
2878 && !rtx_equal_p (operands[3], operands[4])"
2880 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2881 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2882 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2883 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2885 "&& reload_completed
2886 && register_operand (operands[4], <MODE>mode)
2887 && !rtx_equal_p (operands[0], operands[4])"
2889 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2890 operands[4], operands[1]));
2891 operands[4] = operands[2] = operands[0];
2893 [(set_attr "movprfx" "yes")]
2896 ;; Predicated floating-point ternary operations with select.
2897 (define_expand "cond_<optab><mode>"
2898 [(set (match_operand:SVE_F 0 "register_operand")
2900 [(match_operand:<VPRED> 1 "register_operand")
2902 [(match_operand:SVE_F 2 "register_operand")
2903 (match_operand:SVE_F 3 "register_operand")
2904 (match_operand:SVE_F 4 "register_operand")]
2905 SVE_COND_FP_TERNARY)
2906 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
2910 /* Swap the multiplication operands if the fallback value is the
2911 second of the two. */
2912 if (rtx_equal_p (operands[3], operands[5]))
2913 std::swap (operands[2], operands[3]);
2916 ;; Predicated floating-point ternary operations using the FMAD-like form.
2917 (define_insn "*cond_<optab><mode>_2"
2918 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2920 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2922 [(match_operand:SVE_F 2 "register_operand" "0, w")
2923 (match_operand:SVE_F 3 "register_operand" "w, w")
2924 (match_operand:SVE_F 4 "register_operand" "w, w")]
2925 SVE_COND_FP_TERNARY)
2930 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2931 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2932 [(set_attr "movprfx" "*,yes")]
2935 ;; Predicated floating-point ternary operations using the FMLA-like form.
2936 (define_insn "*cond_<optab><mode>_4"
2937 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2939 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2941 [(match_operand:SVE_F 2 "register_operand" "w, w")
2942 (match_operand:SVE_F 3 "register_operand" "w, w")
2943 (match_operand:SVE_F 4 "register_operand" "0, w")]
2944 SVE_COND_FP_TERNARY)
2949 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2950 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
2951 [(set_attr "movprfx" "*,yes")]
2954 ;; Predicated floating-point ternary operations in which the value for
2955 ;; inactive lanes is distinct from the other inputs.
2956 (define_insn_and_rewrite "*cond_<optab><mode>_any"
2957 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
2959 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2961 [(match_operand:SVE_F 2 "register_operand" "w, w, w")
2962 (match_operand:SVE_F 3 "register_operand" "w, w, w")
2963 (match_operand:SVE_F 4 "register_operand" "w, w, w")]
2964 SVE_COND_FP_TERNARY)
2965 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
2968 && !rtx_equal_p (operands[2], operands[5])
2969 && !rtx_equal_p (operands[3], operands[5])
2970 && !rtx_equal_p (operands[4], operands[5])"
2972 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2973 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2975 "&& reload_completed
2976 && !CONSTANT_P (operands[5])
2977 && !rtx_equal_p (operands[0], operands[5])"
2979 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
2980 operands[5], operands[1]));
2981 operands[5] = operands[4] = operands[0];
2983 [(set_attr "movprfx" "yes")]
2986 ;; Shift an SVE vector left and insert a scalar into element 0.
2987 (define_insn "vec_shl_insert_<mode>"
2988 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
2990 [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
2991 (match_operand:<VEL> 2 "register_operand" "rZ, w")]
2995 insr\t%0.<Vetype>, %<vwcore>2
2996 insr\t%0.<Vetype>, %<Vetype>2"
2999 (define_expand "copysign<mode>3"
3000 [(match_operand:SVE_F 0 "register_operand")
3001 (match_operand:SVE_F 1 "register_operand")
3002 (match_operand:SVE_F 2 "register_operand")]
3005 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
3006 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
3007 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
3008 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
3010 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
3011 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
3013 emit_insn (gen_and<v_int_equiv>3
3015 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3018 emit_insn (gen_and<v_int_equiv>3
3020 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3023 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
3024 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
3029 (define_expand "xorsign<mode>3"
3030 [(match_operand:SVE_F 0 "register_operand")
3031 (match_operand:SVE_F 1 "register_operand")
3032 (match_operand:SVE_F 2 "register_operand")]
3035 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
3036 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
3037 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
3039 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
3040 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
3042 emit_insn (gen_and<v_int_equiv>3
3044 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3047 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
3048 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
3053 ;; Unpredicated DOT product.
3054 (define_insn "<sur>dot_prod<vsi2qi>"
3055 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
3058 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
3059 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
3061 (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
3064 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
3065 movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
3066 [(set_attr "movprfx" "*,yes")]
3069 ;; Unpredicated integer absolute difference.
;; Expander only: SVE ABD is inherently predicated, so this creates an
;; all-true predicate (PTRUE) for <VPRED>mode and forwards to the
;; predicated pattern aarch64_<su>abd<mode>_3 below.  USMAX selects the
;; signed (SABD) or unsigned (UABD) variant via the <su> iterator.
;; NOTE(review): the expander's condition, closing lines, and the tail
;; of the emit_insn call are missing from this extract.
3070 (define_expand "<su>abd<mode>_3"
3071 [(use (match_operand:SVE_I 0 "register_operand"))
3072 (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
3073 (match_operand:SVE_I 2 "register_operand"))]
/* All-true governing predicate for the predicated ABD instruction.  */
3076 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3077 emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
3083 ;; Predicated integer absolute difference.
;; Matches an UNSPEC_MERGE_PTRUE-wrapped absolute difference; operand 1
;; is the governing predicate (Upl).  Alternative 1 ties operand 2
;; (constraint "0") to the destination, matching [SU]ABD's destructive
;; register form; alternative 2 frees that tie by prefixing with
;; MOVPRFX %0, %2.
;; NOTE(review): interior lines of the RTL pattern (the minus/max
;; structure between the operands) are missing from this extract.
3084 (define_insn "aarch64_<su>abd<mode>_3"
3085 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3087 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3090 (match_operand:SVE_I 2 "register_operand" "0, w")
3091 (match_operand:SVE_I 3 "register_operand" "w, w"))
3095 UNSPEC_MERGE_PTRUE))]
3098 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3099 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3100 [(set_attr "movprfx" "*,yes")]
3103 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
3104 ;; operands 1 and 2.  The sequence also has to perform a widening reduction of
3105 ;; the difference into a vector and accumulate that into operand 3 before
3106 ;; copying that into the result operand 0.
3107 ;; Perform that with a sequence of:
3109 ;; [SU]ABD	diff.b, p0/m, op1.b, op2.b
3110 ;; MOVPRFX	op0, op3      // If necessary
3111 ;; UDOT	op0.s, diff.b, ones.b
;;
;; Note that the final dot product is always the *unsigned* UDOT, even
;; for the signed variant of this expander: the [SU]ABD result is an
;; absolute difference and therefore non-negative, so an unsigned
;; widening sum of it against an all-ones vector is correct for both.
;; NOTE(review): the expander's condition line and closing braces are
;; missing from this extract.
3113 (define_expand "<sur>sad<vsi2qi>"
3114 [(use (match_operand:SVE_SDI 0 "register_operand"))
3115 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
3116 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
3117 (use (match_operand:SVE_SDI 3 "register_operand"))]
/* ones = vector of QI-element 1s: multiplying diff by 1 and summing
   implements the widening reduction step of the SAD.  */
3120 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
3121 rtx diff = gen_reg_rtx (<VSI2QI>mode);
/* diff = |op1 - op2| per QI lane, then op0 = op3 + dot(diff, ones).  */
3122 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
3123 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
3128 ;; Standard pattern name vec_init<mode><Vel>.
3129 (define_expand "vec_init<mode><Vel>"
3130 [(match_operand:SVE_ALL 0 "register_operand" "")
3131 (match_operand 1 "" "")]
3134 aarch64_sve_expand_vector_init (operands[0], operands[1]);