]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64-sve.md
* update-copyright.py: Add Gerard Jungman as external author.
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-sve.md
CommitLineData
8fa7f434 1;; Machine description for AArch64 SVE.
2;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3. If not see
19;; <http://www.gnu.org/licenses/>.
20
21;; Note on the handling of big-endian SVE
22;; --------------------------------------
23;;
24;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
25;; same way as movdi or movti would: the first byte of memory goes
26;; into the most significant byte of the register and the last byte
27;; of memory goes into the least significant byte of the register.
28;; This is the most natural ordering for Advanced SIMD and matches
29;; the ABI layout for 64-bit and 128-bit vector types.
30;;
31;; As a result, the order of bytes within the register is what GCC
32;; expects for a big-endian target, and subreg offsets therefore work
33;; as expected, with the first element in memory having subreg offset 0
34;; and the last element in memory having the subreg offset associated
35;; with a big-endian lowpart. However, this ordering also means that
36;; GCC's lane numbering does not match the architecture's numbering:
37;; GCC always treats the element at the lowest address in memory
38;; (subreg offset 0) as element 0, while the architecture treats
39;; the least significant end of the register as element 0.
40;;
41;; The situation for SVE is different. We want the layout of the
42;; SVE register to be same for mov<mode> as it is for maskload<mode>:
43;; logically, a mov<mode> load must be indistinguishable from a
44;; maskload<mode> whose mask is all true. We therefore need the
45;; register layout to match LD1 rather than LDR. The ABI layout of
46;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
47;;
48;; As a result, the architecture lane numbering matches GCC's lane
49;; numbering, with element 0 always being the first in memory.
50;; However:
51;;
52;; - Applying a subreg offset to a register does not give the element
53;; that GCC expects: the first element in memory has the subreg offset
54;; associated with a big-endian lowpart while the last element in memory
55;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
56;;
57;; - We cannot use LDR and STR for spill slots that might be accessed
58;; via subregs, since although the elements have the order GCC expects,
59;; the order of the bytes within the elements is different. We instead
60;; access spill slots via LD1 and ST1, using secondary reloads to
61;; reserve a predicate register.
62
63
64;; SVE data moves.
65(define_expand "mov<mode>"
66 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
67 (match_operand:SVE_ALL 1 "general_operand"))]
68 "TARGET_SVE"
69 {
70 /* Use the predicated load and store patterns where possible.
71 This is required for big-endian targets (see the comment at the
72 head of the file) and increases the addressing choices for
73 little-endian. */
74 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
75 && can_create_pseudo_p ())
76 {
77 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
78 DONE;
79 }
80
81 if (CONSTANT_P (operands[1]))
82 {
83 aarch64_expand_mov_immediate (operands[0], operands[1],
84 gen_vec_duplicate<mode>);
85 DONE;
86 }
70857087 87
88 /* Optimize subregs on big-endian targets: we can use REV[BHW]
89 instead of going through memory. */
90 if (BYTES_BIG_ENDIAN
91 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
92 DONE;
93 }
94)
95
96;; A pattern for optimizing SUBREGs that have a reinterpreting effect
97;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
98;; for details. We use a special predicate for operand 2 to reduce
99;; the number of patterns.
100(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
101 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
102 (unspec:SVE_ALL
103 [(match_operand:VNx16BI 1 "register_operand" "Upl")
104 (match_operand 2 "aarch64_any_register_operand" "w")]
105 UNSPEC_REV_SUBREG))]
106 "TARGET_SVE && BYTES_BIG_ENDIAN"
107 "#"
108 "&& reload_completed"
109 [(const_int 0)]
110 {
111 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
112 DONE;
8fa7f434 113 }
114)
115
116;; Unpredicated moves (little-endian). Only allow memory operations
117;; during and after RA; before RA we want the predicated load and
118;; store patterns to be used instead.
119(define_insn "*aarch64_sve_mov<mode>_le"
120 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
121 (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
122 "TARGET_SVE
123 && !BYTES_BIG_ENDIAN
124 && ((lra_in_progress || reload_completed)
125 || (register_operand (operands[0], <MODE>mode)
126 && nonmemory_operand (operands[1], <MODE>mode)))"
127 "@
128 ldr\t%0, %1
129 str\t%1, %0
130 mov\t%0.d, %1.d
131 * return aarch64_output_sve_mov_immediate (operands[1]);"
132)
133
134;; Unpredicated moves (big-endian). Memory accesses require secondary
135;; reloads.
136(define_insn "*aarch64_sve_mov<mode>_be"
137 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
138 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
139 "TARGET_SVE && BYTES_BIG_ENDIAN"
140 "@
141 mov\t%0.d, %1.d
142 * return aarch64_output_sve_mov_immediate (operands[1]);"
143)
144
145;; Handle big-endian memory reloads. We use byte PTRUE for all modes
146;; to try to encourage reuse.
147(define_expand "aarch64_sve_reload_be"
148 [(parallel
149 [(set (match_operand 0)
150 (match_operand 1))
151 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
152 "TARGET_SVE && BYTES_BIG_ENDIAN"
153 {
154 /* Create a PTRUE. */
155 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
156
157 /* Refer to the PTRUE in the appropriate mode for this move. */
158 machine_mode mode = GET_MODE (operands[0]);
159 machine_mode pred_mode
160 = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
161 rtx pred = gen_lowpart (pred_mode, operands[2]);
162
163 /* Emit a predicated load or store. */
164 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
165 DONE;
166 }
167)
168
169;; A predicated load or store for which the predicate is known to be
170;; all-true. Note that this pattern is generated directly by
171;; aarch64_emit_sve_pred_move, so changes to this pattern will
172;; need changes there as well.
173(define_insn "*pred_mov<mode>"
174 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m")
175 (unspec:SVE_ALL
176 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
177 (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")]
178 UNSPEC_MERGE_PTRUE))]
179 "TARGET_SVE
180 && (register_operand (operands[0], <MODE>mode)
181 || register_operand (operands[2], <MODE>mode))"
182 "@
183 ld1<Vesize>\t%0.<Vetype>, %1/z, %2
184 st1<Vesize>\t%2.<Vetype>, %1, %0"
185)
186
187(define_expand "movmisalign<mode>"
188 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
189 (match_operand:SVE_ALL 1 "general_operand"))]
190 "TARGET_SVE"
191 {
192 /* Equivalent to a normal move for our purpooses. */
193 emit_move_insn (operands[0], operands[1]);
194 DONE;
195 }
196)
197
198(define_insn "maskload<mode><vpred>"
199 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
200 (unspec:SVE_ALL
201 [(match_operand:<VPRED> 2 "register_operand" "Upl")
202 (match_operand:SVE_ALL 1 "memory_operand" "m")]
203 UNSPEC_LD1_SVE))]
204 "TARGET_SVE"
205 "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
206)
207
208(define_insn "maskstore<mode><vpred>"
209 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
210 (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
211 (match_operand:SVE_ALL 1 "register_operand" "w")
212 (match_dup 0)]
213 UNSPEC_ST1_SVE))]
214 "TARGET_SVE"
215 "st1<Vesize>\t%1.<Vetype>, %2, %0"
216)
217
1619606c 218;; Unpredicated gather loads.
219(define_expand "gather_load<mode>"
220 [(set (match_operand:SVE_SD 0 "register_operand")
221 (unspec:SVE_SD
222 [(match_dup 5)
223 (match_operand:DI 1 "aarch64_reg_or_zero")
224 (match_operand:<V_INT_EQUIV> 2 "register_operand")
225 (match_operand:DI 3 "const_int_operand")
226 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
227 (mem:BLK (scratch))]
228 UNSPEC_LD1_GATHER))]
229 "TARGET_SVE"
230 {
231 operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
232 }
233)
234
235;; Predicated gather loads for 32-bit elements. Operand 3 is true for
236;; unsigned extension and false for signed extension.
237(define_insn "mask_gather_load<mode>"
238 [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
239 (unspec:SVE_S
240 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
241 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
242 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
243 (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
244 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
245 (mem:BLK (scratch))]
246 UNSPEC_LD1_GATHER))]
247 "TARGET_SVE"
248 "@
249 ld1w\t%0.s, %5/z, [%2.s]
250 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
251 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
252 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
253 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
254)
255
256;; Predicated gather loads for 64-bit elements. The value of operand 3
257;; doesn't matter in this case.
258(define_insn "mask_gather_load<mode>"
259 [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
260 (unspec:SVE_D
261 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
262 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
263 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
264 (match_operand:DI 3 "const_int_operand")
265 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
266 (mem:BLK (scratch))]
267 UNSPEC_LD1_GATHER))]
268 "TARGET_SVE"
269 "@
270 ld1d\t%0.d, %5/z, [%2.d]
271 ld1d\t%0.d, %5/z, [%1, %2.d]
272 ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
273)
274
0bf8b382 275;; Unpredicated scatter store.
276(define_expand "scatter_store<mode>"
277 [(set (mem:BLK (scratch))
278 (unspec:BLK
279 [(match_dup 5)
280 (match_operand:DI 0 "aarch64_reg_or_zero")
281 (match_operand:<V_INT_EQUIV> 1 "register_operand")
282 (match_operand:DI 2 "const_int_operand")
283 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
284 (match_operand:SVE_SD 4 "register_operand")]
285 UNSPEC_ST1_SCATTER))]
286 "TARGET_SVE"
287 {
288 operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
289 }
290)
291
292;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
293;; unsigned extension and false for signed extension.
294(define_insn "mask_scatter_store<mode>"
295 [(set (mem:BLK (scratch))
296 (unspec:BLK
297 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
298 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
299 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
300 (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
301 (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
302 (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
303 UNSPEC_ST1_SCATTER))]
304 "TARGET_SVE"
305 "@
306 st1w\t%4.s, %5, [%1.s]
307 st1w\t%4.s, %5, [%0, %1.s, sxtw]
308 st1w\t%4.s, %5, [%0, %1.s, uxtw]
309 st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
310 st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
311)
312
313;; Predicated scatter stores for 64-bit elements. The value of operand 2
314;; doesn't matter in this case.
315(define_insn "mask_scatter_store<mode>"
316 [(set (mem:BLK (scratch))
317 (unspec:BLK
318 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
319 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
320 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
321 (match_operand:DI 2 "const_int_operand")
322 (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
323 (match_operand:SVE_D 4 "register_operand" "w, w, w")]
324 UNSPEC_ST1_SCATTER))]
325 "TARGET_SVE"
326 "@
327 st1d\t%4.d, %5, [%1.d]
328 st1d\t%4.d, %5, [%0, %1.d]
329 st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
330)
331
0ac5a51b 332;; SVE structure moves.
333(define_expand "mov<mode>"
334 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
335 (match_operand:SVE_STRUCT 1 "general_operand"))]
336 "TARGET_SVE"
337 {
338 /* Big-endian loads and stores need to be done via LD1 and ST1;
339 see the comment at the head of the file for details. */
340 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
341 && BYTES_BIG_ENDIAN)
342 {
343 gcc_assert (can_create_pseudo_p ());
344 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
345 DONE;
346 }
347
348 if (CONSTANT_P (operands[1]))
349 {
350 aarch64_expand_mov_immediate (operands[0], operands[1]);
351 DONE;
352 }
353 }
354)
355
356;; Unpredicated structure moves (little-endian).
357(define_insn "*aarch64_sve_mov<mode>_le"
358 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
359 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
360 "TARGET_SVE && !BYTES_BIG_ENDIAN"
361 "#"
362 [(set_attr "length" "<insn_length>")]
363)
364
365;; Unpredicated structure moves (big-endian). Memory accesses require
366;; secondary reloads.
367(define_insn "*aarch64_sve_mov<mode>_le"
368 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
369 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
370 "TARGET_SVE && BYTES_BIG_ENDIAN"
371 "#"
372 [(set_attr "length" "<insn_length>")]
373)
374
375;; Split unpredicated structure moves into pieces. This is the same
376;; for both big-endian and little-endian code, although it only needs
377;; to handle memory operands for little-endian code.
378(define_split
379 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
380 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
381 "TARGET_SVE && reload_completed"
382 [(const_int 0)]
383 {
384 rtx dest = operands[0];
385 rtx src = operands[1];
386 if (REG_P (dest) && REG_P (src))
387 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
388 else
389 for (unsigned int i = 0; i < <vector_count>; ++i)
390 {
391 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
392 i * BYTES_PER_SVE_VECTOR);
393 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
394 i * BYTES_PER_SVE_VECTOR);
395 emit_insn (gen_rtx_SET (subdest, subsrc));
396 }
397 DONE;
398 }
399)
400
401;; Predicated structure moves. This works for both endiannesses but in
402;; practice is only useful for big-endian.
403(define_insn_and_split "pred_mov<mode>"
404 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx")
405 (unspec:SVE_STRUCT
406 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
407 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")]
408 UNSPEC_MERGE_PTRUE))]
409 "TARGET_SVE
410 && (register_operand (operands[0], <MODE>mode)
411 || register_operand (operands[2], <MODE>mode))"
412 "#"
413 "&& reload_completed"
414 [(const_int 0)]
415 {
416 for (unsigned int i = 0; i < <vector_count>; ++i)
417 {
418 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
419 <MODE>mode,
420 i * BYTES_PER_SVE_VECTOR);
421 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
422 <MODE>mode,
423 i * BYTES_PER_SVE_VECTOR);
424 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
425 }
426 DONE;
427 }
428 [(set_attr "length" "<insn_length>")]
429)
430
8fa7f434 431(define_expand "mov<mode>"
432 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
433 (match_operand:PRED_ALL 1 "general_operand"))]
434 "TARGET_SVE"
435 {
436 if (GET_CODE (operands[0]) == MEM)
437 operands[1] = force_reg (<MODE>mode, operands[1]);
438 }
439)
440
441(define_insn "*aarch64_sve_mov<mode>"
442 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
443 (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
444 "TARGET_SVE
445 && (register_operand (operands[0], <MODE>mode)
446 || register_operand (operands[1], <MODE>mode))"
447 "@
448 mov\t%0.b, %1.b
449 str\t%1, %0
450 ldr\t%0, %1
451 pfalse\t%0.b
452 * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
453)
454
455;; Handle extractions from a predicate by converting to an integer vector
456;; and extracting from there.
457(define_expand "vec_extract<vpred><Vel>"
458 [(match_operand:<VEL> 0 "register_operand")
459 (match_operand:<VPRED> 1 "register_operand")
460 (match_operand:SI 2 "nonmemory_operand")
461 ;; Dummy operand to which we can attach the iterator.
462 (reg:SVE_I V0_REGNUM)]
463 "TARGET_SVE"
464 {
465 rtx tmp = gen_reg_rtx (<MODE>mode);
466 emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
467 CONST1_RTX (<MODE>mode),
468 CONST0_RTX (<MODE>mode)));
469 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
470 DONE;
471 }
472)
473
474(define_expand "vec_extract<mode><Vel>"
475 [(set (match_operand:<VEL> 0 "register_operand")
476 (vec_select:<VEL>
477 (match_operand:SVE_ALL 1 "register_operand")
478 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
479 "TARGET_SVE"
480 {
481 poly_int64 val;
482 if (poly_int_rtx_p (operands[2], &val)
483 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
484 {
485 /* The last element can be extracted with a LASTB and a false
486 predicate. */
487 rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
384eaff1 488 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
8fa7f434 489 DONE;
490 }
491 if (!CONST_INT_P (operands[2]))
492 {
493 /* Create an index with operand[2] as the base and -1 as the step.
494 It will then be zero for the element we care about. */
495 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
496 index = force_reg (<VEL_INT>mode, index);
497 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
498 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
499
500 /* Get a predicate that is true for only that element. */
501 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
502 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
503 rtx sel = gen_reg_rtx (<VPRED>mode);
504 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
505
506 /* Select the element using LASTB. */
384eaff1 507 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
8fa7f434 508 DONE;
509 }
510 }
511)
512
7ec0c7f5 513;; Extract element zero. This is a special case because we want to force
514;; the registers to be the same for the second alternative, and then
515;; split the instruction into nothing after RA.
516(define_insn_and_split "*vec_extract<mode><Vel>_0"
517 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
518 (vec_select:<VEL>
519 (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
520 (parallel [(const_int 0)])))]
521 "TARGET_SVE"
522 {
523 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
524 switch (which_alternative)
525 {
526 case 0:
527 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
528 case 1:
529 return "#";
530 case 2:
531 return "st1\\t{%1.<Vetype>}[0], %0";
532 default:
533 gcc_unreachable ();
534 }
535 }
536 "&& reload_completed
537 && REG_P (operands[0])
538 && REGNO (operands[0]) == REGNO (operands[1])"
539 [(const_int 0)]
540 {
541 emit_note (NOTE_INSN_DELETED);
542 DONE;
543 }
544 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
545)
546
8fa7f434 547;; Extract an element from the Advanced SIMD portion of the register.
548;; We don't just reuse the aarch64-simd.md pattern because we don't
7ec0c7f5 549;; want any change in lane number on big-endian targets.
8fa7f434 550(define_insn "*vec_extract<mode><Vel>_v128"
551 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
552 (vec_select:<VEL>
553 (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
554 (parallel [(match_operand:SI 2 "const_int_operand")])))]
555 "TARGET_SVE
7ec0c7f5 556 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
8fa7f434 557 {
7ec0c7f5 558 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
8fa7f434 559 switch (which_alternative)
560 {
561 case 0:
562 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
563 case 1:
564 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
565 case 2:
566 return "st1\\t{%1.<Vetype>}[%2], %0";
567 default:
568 gcc_unreachable ();
569 }
570 }
571 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
572)
573
574;; Extract an element in the range of DUP. This pattern allows the
575;; source and destination to be different.
576(define_insn "*vec_extract<mode><Vel>_dup"
577 [(set (match_operand:<VEL> 0 "register_operand" "=w")
578 (vec_select:<VEL>
579 (match_operand:SVE_ALL 1 "register_operand" "w")
580 (parallel [(match_operand:SI 2 "const_int_operand")])))]
581 "TARGET_SVE
582 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
583 {
584 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
585 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
586 }
587)
588
589;; Extract an element outside the range of DUP. This pattern requires the
590;; source and destination to be the same.
591(define_insn "*vec_extract<mode><Vel>_ext"
592 [(set (match_operand:<VEL> 0 "register_operand" "=w")
593 (vec_select:<VEL>
594 (match_operand:SVE_ALL 1 "register_operand" "0")
595 (parallel [(match_operand:SI 2 "const_int_operand")])))]
596 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
597 {
598 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
599 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
600 return "ext\t%0.b, %0.b, %0.b, #%2";
601 }
602)
603
604;; Extract the last active element of operand 1 into operand 0.
605;; If no elements are active, extract the last inactive element instead.
384eaff1 606(define_insn "extract_last_<mode>"
8fa7f434 607 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
608 (unspec:<VEL>
609 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
610 (match_operand:SVE_ALL 2 "register_operand" "w, w")]
611 UNSPEC_LASTB))]
612 "TARGET_SVE"
613 "@
614 lastb\t%<vwcore>0, %1, %2.<Vetype>
615 lastb\t%<Vetype>0, %1, %2.<Vetype>"
616)
617
618(define_expand "vec_duplicate<mode>"
619 [(parallel
620 [(set (match_operand:SVE_ALL 0 "register_operand")
621 (vec_duplicate:SVE_ALL
622 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
623 (clobber (scratch:<VPRED>))])]
624 "TARGET_SVE"
625 {
626 if (MEM_P (operands[1]))
627 {
628 rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
629 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
630 CONST0_RTX (<MODE>mode)));
631 DONE;
632 }
633 }
634)
635
636;; Accept memory operands for the benefit of combine, and also in case
637;; the scalar input gets spilled to memory during RA. We want to split
638;; the load at the first opportunity in order to allow the PTRUE to be
639;; optimized with surrounding code.
640(define_insn_and_split "*vec_duplicate<mode>_reg"
641 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
642 (vec_duplicate:SVE_ALL
643 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
644 (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
645 "TARGET_SVE"
646 "@
647 mov\t%0.<Vetype>, %<vwcore>1
648 mov\t%0.<Vetype>, %<Vetype>1
649 #"
650 "&& MEM_P (operands[1])"
651 [(const_int 0)]
652 {
653 if (GET_CODE (operands[2]) == SCRATCH)
654 operands[2] = gen_reg_rtx (<VPRED>mode);
655 emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
656 emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
657 CONST0_RTX (<MODE>mode)));
658 DONE;
659 }
660 [(set_attr "length" "4,4,8")]
661)
662
663;; This is used for vec_duplicate<mode>s from memory, but can also
664;; be used by combine to optimize selects of a a vec_duplicate<mode>
665;; with zero.
666(define_insn "sve_ld1r<mode>"
667 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
668 (unspec:SVE_ALL
669 [(match_operand:<VPRED> 1 "register_operand" "Upl")
670 (vec_duplicate:SVE_ALL
671 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
672 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
673 UNSPEC_SEL))]
674 "TARGET_SVE"
675 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
676)
677
678;; Load 128 bits from memory and duplicate to fill a vector. Since there
679;; are so few operations on 128-bit "elements", we don't define a VNx1TI
680;; and simply use vectors of bytes instead.
4a5920b6 681(define_insn "*sve_ld1rq<Vesize>"
682 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
683 (unspec:SVE_ALL
684 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8fa7f434 685 (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
686 UNSPEC_LD1RQ))]
687 "TARGET_SVE"
4a5920b6 688 "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
8fa7f434 689)
690
691;; Implement a predicate broadcast by shifting the low bit of the scalar
692;; input into the top bit and using a WHILELO. An alternative would be to
693;; duplicate the input and do a compare with zero.
694(define_expand "vec_duplicate<mode>"
695 [(set (match_operand:PRED_ALL 0 "register_operand")
696 (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
697 "TARGET_SVE"
698 {
699 rtx tmp = gen_reg_rtx (DImode);
700 rtx op1 = gen_lowpart (DImode, operands[1]);
701 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
702 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
703 DONE;
704 }
705)
706
707(define_insn "vec_series<mode>"
708 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
709 (vec_series:SVE_I
710 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
711 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
712 "TARGET_SVE"
713 "@
714 index\t%0.<Vetype>, #%1, %<vw>2
715 index\t%0.<Vetype>, %<vw>1, #%2
716 index\t%0.<Vetype>, %<vw>1, %<vw>2"
717)
718
719;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
720;; of an INDEX instruction.
721(define_insn "*vec_series<mode>_plus"
722 [(set (match_operand:SVE_I 0 "register_operand" "=w")
723 (plus:SVE_I
724 (vec_duplicate:SVE_I
725 (match_operand:<VEL> 1 "register_operand" "r"))
726 (match_operand:SVE_I 2 "immediate_operand")))]
727 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
728 {
729 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
730 return "index\t%0.<Vetype>, %<vw>1, #%2";
731 }
732)
733
0ac5a51b 734;; Unpredicated LD[234].
735(define_expand "vec_load_lanes<mode><vsingle>"
736 [(set (match_operand:SVE_STRUCT 0 "register_operand")
737 (unspec:SVE_STRUCT
738 [(match_dup 2)
739 (match_operand:SVE_STRUCT 1 "memory_operand")]
740 UNSPEC_LDN))]
741 "TARGET_SVE"
742 {
743 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
744 }
745)
746
747;; Predicated LD[234].
748(define_insn "vec_mask_load_lanes<mode><vsingle>"
749 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
750 (unspec:SVE_STRUCT
751 [(match_operand:<VPRED> 2 "register_operand" "Upl")
752 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
753 UNSPEC_LDN))]
754 "TARGET_SVE"
755 "ld<vector_count><Vesize>\t%0, %2/z, %1"
756)
757
758;; Unpredicated ST[234]. This is always a full update, so the dependence
759;; on the old value of the memory location (via (match_dup 0)) is redundant.
760;; There doesn't seem to be any obvious benefit to treating the all-true
761;; case differently though. In particular, it's very unlikely that we'll
762;; only find out during RTL that a store_lanes is dead.
763(define_expand "vec_store_lanes<mode><vsingle>"
764 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
765 (unspec:SVE_STRUCT
766 [(match_dup 2)
767 (match_operand:SVE_STRUCT 1 "register_operand")
768 (match_dup 0)]
769 UNSPEC_STN))]
770 "TARGET_SVE"
771 {
772 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
773 }
774)
775
776;; Predicated ST[234].
777(define_insn "vec_mask_store_lanes<mode><vsingle>"
778 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
779 (unspec:SVE_STRUCT
780 [(match_operand:<VPRED> 2 "register_operand" "Upl")
781 (match_operand:SVE_STRUCT 1 "register_operand" "w")
782 (match_dup 0)]
783 UNSPEC_STN))]
784 "TARGET_SVE"
785 "st<vector_count><Vesize>\t%1, %2, %0"
786)
787
8fa7f434 788(define_expand "vec_perm<mode>"
789 [(match_operand:SVE_ALL 0 "register_operand")
790 (match_operand:SVE_ALL 1 "register_operand")
791 (match_operand:SVE_ALL 2 "register_operand")
792 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
793 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
794 {
795 aarch64_expand_sve_vec_perm (operands[0], operands[1],
796 operands[2], operands[3]);
797 DONE;
798 }
799)
800
801(define_insn "*aarch64_sve_tbl<mode>"
802 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
803 (unspec:SVE_ALL
804 [(match_operand:SVE_ALL 1 "register_operand" "w")
805 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
806 UNSPEC_TBL))]
807 "TARGET_SVE"
808 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
809)
810
811(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
812 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
813 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
814 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
815 PERMUTE))]
816 "TARGET_SVE"
817 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
818)
819
;; Permute the elements of two input vectors, with the specific operation
;; (e.g. ZIP/UZP/TRN, selected by the PERMUTE iterator) encoded in the
;; <perm_insn><perm_hilo> attributes.
(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
                         (match_operand:SVE_ALL 2 "register_operand" "w")]
                        PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Reverse the <Vetype> elements within each 64-bit doubleword.  The
;; governing predicate is known to be a PTRUE (UNSPEC_MERGE_PTRUE), so
;; every doubleword is processed.
(define_insn "*aarch64_sve_rev64<mode>"
  [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
        (unspec:SVE_BHS
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
                           UNSPEC_REV64)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.d, %1/m, %2.d"
)

;; Likewise, reverse the <Vetype> elements within each 32-bit word.
(define_insn "*aarch64_sve_rev32<mode>"
  [(set (match_operand:SVE_BH 0 "register_operand" "=w")
        (unspec:SVE_BH
          [(match_operand:VNx4BI 1 "register_operand" "Upl")
           (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
                          UNSPEC_REV32)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.s, %1/m, %2.s"
)

;; Likewise, reverse the bytes within each 16-bit halfword.
(define_insn "*aarch64_sve_rev16vnx16qi"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(match_operand:VNx8BI 1 "register_operand" "Upl")
           (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
                           UNSPEC_REV16)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "revb\t%0.h, %1/m, %2.h"
)

;; Reverse the order of all elements in a vector (unpredicated).
(define_insn "*aarch64_sve_rev<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
                        UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vetype>, %1.<Vetype>"
)

;; Broadcast element %2 of the input vector to every element of the result.
;; The byte index of the selected element must fit the instruction's
;; immediate range, hence the 0..63 check on the scaled lane number.
(define_insn "*aarch64_sve_dup_lane<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (vec_duplicate:SVE_ALL
          (vec_select:<VEL>
            (match_operand:SVE_ALL 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "const_int_operand")]))))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
  "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
)

;; Note that the immediate (third) operand is the lane index not
;; the byte index; the output routine scales it to a byte offset,
;; which must lie in [0, 255] for the EXT encoding.
(define_insn "*aarch64_sve_ext<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
                         (match_operand:SVE_ALL 2 "register_operand" "w")
                         (match_operand:SI 3 "const_int_operand")]
                        UNSPEC_EXT))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
  {
    operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\\t%0.b, %0.b, %2.b, #%3";
  }
)
;; Unpredicated integer addition.  Alternatives cover an add immediate,
;; a subtract of the negated immediate, an INC/DEC-style immediate
;; (emitted by aarch64_output_sve_inc_dec_immediate) and a plain
;; register-register ADD.
(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
        (plus:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
          (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Unpredicated integer subtraction.  The second alternative handles an
;; immediate minuend via SUBR (reversed subtract).
(define_insn "sub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (minus:SVE_I
          (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
          (match_operand:SVE_I 2 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)

;; Unpredicated multiplication.  MUL has no unpredicated form, so wrap
;; the multiplication in an UNSPEC_MERGE_PTRUE with an all-true predicate.
(define_expand "mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (mult:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Multiplication predicated with a PTRUE.  We don't actually need the
;; predicate for the first alternative, but using Upa or X isn't likely
;; to gain much and would make the instruction seem less uniform to the
;; register allocator.
(define_insn_and_split "*mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (mult:SVE_I
             (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
             (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   #
   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)

;; Unpredicated multiplications by a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (mult:SVE_I
          (match_operand:SVE_I 1 "register_operand" "0")
          (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
  "TARGET_SVE && reload_completed"
  "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
)
;; Multiply-add: %0 = %2 * %3 + %4, predicated on a PTRUE.
;; MAD ties the addend through %0 == %2, MLA through %0 == %4; the third
;; alternative uses MOVPRFX when no operand can be tied.
(define_insn "*madd<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (plus:SVE_I
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
            UNSPEC_MERGE_PTRUE)
          (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Multiply-subtract: %0 = %4 - %2 * %3, predicated on a PTRUE.
;; MSB/MLS mirror the MAD/MLA tying rules above.
(define_insn "*msub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (minus:SVE_I
          (match_operand:SVE_I 4 "register_operand" "w, 0, w")
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
            UNSPEC_MERGE_PTRUE)))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
;; Unpredicated highpart multiplication.
(define_expand "<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
                          (match_operand:SVE_I 2 "register_operand")]
                         MUL_HIGHPART)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Predicated highpart multiplication.  SMULH/UMULH tie the first source
;; to the destination; the second alternative uses MOVPRFX instead.
(define_insn "*<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
                          (match_operand:SVE_I 3 "register_operand" "w, w")]
                         MUL_HIGHPART)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated division (32-bit and 64-bit element modes only).
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_SDI 0 "register_operand")
        (unspec:SVE_SDI
          [(match_dup 3)
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 1 "register_operand")
             (match_operand:SVE_SDI 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Division predicated with a PTRUE.  The second alternative ties the
;; divisor to the destination and uses the reversed form (<sve_int_op>r);
;; the third falls back to MOVPRFX.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
             (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
;; Unpredicated NEG, NOT and POPCOUNT.  These operations only exist in
;; predicated form, so supply an all-true predicate.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 2)
           (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; NEG, NOT and POPCOUNT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (SVE_INT_UNARY:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
;; Vector AND, ORR and XOR.  The register form always uses the .d element
;; size, since logical operations are element-size agnostic.
(define_insn "<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (LOGICAL:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, w")
          (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
  "TARGET_SVE"
  "@
   <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
   <logical>\t%0.d, %1.d, %2.d"
)

;; Vector AND, ORR and XOR on floating-point modes.  We avoid subregs
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
                       (match_operand:SVE_F 2 "register_operand" "w")]
                      LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)

;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
;; this pattern even though the NOT instruction itself is predicated.
;; Note the operand swap: BIC computes %2 AND NOT %1.
(define_insn "bic<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (and:SVE_I
          (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
          (match_operand:SVE_I 2 "register_operand" "w")))]
  "TARGET_SVE"
  "bic\t%0.d, %2.d, %1.d"
)
;; Predicate AND.  We can reuse one of the inputs as the GP.
(define_insn "and<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
                      (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
  "TARGET_SVE"
  "and\t%0.b, %1/z, %1.b, %2.b"
)

;; Unpredicated predicate ORR and XOR.  Model the unpredicated form as
;; the predicated form ANDed with an all-true GP.
(define_expand "<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (LOGICAL_OR:PRED_ALL
            (match_operand:PRED_ALL 1 "register_operand")
            (match_operand:PRED_ALL 2 "register_operand"))
          (match_dup 3)))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
  }
)

;; Predicated predicate ORR and XOR.
(define_insn "pred_<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (LOGICAL:PRED_ALL
            (match_operand:PRED_ALL 2 "register_operand" "Upa")
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical>\t%0.b, %1/z, %2.b, %3.b"
)

;; Perform a logical operation on operands 2 and 3, using operand 1 as
;; the GP (which is known to be a PTRUE).  Store the result in operand 0
;; and set the flags in the same way as for PTEST.  The (and ...) in the
;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
;; value is structurally equivalent to rhs of the second set.
(define_insn "*<optab><mode>3_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
                      (and:PRED_ALL
                        (LOGICAL:PRED_ALL
                          (match_operand:PRED_ALL 2 "register_operand" "Upa")
                          (match_operand:PRED_ALL 3 "register_operand" "Upa"))
                        (match_dup 1))]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
                      (match_dup 1)))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Unpredicated predicate inverse.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
  }
)

;; Predicated predicate inverse.
(define_insn "*one_cmpl<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "not\t%0.b, %1/z, %2.b"
)

;; Predicated predicate BIC and ORN.  The inverted operand (%2) comes
;; second in the assembly syntax, hence the %3/%2 operand order.
(define_insn "*<nlogical><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
)

;; Predicated predicate NAND and NOR.
(define_insn "*<logical_nn><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)
;; Unpredicated LSL, LSR and ASR by a vector.
(define_expand "v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (ASHIFT:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; LSL, LSR and ASR by a vector, predicated with a PTRUE.  We don't
;; actually need the predicate for the first alternative, but using Upa
;; or X isn't likely to gain much and would make the instruction seem
;; less uniform to the register allocator.
(define_insn_and_split "*v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (ASHIFT:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w, 0, w")
             (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   #
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ;; Split the immediate-shift form after reload, so that we don't have
  ;; the unnecessary PTRUE.
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)

;; Unpredicated shift operations by a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (ASHIFT:SVE_I
          (match_operand:SVE_I 1 "register_operand" "w")
          (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
  "TARGET_SVE && reload_completed"
  "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)

;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; shifts above.  A constant amount is duplicated into a constant vector
;; (forced to a register if out of range); a variable amount is widened
;; or truncated to the element mode and broadcast.
(define_expand "<ASHIFT:optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
                      (match_operand:<VEL> 2 "general_operand")))]
  "TARGET_SVE"
  {
    rtx amount;
    if (CONST_INT_P (operands[2]))
      {
        amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
        if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
          amount = force_reg (<MODE>mode, amount);
      }
    else
      {
        amount = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_vec_duplicate<mode> (amount,
                                            convert_to_mode (<VEL>mode,
                                                             operands[2], 0)));
      }
    emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
    DONE;
  }
)
;; Test all bits of operand 1.  Operand 0 is a GP that is known to hold PTRUE.
;;
;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
;; is a PTRUE even if the optimizers haven't yet been able to propagate
;; the constant.  We would use a separate unspec code for PTESTs involving
;; GPs that might not be PTRUEs.
(define_insn "ptest_ptrue<mode>"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
                      (match_operand:PRED_ALL 1 "register_operand" "Upa")]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))]
  "TARGET_SVE"
  "ptest\t%0, %1.b"
)

;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
;; with the comparison being unsigned.
(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
                          (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
                         UNSPEC_WHILE_LO))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)

;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
;; Handle the case in which both results are useful.  The GP operand
;; to the PTEST isn't needed, so we allow it to be anything.
(define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 1)
                      (unspec:PRED_ALL
                        [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
                         (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
                        UNSPEC_WHILE_LO)]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_dup 2)
                          (match_dup 3)]
                         UNSPEC_WHILE_LO))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& !CONSTANT_P (operands[1])"
  [(const_int 0)]
  {
    emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc
               (operands[0], CONSTM1_RTX (<MODE>mode),
                operands[2], operands[3]));
    DONE;
  }
)
;; Integer comparisons predicated with a PTRUE.
(define_insn "*cmp<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_INT_CMP:<VPRED>
             (match_operand:SVE_I 2 "register_operand" "w, w")
             (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
          UNSPEC_MERGE_PTRUE))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Integer comparisons predicated with a PTRUE in which only the flags result
;; is interesting.
(define_insn "*cmp<cmp_op><mode>_ptest"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (unspec:<VPRED>
               [(match_dup 1)
                (SVE_INT_CMP:<VPRED>
                  (match_operand:SVE_I 2 "register_operand" "w, w")
                  (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
               UNSPEC_MERGE_PTRUE)]
            UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Integer comparisons predicated with a PTRUE in which both the flag and
;; predicate results are interesting.
(define_insn "*cmp<cmp_op><mode>_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (unspec:<VPRED>
               [(match_dup 1)
                (SVE_INT_CMP:<VPRED>
                  (match_operand:SVE_I 2 "register_operand" "w, w")
                  (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
               UNSPEC_MERGE_PTRUE)]
            UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_dup 1)
           (SVE_INT_CMP:<VPRED>
             (match_dup 2)
             (match_dup 3))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated integer comparisons, formed by combining a PTRUE-predicated
;; comparison with an AND.  Split the instruction into its preferred form
;; (below) at the earliest opportunity, in order to get rid of the
;; redundant operand 1.
(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (and:<VPRED>
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
             (SVE_INT_CMP:<VPRED>
               (match_operand:SVE_I 2 "register_operand" "w, w")
               (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
            UNSPEC_MERGE_PTRUE)
          (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (and:<VPRED>
             (SVE_INT_CMP:<VPRED>
               (match_dup 2)
               (match_dup 3))
             (match_dup 4)))
      (clobber (reg:CC CC_REGNUM))])]
)

;; Predicated integer comparisons.
(define_insn "*pred_cmp<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (and:<VPRED>
          (SVE_INT_CMP:<VPRED>
            (match_operand:SVE_I 2 "register_operand" "w, w")
            (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
          (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
;; Floating-point comparisons predicated with a PTRUE.
(define_insn "*fcm<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_FP_CMP:<VPRED>
             (match_operand:SVE_F 2 "register_operand" "w, w")
             (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Unordered FP comparison predicated with a PTRUE.  No immediate form.
(define_insn "*fcmuo<mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unordered:<VPRED>
             (match_operand:SVE_F 2 "register_operand" "w")
             (match_operand:SVE_F 3 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
;; with another predicate P.  This does not have the same trapping behavior
;; as predicating the comparison itself on P, but it's a legitimate fold,
;; since we can drop any potentially-trapping operations whose results
;; are not needed.
;;
;; Split the instruction into its preferred form (below) at the earliest
;; opportunity, in order to get rid of the redundant operand 1.
(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (and:<VPRED>
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
             (SVE_FP_CMP
               (match_operand:SVE_F 2 "register_operand" "w, w")
               (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
            UNSPEC_MERGE_PTRUE)
          (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (and:<VPRED>
          (SVE_FP_CMP:<VPRED>
            (match_dup 2)
            (match_dup 3))
          (match_dup 4)))]
)

;; As above, for the unordered comparison.
(define_insn_and_split "*fcmuo<mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (and:<VPRED>
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
             (unordered
               (match_operand:SVE_F 2 "register_operand" "w")
               (match_operand:SVE_F 3 "register_operand" "w"))]
            UNSPEC_MERGE_PTRUE)
          (match_operand:<VPRED> 4 "register_operand" "Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (and:<VPRED>
          (unordered:<VPRED>
            (match_dup 2)
            (match_dup 3))
          (match_dup 4)))]
)

;; Unpredicated floating-point comparisons, with the results ANDed
;; with another predicate.  This is a valid fold for the same reasons
;; as above.
(define_insn "*fcm<cmp_op><mode>_and"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (and:<VPRED>
          (SVE_FP_CMP:<VPRED>
            (match_operand:SVE_F 2 "register_operand" "w, w")
            (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
          (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; As above, for the unordered comparison.
(define_insn "*fcmuo<mode>_and"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (and:<VPRED>
          (unordered:<VPRED>
            (match_operand:SVE_F 2 "register_operand" "w")
            (match_operand:SVE_F 3 "register_operand" "w"))
          (match_operand:<VPRED> 1 "register_operand" "Upl")))]
  "TARGET_SVE"
  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated floating-point comparisons.  We don't need a version
;; of this for unordered comparisons.
(define_insn "*pred_fcm<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_F 2 "register_operand" "w, w")
           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
          SVE_COND_FP_CMP))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
;; vcond_mask operand order: true, false, mask
;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
;; SEL operand order:        mask, true, false
(define_insn "vcond_mask_<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 3 "register_operand" "Upa")
           (match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:SVE_ALL 2 "register_operand" "w")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
)

;; Selects between a duplicated immediate and zero.
(define_insn "aarch64_sve_dup<mode>_const"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
           (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "mov\t%0.<Vetype>, %1/z, #%2"
)

;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcond<mode><v_int_equiv>"
  [(set (match_operand:SVE_ALL 0 "register_operand")
        (if_then_else:SVE_ALL
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
          (match_operand:SVE_ALL 1 "register_operand")
          (match_operand:SVE_ALL 2 "register_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Integer vcondu.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcondu<mode><v_int_equiv>"
  [(set (match_operand:SVE_ALL 0 "register_operand")
        (if_then_else:SVE_ALL
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
          (match_operand:SVE_ALL 1 "register_operand")
          (match_operand:SVE_ALL 2 "register_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)
;; Floating-point vcond.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
;; with zero.
(define_expand "vcond<mode><v_fp_equiv>"
  [(set (match_operand:SVE_SD 0 "register_operand")
        (if_then_else:SVE_SD
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_FP_EQUIV> 4 "register_operand")
             (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
          (match_operand:SVE_SD 1 "register_operand")
          (match_operand:SVE_SD 2 "register_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
    DONE;
  }
)

;; Signed integer comparisons.  Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
(define_expand "vec_cmp<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_I 2 "register_operand")
              (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
                                    operands[2], operands[3]);
    DONE;
  }
)

;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
(define_expand "vec_cmpu<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_I 2 "register_operand")
              (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
                                    operands[2], operands[3]);
    DONE;
  }
)

;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; with zero.
(define_expand "vec_cmp<mode><vpred>"
  [(set (match_operand:<VPRED> 0 "register_operand")
        (match_operator:<VPRED> 1 "comparison_operator"
          [(match_operand:SVE_F 2 "register_operand")
           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
                                      operands[2], operands[3], false);
    DONE;
  }
)
1730;; Branch based on predicate equality or inequality.
1731(define_expand "cbranch<mode>4"
1732 [(set (pc)
1733 (if_then_else
1734 (match_operator 0 "aarch64_equality_operator"
1735 [(match_operand:PRED_ALL 1 "register_operand")
1736 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
1737 (label_ref (match_operand 3 ""))
1738 (pc)))]
1739 ""
1740 {
1741 rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
1742 rtx pred;
1743 if (operands[2] == CONST0_RTX (<MODE>mode))
1744 pred = operands[1];
1745 else
1746 {
1747 pred = gen_reg_rtx (<MODE>mode);
1748 emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
1749 operands[2]));
1750 }
1751 emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
1752 operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
1753 operands[2] = const0_rtx;
1754 }
1755)
1756
1757;; Unpredicated integer MIN/MAX.
;; Operand 3 (the governing predicate) is a PTRUE supplied by the expander;
;; the *insn patterns below match the resulting UNSPEC_MERGE_PTRUE form.
1758(define_expand "<su><maxmin><mode>3"
1759 [(set (match_operand:SVE_I 0 "register_operand")
1760 (unspec:SVE_I
1761 [(match_dup 3)
1762 (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
1763 (match_operand:SVE_I 2 "register_operand"))]
1764 UNSPEC_MERGE_PTRUE))]
1765 "TARGET_SVE"
1766 {
1767 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1768 }
1769)
1770
1771;; Integer MIN/MAX predicated with a PTRUE.
;; Alternative 1 ties the (commutative, "%") first input to the destination;
;; alternative 2 uses MOVPRFX to copy operand 2 into the destination first,
;; so no tie is required.
1772(define_insn "*<su><maxmin><mode>3"
83d1ca63 1773 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
8fa7f434 1774 (unspec:SVE_I
83d1ca63 1775 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1776 (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
1777 (match_operand:SVE_I 3 "register_operand" "w, w"))]
8fa7f434 1778 UNSPEC_MERGE_PTRUE))]
1779 "TARGET_SVE"
83d1ca63 1780 "@
1781 <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1782 movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1783 [(set_attr "movprfx" "*,yes")]
8fa7f434 1784)
1785
1786;; Unpredicated floating-point MIN/MAX.
1787(define_expand "<su><maxmin><mode>3"
1788 [(set (match_operand:SVE_F 0 "register_operand")
1789 (unspec:SVE_F
1790 [(match_dup 3)
1791 (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
1792 (match_operand:SVE_F 2 "register_operand"))]
1793 UNSPEC_MERGE_PTRUE))]
1794 "TARGET_SVE"
1795 {
1796 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1797 }
1798)
1799
1800;; Floating-point MIN/MAX predicated with a PTRUE.
;; Implemented with FMAXNM/FMINNM (the "nm" forms), matching the RTL
;; smax/smin semantics for these modes.
1801(define_insn "*<su><maxmin><mode>3"
83d1ca63 1802 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
8fa7f434 1803 (unspec:SVE_F
83d1ca63 1804 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1805 (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w")
1806 (match_operand:SVE_F 3 "register_operand" "w, w"))]
8fa7f434 1807 UNSPEC_MERGE_PTRUE))]
1808 "TARGET_SVE"
83d1ca63 1809 "@
1810 f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1811 movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1812 [(set_attr "movprfx" "*,yes")]
8fa7f434 1813)
1814
1815;; Unpredicated fmin/fmax.
1816(define_expand "<maxmin_uns><mode>3"
1817 [(set (match_operand:SVE_F 0 "register_operand")
1818 (unspec:SVE_F
1819 [(match_dup 3)
1820 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
1821 (match_operand:SVE_F 2 "register_operand")]
1822 FMAXMIN_UNS)]
1823 UNSPEC_MERGE_PTRUE))]
1824 "TARGET_SVE"
1825 {
1826 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1827 }
1828)
1829
1830;; fmin/fmax predicated with a PTRUE.
1831(define_insn "*<maxmin_uns><mode>3"
83d1ca63 1832 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
8fa7f434 1833 (unspec:SVE_F
83d1ca63 1834 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1835 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w")
1836 (match_operand:SVE_F 3 "register_operand" "w, w")]
8fa7f434 1837 FMAXMIN_UNS)]
1838 UNSPEC_MERGE_PTRUE))]
1839 "TARGET_SVE"
83d1ca63 1840 "@
1841 <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1842 movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1843 [(set_attr "movprfx" "*,yes")]
8fa7f434 1844)
1845
47c52435 1846;; Predicated integer operations with select.
;; Operand 4 (register or zero) supplies the value used for the lanes not
;; selected by predicate operand 1; the *cond_* insns below match the
;; combinations in which operand 4 coincides with another operand.
1847(define_expand "cond_<optab><mode>"
1848 [(set (match_operand:SVE_I 0 "register_operand")
1849 (unspec:SVE_I
1850 [(match_operand:<VPRED> 1 "register_operand")
1851 (SVE_INT_BINARY:SVE_I
1852 (match_operand:SVE_I 2 "register_operand")
1853 (match_operand:SVE_I 3 "register_operand"))
83d1ca63 1854 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
47c52435 1855 UNSPEC_SEL))]
1856 "TARGET_SVE"
83d1ca63 1857)
47c52435 1858
;; As above, but for the operations that only exist for SI and DI modes.
cdb4d5d0 1859(define_expand "cond_<optab><mode>"
1860 [(set (match_operand:SVE_SDI 0 "register_operand")
1861 (unspec:SVE_SDI
1862 [(match_operand:<VPRED> 1 "register_operand")
1863 (SVE_INT_BINARY_SD:SVE_SDI
1864 (match_operand:SVE_SDI 2 "register_operand")
1865 (match_operand:SVE_SDI 3 "register_operand"))
83d1ca63 1866 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
cdb4d5d0 1867 UNSPEC_SEL))]
1868 "TARGET_SVE"
83d1ca63 1869)
cdb4d5d0 1870
83d1ca63 1871;; Predicated integer operations with select matching the output operand.
;; The _0/_2/_3 suffixes on these patterns indicate which operand the
;; select fallback is tied to (via match_dup): the destination, the first
;; input, or the second input respectively.  The <sve_int_op_rev>
;; alternatives reverse the operation so the tied register can be the
;; second input instead of the first.
1872(define_insn "*cond_<optab><mode>_0"
1873 [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w")
1874 (unspec:SVE_I
1875 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1876 (SVE_INT_BINARY:SVE_I
1877 (match_operand:SVE_I 2 "register_operand" "0, w, w")
1878 (match_operand:SVE_I 3 "register_operand" "w, 0, w"))
1879 (match_dup 0)]
1880 UNSPEC_SEL))]
1881 "TARGET_SVE"
1882 "@
1883 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1884 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1885 movprfx\t%0, %1/m, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1886 [(set_attr "movprfx" "*,*,yes")]
1887)
1888
1889(define_insn "*cond_<optab><mode>_0"
1890 [(set (match_operand:SVE_SDI 0 "register_operand" "+w, w, ?&w")
1891 (unspec:SVE_SDI
1892 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1893 (SVE_INT_BINARY_SD:SVE_SDI
1894 (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
1895 (match_operand:SVE_SDI 3 "register_operand" "w, 0, w"))
1896 (match_dup 0)]
1897 UNSPEC_SEL))]
1898 "TARGET_SVE"
1899 "@
1900 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1901 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1902 movprfx\t%0, %1/m, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1903 [(set_attr "movprfx" "*,*,yes")]
1904)
1905
1906;; Predicated integer operations with select matching the first operand.
1907(define_insn "*cond_<optab><mode>_2"
1908 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1909 (unspec:SVE_I
1910 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1911 (SVE_INT_BINARY:SVE_I
1912 (match_operand:SVE_I 2 "register_operand" "0, w")
1913 (match_operand:SVE_I 3 "register_operand" "w, w"))
1914 (match_dup 2)]
1915 UNSPEC_SEL))]
1916 "TARGET_SVE"
1917 "@
1918 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1919 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1920 [(set_attr "movprfx" "*,yes")]
1921)
1922
1923(define_insn "*cond_<optab><mode>_2"
1924 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1925 (unspec:SVE_SDI
1926 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1927 (SVE_INT_BINARY_SD:SVE_SDI
1928 (match_operand:SVE_SDI 2 "register_operand" "0, w")
1929 (match_operand:SVE_SDI 3 "register_operand" "w, w"))
1930 (match_dup 2)]
1931 UNSPEC_SEL))]
1932 "TARGET_SVE"
1933 "@
1934 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1935 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1936 [(set_attr "movprfx" "*,yes")]
1937)
1938
1939;; Predicated integer operations with select matching the second operand.
1940(define_insn "*cond_<optab><mode>_3"
1941 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1942 (unspec:SVE_I
1943 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1944 (SVE_INT_BINARY:SVE_I
1945 (match_operand:SVE_I 2 "register_operand" "w, w")
1946 (match_operand:SVE_I 3 "register_operand" "0, w"))
1947 (match_dup 3)]
1948 UNSPEC_SEL))]
1949 "TARGET_SVE"
1950 "@
1951 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1952 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1953 [(set_attr "movprfx" "*,yes")]
1954)
1955
1956(define_insn "*cond_<optab><mode>_3"
1957 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1958 (unspec:SVE_SDI
1959 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1960 (SVE_INT_BINARY_SD:SVE_SDI
1961 (match_operand:SVE_SDI 2 "register_operand" "w, w")
1962 (match_operand:SVE_SDI 3 "register_operand" "0, w"))
1963 (match_dup 3)]
1964 UNSPEC_SEL))]
1965 "TARGET_SVE"
1966 "@
1967 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1968 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1969 [(set_attr "movprfx" "*,yes")]
1970)
1971
1972;; Predicated integer operations with select matching zero.
;; Implemented with a zeroing MOVPRFX, so the inactive lanes become zero.
1973(define_insn "*cond_<optab><mode>_z"
1974 [(set (match_operand:SVE_I 0 "register_operand" "=&w")
88fefa8f 1975 (unspec:SVE_I
1976 [(match_operand:<VPRED> 1 "register_operand" "Upl")
47c52435 1977 (SVE_INT_BINARY:SVE_I
83d1ca63 1978 (match_operand:SVE_I 2 "register_operand" "w")
47c52435 1979 (match_operand:SVE_I 3 "register_operand" "w"))
83d1ca63 1980 (match_operand:SVE_I 4 "aarch64_simd_imm_zero")]
47c52435 1981 UNSPEC_SEL))]
88fefa8f 1982 "TARGET_SVE"
83d1ca63 1983 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1984 [(set_attr "movprfx" "yes")]
88fefa8f 1985)
1986
83d1ca63 1987(define_insn "*cond_<optab><mode>_z"
1988 [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
cdb4d5d0 1989 (unspec:SVE_SDI
1990 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1991 (SVE_INT_BINARY_SD:SVE_SDI
83d1ca63 1992 (match_operand:SVE_SDI 2 "register_operand" "w")
cdb4d5d0 1993 (match_operand:SVE_SDI 3 "register_operand" "w"))
83d1ca63 1994 (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")]
cdb4d5d0 1995 UNSPEC_SEL))]
1996 "TARGET_SVE"
83d1ca63 1997 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1998 [(set_attr "movprfx" "yes")]
cdb4d5d0 1999)
2000
83d1ca63 2001;; Predicated integer operations whose select value does not match any
;; operand.  Not implementable as a single instruction: emitted as "#"
;; and split after reload into a SEL followed by a predicated operation
;; (see the define_split below).
2002(define_insn "*cond_<optab><mode>_any"
2003 [(set (match_operand:SVE_I 0 "register_operand" "=&w")
dbc7e6ae 2004 (unspec:SVE_I
2005 [(match_operand:<VPRED> 1 "register_operand" "Upl")
83d1ca63 2006 (SVE_INT_BINARY:SVE_I
dbc7e6ae 2007 (match_operand:SVE_I 2 "register_operand" "w")
83d1ca63 2008 (match_operand:SVE_I 3 "register_operand" "w"))
2009 (match_operand:SVE_I 4 "register_operand" "w")]
dbc7e6ae 2010 UNSPEC_SEL))]
2011 "TARGET_SVE"
83d1ca63 2012 "#"
dbc7e6ae 2013)
2014
;; Predicated SI/DI binary operations whose select value does not match
;; any operand.  Not implementable as a single instruction: emitted as
;; "#" and split after reload into a SEL followed by a predicated
;; operation (see the define_split below).
83d1ca63 2015(define_insn "*cond_<optab><mode>_any"
2016 [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
cdb4d5d0 2017 (unspec:SVE_SDI
2018 [(match_operand:<VPRED> 1 "register_operand" "Upl")
;; Fixed: the code iterator must be applied in SVE_SDI mode so that it
;; matches the enclosing unspec:SVE_SDI and the SVE_SDI operands
;; (":SVE_I" was a mode mismatch; compare the SVE_SDI "_z" and "_0"
;; patterns above, which all use matching modes).
83d1ca63 2019 (SVE_INT_BINARY_SD:SVE_SDI
cdb4d5d0 2020 (match_operand:SVE_SDI 2 "register_operand" "w")
83d1ca63 2021 (match_operand:SVE_SDI 3 "register_operand" "w"))
2022 (match_operand:SVE_SDI 4 "register_operand" "w")]
cdb4d5d0 2023 UNSPEC_SEL))]
2024 "TARGET_SVE"
83d1ca63 2025 "#"
2026)
2027
;; Split for the *cond_<optab><mode>_any patterns above: first SEL the
;; fallback value (operand 4) into the destination, then perform the
;; operation with the destination as the value selected for inactive
;; lanes.  Only valid when operand 4 is distinct from operands 0, 2 and 3,
;; which is exactly the case the single-insn patterns cannot handle.
2028(define_split
2029 [(set (match_operand:SVE_I 0 "register_operand")
2030 (unspec:SVE_I
2031 [(match_operand:<VPRED> 1 "register_operand")
2032 (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator"
2033 [(match_operand:SVE_I 2 "register_operand")
2034 (match_operand:SVE_I 3 "register_operand")])
2035 (match_operand:SVE_I 4 "register_operand")]
2036 UNSPEC_SEL))]
2037 "TARGET_SVE && reload_completed
2038 && !(rtx_equal_p (operands[0], operands[4])
2039 || rtx_equal_p (operands[2], operands[4])
2040 || rtx_equal_p (operands[3], operands[4]))"
2041 ; Not matchable by any one insn or movprfx insn. We need a separate select.
2042 [(set (match_dup 0)
2043 (unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)]
2044 UNSPEC_SEL))
2045 (set (match_dup 0)
2046 (unspec:SVE_I
2047 [(match_dup 1)
2048 (match_op_dup 5 [(match_dup 0) (match_dup 3)])
2049 (match_dup 0)]
2050 UNSPEC_SEL))]
cdb4d5d0 2051)
2052
3bf95150 2053;; Set operand 0 to the last active element in operand 3, or to tied
2054;; operand 1 if no elements are active.
;; The two alternatives return the result in a general register or in a
;; vector register respectively.
2055(define_insn "fold_extract_last_<mode>"
2056 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
2057 (unspec:<VEL>
2058 [(match_operand:<VEL> 1 "register_operand" "0, 0")
2059 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
2060 (match_operand:SVE_ALL 3 "register_operand" "w, w")]
2061 UNSPEC_CLASTB))]
2062 "TARGET_SVE"
2063 "@
2064 clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
2065 clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
2066)
2067
8fa7f434 2068;; Unpredicated integer add reduction.
2069(define_expand "reduc_plus_scal_<mode>"
2070 [(set (match_operand:<VEL> 0 "register_operand")
2071 (unspec:<VEL> [(match_dup 2)
2072 (match_operand:SVE_I 1 "register_operand")]
2073 UNSPEC_ADDV))]
2074 "TARGET_SVE"
2075 {
2076 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2077 }
2078)
2079
2080;; Predicated integer add reduction. The result is always 64-bits.
2081(define_insn "*reduc_plus_scal_<mode>"
2082 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2083 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2084 (match_operand:SVE_I 2 "register_operand" "w")]
2085 UNSPEC_ADDV))]
2086 "TARGET_SVE"
2087 "uaddv\t%d0, %1, %2.<Vetype>"
2088)
2089
2090;; Unpredicated floating-point add reduction.
2091(define_expand "reduc_plus_scal_<mode>"
2092 [(set (match_operand:<VEL> 0 "register_operand")
2093 (unspec:<VEL> [(match_dup 2)
2094 (match_operand:SVE_F 1 "register_operand")]
2095 UNSPEC_FADDV))]
2096 "TARGET_SVE"
2097 {
2098 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2099 }
2100)
2101
2102;; Predicated floating-point add reduction.
2103(define_insn "*reduc_plus_scal_<mode>"
2104 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2105 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2106 (match_operand:SVE_F 2 "register_operand" "w")]
2107 UNSPEC_FADDV))]
2108 "TARGET_SVE"
2109 "faddv\t%<Vetype>0, %1, %2.<Vetype>"
2110)
2111
2112;; Unpredicated integer MIN/MAX reduction.
2113(define_expand "reduc_<maxmin_uns>_scal_<mode>"
2114 [(set (match_operand:<VEL> 0 "register_operand")
2115 (unspec:<VEL> [(match_dup 2)
2116 (match_operand:SVE_I 1 "register_operand")]
2117 MAXMINV))]
2118 "TARGET_SVE"
2119 {
2120 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2121 }
2122)
2123
2124;; Predicated integer MIN/MAX reduction.
2125(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
2126 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2127 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2128 (match_operand:SVE_I 2 "register_operand" "w")]
2129 MAXMINV))]
2130 "TARGET_SVE"
2131 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
2132)
2133
2134;; Unpredicated floating-point MIN/MAX reduction.
2135(define_expand "reduc_<maxmin_uns>_scal_<mode>"
2136 [(set (match_operand:<VEL> 0 "register_operand")
2137 (unspec:<VEL> [(match_dup 2)
2138 (match_operand:SVE_F 1 "register_operand")]
2139 FMAXMINV))]
2140 "TARGET_SVE"
2141 {
2142 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2143 }
2144)
2145
2146;; Predicated floating-point MIN/MAX reduction.
2147(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
2148 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2149 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2150 (match_operand:SVE_F 2 "register_operand" "w")]
2151 FMAXMINV))]
2152 "TARGET_SVE"
2153 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
2154)
2155
;; Unpredicated bitwise (BITWISEV) reductions; the mnemonic comes from
;; <bit_reduc_op>.
216934f9 2156(define_expand "reduc_<optab>_scal_<mode>"
2157 [(set (match_operand:<VEL> 0 "register_operand")
2158 (unspec:<VEL> [(match_dup 2)
2159 (match_operand:SVE_I 1 "register_operand")]
2160 BITWISEV))]
2161 "TARGET_SVE"
2162 {
2163 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2164 }
2165)
2166
;; Predicated bitwise reductions.
2167(define_insn "*reduc_<optab>_scal_<mode>"
2168 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2169 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2170 (match_operand:SVE_I 2 "register_operand" "w")]
2171 BITWISEV))]
2172 "TARGET_SVE"
2173 "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
2174)
2175
d77809a4 2176;; Unpredicated in-order FP reductions.
;; Operand 1 is the initial accumulator value; FADDA folds the vector
;; elements into it in element order.
2177(define_expand "fold_left_plus_<mode>"
2178 [(set (match_operand:<VEL> 0 "register_operand")
2179 (unspec:<VEL> [(match_dup 3)
2180 (match_operand:<VEL> 1 "register_operand")
2181 (match_operand:SVE_F 2 "register_operand")]
2182 UNSPEC_FADDA))]
2183 "TARGET_SVE"
2184 {
2185 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2186 }
2187)
2188
2189;; In-order FP reductions predicated with PTRUE.
2190(define_insn "*fold_left_plus_<mode>"
2191 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2192 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2193 (match_operand:<VEL> 2 "register_operand" "0")
2194 (match_operand:SVE_F 3 "register_operand" "w")]
2195 UNSPEC_FADDA))]
2196 "TARGET_SVE"
2197 "fadda\t%<Vetype>0, %1, %<Vetype>0, %3.<Vetype>"
2198)
2199
2200;; Predicated form of the above in-order reduction.
;; Here the vector input is itself an UNSPEC_SEL with a zero fallback, so
;; inactive lanes contribute zero to the sum.
2201(define_insn "*pred_fold_left_plus_<mode>"
2202 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2203 (unspec:<VEL>
2204 [(match_operand:<VEL> 1 "register_operand" "0")
2205 (unspec:SVE_F
2206 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2207 (match_operand:SVE_F 3 "register_operand" "w")
2208 (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
2209 UNSPEC_SEL)]
2210 UNSPEC_FADDA))]
2211 "TARGET_SVE"
2212 "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
2213)
2214
8fa7f434 2215;; Unpredicated floating-point addition.
;; Operand 2 also accepts add/sub immediates (vsA/vsN below), which map
;; to the immediate forms of FADD/FSUB.
2216(define_expand "add<mode>3"
2217 [(set (match_operand:SVE_F 0 "register_operand")
2218 (unspec:SVE_F
2219 [(match_dup 3)
2220 (plus:SVE_F
2221 (match_operand:SVE_F 1 "register_operand")
2222 (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
2223 UNSPEC_MERGE_PTRUE))]
2224 "TARGET_SVE"
2225 {
2226 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2227 }
2228)
2229
2230;; Floating-point addition predicated with a PTRUE.
9e089468 2231(define_insn_and_split "*add<mode>3"
8fa7f434 2232 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
2233 (unspec:SVE_F
2234 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2235 (plus:SVE_F
2236 (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
2237 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
2238 UNSPEC_MERGE_PTRUE))]
2239 "TARGET_SVE"
2240 "@
2241 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2242 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
9e089468 2243 #"
2244 ; Split the unpredicated form after reload, so that we don't have
2245 ; the unnecessary PTRUE.
2246 "&& reload_completed
2247 && register_operand (operands[3], <MODE>mode)"
2248 [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
8fa7f434 2249)
2250
2251;; Unpredicated floating-point subtraction.
2252(define_expand "sub<mode>3"
2253 [(set (match_operand:SVE_F 0 "register_operand")
2254 (unspec:SVE_F
2255 [(match_dup 3)
2256 (minus:SVE_F
2257 (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
2258 (match_operand:SVE_F 2 "register_operand"))]
2259 UNSPEC_MERGE_PTRUE))]
2260 "TARGET_SVE"
2261 {
2262 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2263 }
2264)
2265
2266;; Floating-point subtraction predicated with a PTRUE.
;; The insn condition requires at least one register input, since only
;; one of the two operands may be an immediate (FSUB/FADD/FSUBR forms).
9e089468 2267(define_insn_and_split "*sub<mode>3"
8fa7f434 2268 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
2269 (unspec:SVE_F
2270 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
2271 (minus:SVE_F
2272 (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
2273 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
2274 UNSPEC_MERGE_PTRUE))]
2275 "TARGET_SVE
2276 && (register_operand (operands[2], <MODE>mode)
2277 || register_operand (operands[3], <MODE>mode))"
2278 "@
2279 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2280 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2281 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
9e089468 2282 #"
2283 ; Split the unpredicated form after reload, so that we don't have
2284 ; the unnecessary PTRUE.
2285 "&& reload_completed
2286 && register_operand (operands[2], <MODE>mode)
2287 && register_operand (operands[3], <MODE>mode)"
2288 [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
8fa7f434 2289)
2290
2291;; Unpredicated floating-point multiplication.
2292(define_expand "mul<mode>3"
2293 [(set (match_operand:SVE_F 0 "register_operand")
2294 (unspec:SVE_F
2295 [(match_dup 3)
2296 (mult:SVE_F
2297 (match_operand:SVE_F 1 "register_operand")
2298 (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
2299 UNSPEC_MERGE_PTRUE))]
2300 "TARGET_SVE"
2301 {
2302 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2303 }
2304)
2305
2306;; Floating-point multiplication predicated with a PTRUE.
9e089468 2307(define_insn_and_split "*mul<mode>3"
8fa7f434 2308 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2309 (unspec:SVE_F
2310 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2311 (mult:SVE_F
2312 (match_operand:SVE_F 2 "register_operand" "%0, w")
2313 (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
2314 UNSPEC_MERGE_PTRUE))]
2315 "TARGET_SVE"
2316 "@
2317 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
9e089468 2318 #"
2319 ; Split the unpredicated form after reload, so that we don't have
2320 ; the unnecessary PTRUE.
2321 "&& reload_completed
2322 && register_operand (operands[3], <MODE>mode)"
2323 [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
8fa7f434 2324)
2325
9e089468 2326;; Unpredicated floating-point binary operations (post-RA only).
2327;; These are generated by splitting a predicated instruction whose
2328;; predicate is unused.
2329(define_insn "*post_ra_<sve_fp_op><mode>3"
2330 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2331 (SVE_UNPRED_FP_BINARY:SVE_F
2332 (match_operand:SVE_F 1 "register_operand" "w")
2333 (match_operand:SVE_F 2 "register_operand" "w")))]
2334 "TARGET_SVE && reload_completed"
2335 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
2336
8fa7f434 2337;; Unpredicated fma (%0 = (%1 * %2) + %3).
2338(define_expand "fma<mode>4"
2339 [(set (match_operand:SVE_F 0 "register_operand")
2340 (unspec:SVE_F
2341 [(match_dup 4)
2342 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2343 (match_operand:SVE_F 2 "register_operand")
2344 (match_operand:SVE_F 3 "register_operand"))]
2345 UNSPEC_MERGE_PTRUE))]
2346 "TARGET_SVE"
2347 {
2348 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2349 }
2350)
2351
2352;; fma predicated with a PTRUE.
;; Alternative 1 matches FMAD (destination tied to multiplicand operand 3),
;; alternative 2 FMLA (destination tied to accumulator operand 2), and
;; alternative 3 uses MOVPRFX so that no tie is needed.  The fnma/fms/fnms
;; patterns below follow the same alternative structure.
2353(define_insn "*fma<mode>4"
83d1ca63 2354 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
8fa7f434 2355 (unspec:SVE_F
83d1ca63 2356 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2357 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
2358 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2359 (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
8fa7f434 2360 UNSPEC_MERGE_PTRUE))]
2361 "TARGET_SVE"
2362 "@
2363 fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
83d1ca63 2364 fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2365 movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2366 [(set_attr "movprfx" "*,*,yes")]
8fa7f434 2367)
2368
2369;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
2370(define_expand "fnma<mode>4"
2371 [(set (match_operand:SVE_F 0 "register_operand")
2372 (unspec:SVE_F
2373 [(match_dup 4)
2374 (fma:SVE_F (neg:SVE_F
2375 (match_operand:SVE_F 1 "register_operand"))
2376 (match_operand:SVE_F 2 "register_operand")
2377 (match_operand:SVE_F 3 "register_operand"))]
2378 UNSPEC_MERGE_PTRUE))]
2379 "TARGET_SVE"
2380 {
2381 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2382 }
2383)
2384
2385;; fnma predicated with a PTRUE.
2386(define_insn "*fnma<mode>4"
83d1ca63 2387 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
8fa7f434 2388 (unspec:SVE_F
83d1ca63 2389 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
8fa7f434 2390 (fma:SVE_F (neg:SVE_F
83d1ca63 2391 (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
2392 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2393 (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
8fa7f434 2394 UNSPEC_MERGE_PTRUE))]
2395 "TARGET_SVE"
2396 "@
2397 fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
83d1ca63 2398 fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2399 movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2400 [(set_attr "movprfx" "*,*,yes")]
8fa7f434 2401)
2402
2403;; Unpredicated fms (%0 = (%1 * %2) - %3).
2404(define_expand "fms<mode>4"
2405 [(set (match_operand:SVE_F 0 "register_operand")
2406 (unspec:SVE_F
2407 [(match_dup 4)
2408 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2409 (match_operand:SVE_F 2 "register_operand")
2410 (neg:SVE_F
2411 (match_operand:SVE_F 3 "register_operand")))]
2412 UNSPEC_MERGE_PTRUE))]
2413 "TARGET_SVE"
2414 {
2415 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2416 }
2417)
2418
2419;; fms predicated with a PTRUE.
2420(define_insn "*fms<mode>4"
83d1ca63 2421 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
8fa7f434 2422 (unspec:SVE_F
83d1ca63 2423 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2424 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
2425 (match_operand:SVE_F 4 "register_operand" "w, w, w")
8fa7f434 2426 (neg:SVE_F
83d1ca63 2427 (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
8fa7f434 2428 UNSPEC_MERGE_PTRUE))]
2429 "TARGET_SVE"
2430 "@
2431 fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
83d1ca63 2432 fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2433 movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2434 [(set_attr "movprfx" "*,*,yes")]
8fa7f434 2435)
2436
2437;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
2438(define_expand "fnms<mode>4"
2439 [(set (match_operand:SVE_F 0 "register_operand")
2440 (unspec:SVE_F
2441 [(match_dup 4)
2442 (fma:SVE_F (neg:SVE_F
2443 (match_operand:SVE_F 1 "register_operand"))
2444 (match_operand:SVE_F 2 "register_operand")
2445 (neg:SVE_F
2446 (match_operand:SVE_F 3 "register_operand")))]
2447 UNSPEC_MERGE_PTRUE))]
2448 "TARGET_SVE"
2449 {
2450 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2451 }
2452)
2453
2454;; fnms predicated with a PTRUE.
2455(define_insn "*fnms<mode>4"
83d1ca63 2456 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
8fa7f434 2457 (unspec:SVE_F
83d1ca63 2458 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
8fa7f434 2459 (fma:SVE_F (neg:SVE_F
83d1ca63 2460 (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
2461 (match_operand:SVE_F 4 "register_operand" "w, w, w")
8fa7f434 2462 (neg:SVE_F
83d1ca63 2463 (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
8fa7f434 2464 UNSPEC_MERGE_PTRUE))]
2465 "TARGET_SVE"
2466 "@
2467 fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
83d1ca63 2468 fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2469 movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2470 [(set_attr "movprfx" "*,*,yes")]
8fa7f434 2471)
2472
2473;; Unpredicated floating-point division.
2474(define_expand "div<mode>3"
2475 [(set (match_operand:SVE_F 0 "register_operand")
2476 (unspec:SVE_F
2477 [(match_dup 3)
2478 (div:SVE_F (match_operand:SVE_F 1 "register_operand")
2479 (match_operand:SVE_F 2 "register_operand"))]
2480 UNSPEC_MERGE_PTRUE))]
2481 "TARGET_SVE"
2482 {
2483 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2484 }
2485)
2486
2487;; Floating-point division predicated with a PTRUE.
;; FDIVR (alternative 2) is the reversed form, used when the divisor
;; rather than the dividend is tied to the destination.
2488(define_insn "*div<mode>3"
83d1ca63 2489 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
8fa7f434 2490 (unspec:SVE_F
83d1ca63 2491 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2492 (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
2493 (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
8fa7f434 2494 UNSPEC_MERGE_PTRUE))]
2495 "TARGET_SVE"
2496 "@
2497 fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
83d1ca63 2498 fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2499 movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2500 [(set_attr "movprfx" "*,*,yes")]
8fa7f434 2501)
2502
2503;; Unpredicated FNEG, FABS and FSQRT.
2504(define_expand "<optab><mode>2"
2505 [(set (match_operand:SVE_F 0 "register_operand")
2506 (unspec:SVE_F
2507 [(match_dup 2)
2508 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
2509 UNSPEC_MERGE_PTRUE))]
2510 "TARGET_SVE"
2511 {
2512 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2513 }
2514)
2515
2516;; FNEG, FABS and FSQRT predicated with a PTRUE.
2517(define_insn "*<optab><mode>2"
2518 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2519 (unspec:SVE_F
2520 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2521 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
2522 UNSPEC_MERGE_PTRUE))]
2523 "TARGET_SVE"
2524 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2525)
2526
2527;; Unpredicated FRINTy.
;; FRINT selects the rounding variant; the suffix comes from
;; <frint_suffix> in the insn below.
2528(define_expand "<frint_pattern><mode>2"
2529 [(set (match_operand:SVE_F 0 "register_operand")
2530 (unspec:SVE_F
2531 [(match_dup 2)
2532 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
2533 FRINT)]
2534 UNSPEC_MERGE_PTRUE))]
2535 "TARGET_SVE"
2536 {
2537 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2538 }
2539)
2540
2541;; FRINTy predicated with a PTRUE.
2542(define_insn "*<frint_pattern><mode>2"
2543 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2544 (unspec:SVE_F
2545 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2546 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
2547 FRINT)]
2548 UNSPEC_MERGE_PTRUE))]
2549 "TARGET_SVE"
2550 "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2551)
2552
2553;; Unpredicated conversion of floats to integers of the same size (HF to HI,
2554;; SF to SI or DF to DI).
2555(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
2556 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2557 (unspec:<V_INT_EQUIV>
2558 [(match_dup 2)
2559 (FIXUORS:<V_INT_EQUIV>
2560 (match_operand:SVE_F 1 "register_operand"))]
2561 UNSPEC_MERGE_PTRUE))]
2562 "TARGET_SVE"
2563 {
2564 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2565 }
2566)
2567
;; Fixed: the comment wrongly said "SF" and the pattern name used the
;; ad-hoc "v16hsf".  The input mode is VNx8HF (see operand 2 and the
;; "%2.h" in the template), so this converts HF, and the name now follows
;; the mode-based convention of its siblings (vnx4sf, vnx2df below and
;; *<optab><mode>vnx8hf2 for the reverse direction).
2568;; Conversion of HF to HI, SI or DI, predicated with a PTRUE.
2569(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
2570 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2571 (unspec:SVE_HSDI
2572 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2573 (FIXUORS:SVE_HSDI
2574 (match_operand:VNx8HF 2 "register_operand" "w"))]
2575 UNSPEC_MERGE_PTRUE))]
2576 "TARGET_SVE"
2577 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
2578)
2579
2580;; Conversion of SF to DI or SI, predicated with a PTRUE.
2581(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
2582 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2583 (unspec:SVE_SDI
2584 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2585 (FIXUORS:SVE_SDI
2586 (match_operand:VNx4SF 2 "register_operand" "w"))]
2587 UNSPEC_MERGE_PTRUE))]
2588 "TARGET_SVE"
2589 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
2590)
2591
2592;; Conversion of DF to DI or SI, predicated with a PTRUE.
;; The predicate has mode VNx2BI (one bit per 64-bit source element),
;; fixed by the VNx2DF source rather than by the SVE_SDI destination.
2593(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
2594 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2595 (unspec:SVE_SDI
2596 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2597 (FIXUORS:SVE_SDI
2598 (match_operand:VNx2DF 2 "register_operand" "w"))]
2599 UNSPEC_MERGE_PTRUE))]
2600 "TARGET_SVE"
2601 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
2602)
2603
2604;; Unpredicated conversion of integers to floats of the same size
2605;; (HI to HF, SI to SF or DI to DF).
;; As with the float-to-int expander above, operand 2 becomes an all-true
;; predicate and the predicated "*<optab>...cvtf" insns match the result.
2606(define_expand "<optab><v_int_equiv><mode>2"
2607 [(set (match_operand:SVE_F 0 "register_operand")
2608 (unspec:SVE_F
2609 [(match_dup 2)
2610 (FLOATUORS:SVE_F
2611 (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
2612 UNSPEC_MERGE_PTRUE))]
2613 "TARGET_SVE"
2614 {
2615 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2616 }
2617)
2618
2619;; Conversion of DI, SI or HI to the same number of HFs, predicated
2620;; with a PTRUE.
2621(define_insn "*<optab><mode>vnx8hf2"
2622 [(set (match_operand:VNx8HF 0 "register_operand" "=w")
2623 (unspec:VNx8HF
2624 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2625 (FLOATUORS:VNx8HF
2626 (match_operand:SVE_HSDI 2 "register_operand" "w"))]
2627 UNSPEC_MERGE_PTRUE))]
2628 "TARGET_SVE"
2629 "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
2630)
2631
2632;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
2633(define_insn "*<optab><mode>vnx4sf2"
2634 [(set (match_operand:VNx4SF 0 "register_operand" "=w")
2635 (unspec:VNx4SF
2636 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2637 (FLOATUORS:VNx4SF
2638 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2639 UNSPEC_MERGE_PTRUE))]
2640 "TARGET_SVE"
2641 "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
2642)
2643
2644;; Conversion of DI or SI to DF, predicated with a PTRUE.
;; Named (no "*" prefix), unlike the two patterns above, because
;; vec_unpack<su_optab>_float_<perm_hilo>_vnx4si below generates it
;; directly via gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2.
88e81b08 2645(define_insn "aarch64_sve_<optab><mode>vnx2df2"
8fa7f434 2646 [(set (match_operand:VNx2DF 0 "register_operand" "=w")
2647 (unspec:VNx2DF
2648 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2649 (FLOATUORS:VNx2DF
2650 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2651 UNSPEC_MERGE_PTRUE))]
2652 "TARGET_SVE"
2653 "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
2654)
2655
2656;; Conversion of DFs to the same number of SFs, or SFs to the same number
2657;; of HFs.
;; UNSPEC_FLOAT_CONVERT represents the float-to-float conversion itself;
;; the predicate (operand 1) uses the wide mode's predicate, <VWIDE_PRED>.
2658(define_insn "*trunc<Vwide><mode>2"
2659 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
2660 (unspec:SVE_HSF
2661 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2662 (unspec:SVE_HSF
2663 [(match_operand:<VWIDE> 2 "register_operand" "w")]
2664 UNSPEC_FLOAT_CONVERT)]
2665 UNSPEC_MERGE_PTRUE))]
2666 "TARGET_SVE"
2667 "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
2668)
2669
2670;; Conversion of SFs to the same number of DFs, or HFs to the same number
2671;; of SFs.
;; Named (no "*" prefix) because vec_unpacks_<perm_hilo>_<mode> below
;; generates this pattern directly.
88e81b08 2672(define_insn "aarch64_sve_extend<mode><Vwide>2"
8fa7f434 2673 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2674 (unspec:<VWIDE>
2675 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2676 (unspec:<VWIDE>
2677 [(match_operand:SVE_HSF 2 "register_operand" "w")]
2678 UNSPEC_FLOAT_CONVERT)]
2679 UNSPEC_MERGE_PTRUE))]
2680 "TARGET_SVE"
2681 "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
2682)
2683
88e81b08 2684;; Unpack the low or high half of a predicate, where "high" refers to
2685;; the low-numbered lanes for big-endian and the high-numbered lanes
2686;; for little-endian.
;; <hi_lanes_optab> maps GCC's endian-dependent notion of "high" onto the
;; architectural PUNPKHI/PUNPKLO choice, so the expander picks the right
;; generator for the target's byte order.
2687(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
2688 [(match_operand:<VWIDE> 0 "register_operand")
2689 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
2690 UNPACK)]
2691 "TARGET_SVE"
2692 {
2693 emit_insn ((<hi_lanes_optab>
2694 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
2695 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
2696 (operands[0], operands[1]));
2697 DONE;
2698 }
2699)
2700
8fa7f434 2701;; PUNPKHI and PUNPKLO.
;; Predicate unpacks are always "unsigned" (zero-extending), hence the
;; UNPACK_UNSIGNED iterator; "Upa" allows any predicate register.
88e81b08 2702(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
8fa7f434 2703 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
2704 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
88e81b08 2705 UNPACK_UNSIGNED))]
2706 "TARGET_SVE"
2707 "punpk<perm_hilo>\t%0.h, %1.b"
2708)
2709
88e81b08 2710;; Unpack the low or high half of a vector, where "high" refers to
2711;; the low-numbered lanes for big-endian and the high-numbered lanes
2712;; for little-endian.
;; Same endian-correction scheme as the predicate unpack above:
;; <hi_lanes_optab> selects between the HI and LO architectural unpacks.
2713(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
2714 [(match_operand:<VWIDE> 0 "register_operand")
2715 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
2716 "TARGET_SVE"
2717 {
2718 emit_insn ((<hi_lanes_optab>
2719 ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
2720 : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
2721 (operands[0], operands[1]));
2722 DONE;
2723 }
2724)
2725
8fa7f434 2726;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
;; UNPACK covers both the signed and unsigned variants; <su> selects the
;; S/U mnemonic prefix and <perm_hilo> the HI/LO half.
88e81b08 2727(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
8fa7f434 2728 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2729 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
2730 UNPACK))]
2731 "TARGET_SVE"
2732 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
2733)
2734
8fa7f434 2735;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
2736;; First unpack the source without conversion, then float-convert the
2737;; unpacked source.
;; ZIP1/ZIP2 of the input with itself places the chosen half's elements in
;; the lanes that the widening FCVT (aarch64_sve_extend...) reads; ptrue
;; supplies the all-true governing predicate for the conversion.
2738(define_expand "vec_unpacks_<perm_hilo>_<mode>"
88e81b08 2739 [(match_operand:<VWIDE> 0 "register_operand")
2740 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
2741 UNPACK_UNSIGNED)]
8fa7f434 2742 "TARGET_SVE"
2743 {
88e81b08 2744 /* Use ZIP to do the unpack, since we don't care about the upper halves
2745 and since it has the nice property of not needing any subregs.
2746 If using UUNPK* turns out to be preferable, we could model it as
2747 a ZIP whose first operand is zero. */
2748 rtx temp = gen_reg_rtx (<MODE>mode);
2749 emit_insn ((<hi_lanes_optab>
2750 ? gen_aarch64_sve_zip2<mode>
2751 : gen_aarch64_sve_zip1<mode>)
2752 (temp, operands[1], operands[1]));
2753 rtx ptrue = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
2754 emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
2755 ptrue, temp));
2756 DONE;
8fa7f434 2757 }
2758)
2759
2760;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
2761;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
2762;; unpacked VNx4SI to VNx2DF.
;; Mirrors vec_unpacks_<perm_hilo>_<mode> above, but the final conversion
;; is the named int-to-DF pattern aarch64_sve_<optab><mode>vnx2df2.
2763(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
88e81b08 2764 [(match_operand:VNx2DF 0 "register_operand")
2765 (FLOATUORS:VNx2DF
2766 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
2767 UNPACK_UNSIGNED))]
2768 "TARGET_SVE"
2769 {
2770 /* Use ZIP to do the unpack, since we don't care about the upper halves
2771 and since it has the nice property of not needing any subregs.
2772 If using UUNPK* turns out to be preferable, we could model it as
2773 a ZIP whose first operand is zero. */
2774 rtx temp = gen_reg_rtx (VNx4SImode);
2775 emit_insn ((<hi_lanes_optab>
2776 ? gen_aarch64_sve_zip2vnx4si
2777 : gen_aarch64_sve_zip1vnx4si)
2778 (temp, operands[1], operands[1]));
2779 rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
2780 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
2781 ptrue, temp));
2782 DONE;
8fa7f434 2783 }
2784)
2785
2786;; Predicate pack. Use UZP1 on the narrower type, which discards
2787;; the high part of each wide element.
2788(define_insn "vec_pack_trunc_<Vwide>"
2789 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
2790 (unspec:PRED_BHS
2791 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
2792 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
2793 UNSPEC_PACK))]
2794 "TARGET_SVE"
2795 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2796)
2797
2798;; Integer pack. Use UZP1 on the narrower type, which discards
2799;; the high part of each wide element.
;; Same UZP1 trick as the predicate pack above, applied to vector
;; registers; both wide inputs are reinterpreted at the narrow element
;; size by the <Vetype> suffixes in the output template.
2800(define_insn "vec_pack_trunc_<Vwide>"
2801 [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
2802 (unspec:SVE_BHSI
2803 [(match_operand:<VWIDE> 1 "register_operand" "w")
2804 (match_operand:<VWIDE> 2 "register_operand" "w")]
2805 UNSPEC_PACK))]
2806 "TARGET_SVE"
2807 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2808)
2809
2810;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
2811;; the results into a single vector.
;; Operand 3 is an all-true predicate for the wide mode; operands 4 and 5
;; are fresh temporaries holding the two narrowed halves, which UZP1 then
;; interleaves into the final result.
2812(define_expand "vec_pack_trunc_<Vwide>"
2813 [(set (match_dup 4)
2814 (unspec:SVE_HSF
2815 [(match_dup 3)
2816 (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
2817 UNSPEC_FLOAT_CONVERT)]
2818 UNSPEC_MERGE_PTRUE))
2819 (set (match_dup 5)
2820 (unspec:SVE_HSF
2821 [(match_dup 3)
2822 (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
2823 UNSPEC_FLOAT_CONVERT)]
2824 UNSPEC_MERGE_PTRUE))
2825 (set (match_operand:SVE_HSF 0 "register_operand")
2826 (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2827 "TARGET_SVE"
2828 {
2829 operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
2830 operands[4] = gen_reg_rtx (<MODE>mode);
2831 operands[5] = gen_reg_rtx (<MODE>mode);
2832 }
2833)
2834
2835;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Same structure as the float pack above: operand 3 is an all-true VNx2BI
;; predicate, operands 4/5 hold the two converted halves, and UZP1 packs.
2836(define_expand "vec_pack_<su>fix_trunc_vnx2df"
2837 [(set (match_dup 4)
2838 (unspec:VNx4SI
2839 [(match_dup 3)
2840 (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
2841 UNSPEC_MERGE_PTRUE))
2842 (set (match_dup 5)
2843 (unspec:VNx4SI
2844 [(match_dup 3)
2845 (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
2846 UNSPEC_MERGE_PTRUE))
2847 (set (match_operand:VNx4SI 0 "register_operand")
2848 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2849 "TARGET_SVE"
2850 {
2851 operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
2852 operands[4] = gen_reg_rtx (VNx4SImode);
2853 operands[5] = gen_reg_rtx (VNx4SImode);
2854 }
2855)
633af029 2856
47c52435 2857;; Predicated floating-point operations with select.
;; UNSPEC_SEL chooses between the result of the predicated binary
;; operation (active lanes) and operand 4 (inactive lanes).  Operand 4
;; may be a register or a zero constant; the "*cond_..." insn patterns
;; below match the cases where it aliases an input, the output, or zero.
2858(define_expand "cond_<optab><mode>"
2859 [(set (match_operand:SVE_F 0 "register_operand")
2860 (unspec:SVE_F
2861 [(match_operand:<VPRED> 1 "register_operand")
2862 (unspec:SVE_F
83d1ca63 2863 [(match_operand:SVE_F 2 "register_operand")
47c52435 2864 (match_operand:SVE_F 3 "register_operand")]
2865 SVE_COND_FP_BINARY)
83d1ca63 2866 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
47c52435 2867 UNSPEC_SEL))]
2868 "TARGET_SVE"
83d1ca63 2869)
47c52435 2870
83d1ca63 2871;; Predicated floating-point operations with select matching output.
;; The fallback value is the destination itself (match_dup 0), so inactive
;; lanes keep the old contents of operand 0.  Alternatives: tie operand 2
;; to the destination (direct form), tie operand 3 (reversed-operand form,
;; <sve_fp_op_rev>), or use MOVPRFX to copy operand 2 first.
2872(define_insn "*cond_<optab><mode>_0"
2873 [(set (match_operand:SVE_F 0 "register_operand" "+w, w, ?&w")
2874 (unspec:SVE_F
2875 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2876 (unspec:SVE_F
2877 [(match_operand:SVE_F 2 "register_operand" "0, w, w")
2878 (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
2879 SVE_COND_FP_BINARY)
2880 (match_dup 0)]
2881 UNSPEC_SEL))]
2882 "TARGET_SVE"
2883 "@
2884 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2885 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2886 movprfx\t%0, %1/m, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2887 [(set_attr "movprfx" "*,*,yes")]
2888)
2889
2890;; Predicated floating-point operations with select matching first operand.
;; Fallback is operand 2, so inactive lanes take the first input.  Either
;; tie operand 2 to the destination or MOVPRFX-copy it first.
2891(define_insn "*cond_<optab><mode>_2"
2892 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2893 (unspec:SVE_F
2894 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2895 (unspec:SVE_F
2896 [(match_operand:SVE_F 2 "register_operand" "0, w")
2897 (match_operand:SVE_F 3 "register_operand" "w, w")]
2898 SVE_COND_FP_BINARY)
2899 (match_dup 2)]
2900 UNSPEC_SEL))]
2901 "TARGET_SVE"
2902 "@
2903 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2904 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2905 [(set_attr "movprfx" "*,yes")]
2906)
2907
2908;; Predicated floating-point operations with select matching second operand.
;; Fallback is operand 3, so the reversed-operand form <sve_fp_op_rev> is
;; used with operand 3 tied to (or MOVPRFX-copied into) the destination.
2909(define_insn "*cond_<optab><mode>_3"
2910 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2911 (unspec:SVE_F
2912 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2913 (unspec:SVE_F
2914 [(match_operand:SVE_F 2 "register_operand" "w, w")
2915 (match_operand:SVE_F 3 "register_operand" "0, w")]
2916 SVE_COND_FP_BINARY)
2917 (match_dup 3)]
2918 UNSPEC_SEL))]
2919 "TARGET_SVE"
2920 "@
2921 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2922 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
2923 [(set_attr "movprfx" "*,yes")]
2924)
2925
2926;; Predicated floating-point operations with select matching zero.
;; Fallback is the zero constant, implemented by a zeroing MOVPRFX (%1/z)
;; of operand 2 followed by the predicated operation.  "=&w" earlyclobbers
;; the destination since it is written before operand 3 is read.
2927(define_insn "*cond_<optab><mode>_z"
2928 [(set (match_operand:SVE_F 0 "register_operand" "=&w")
88fefa8f 2929 (unspec:SVE_F
2930 [(match_operand:<VPRED> 1 "register_operand" "Upl")
47c52435 2931 (unspec:SVE_F
83d1ca63 2932 [(match_operand:SVE_F 2 "register_operand" "w")
47c52435 2933 (match_operand:SVE_F 3 "register_operand" "w")]
2934 SVE_COND_FP_BINARY)
83d1ca63 2935 (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
47c52435 2936 UNSPEC_SEL))]
88fefa8f 2937 "TARGET_SVE"
83d1ca63 2938 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2939 [(set_attr "movprfx" "yes")]
88fefa8f 2940)
2941
83d1ca63 2942;; Synthetic predication of floating-point operations with select unmatched.
;; Catch-all for a fallback (operand 4) that is a register distinct from
;; the output and both inputs: after reload, split into an explicit SEL
;; that merges operand 2 with the fallback, then a predicated operation
;; that updates only the active lanes in place.
2943(define_insn_and_split "*cond_<optab><mode>_any"
2944 [(set (match_operand:SVE_F 0 "register_operand" "=&w")
dbc7e6ae 2945 (unspec:SVE_F
2946 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2947 (unspec:SVE_F
83d1ca63 2948 [(match_operand:SVE_F 2 "register_operand" "w")
2949 (match_operand:SVE_F 3 "register_operand" "w")]
dbc7e6ae 2950 SVE_COND_FP_BINARY)
83d1ca63 2951 (match_operand:SVE_F 4 "register_operand" "w")]
dbc7e6ae 2952 UNSPEC_SEL))]
2953 "TARGET_SVE"
83d1ca63 2954 "#"
2955 "&& reload_completed
2956 && !(rtx_equal_p (operands[0], operands[4])
2957 || rtx_equal_p (operands[2], operands[4])
2958 || rtx_equal_p (operands[3], operands[4]))"
2959 ; Not matchable by any one insn or movprfx insn. We need a separate select.
2960 [(set (match_dup 0)
2961 (unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL))
2962 (set (match_dup 0)
2963 (unspec:SVE_F
2964 [(match_dup 1)
2965 (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY)
2966 (match_dup 0)]
2967 UNSPEC_SEL))]
dbc7e6ae 2968)
2969
6682fc02 2970;; Predicated floating-point ternary operations with select.
;; Operands 2 and 3 are the multiplication operands, operand 4 the
;; accumulator, operand 5 the fallback for inactive lanes.
2971(define_expand "cond_<optab><mode>"
2972 [(set (match_operand:SVE_F 0 "register_operand")
2973 (unspec:SVE_F
2974 [(match_operand:<VPRED> 1 "register_operand")
2975 (unspec:SVE_F
2976 [(match_operand:SVE_F 2 "register_operand")
2977 (match_operand:SVE_F 3 "register_operand")
2978 (match_operand:SVE_F 4 "register_operand")]
2979 SVE_COND_FP_TERNARY)
2980 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
2981 UNSPEC_SEL))]
2982 "TARGET_SVE"
2983{
2984 /* Swap the multiplication operands if the fallback value is the
2985 second of the two. */
2986 if (rtx_equal_p (operands[3], operands[5]))
2987 std::swap (operands[2], operands[3]);
2988})
2989
2990;; Predicated floating-point ternary operations using the FMAD-like form.
;; Fallback equals the first multiplication operand (match_dup 2), which
;; is tied to (or MOVPRFX-copied into) the destination.
2991(define_insn "*cond_<optab><mode>_2"
2992 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2993 (unspec:SVE_F
2994 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2995 (unspec:SVE_F
2996 [(match_operand:SVE_F 2 "register_operand" "0, w")
2997 (match_operand:SVE_F 3 "register_operand" "w, w")
2998 (match_operand:SVE_F 4 "register_operand" "w, w")]
2999 SVE_COND_FP_TERNARY)
3000 (match_dup 2)]
3001 UNSPEC_SEL))]
3002 "TARGET_SVE"
3003 "@
3004 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
3005 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
3006 [(set_attr "movprfx" "*,yes")]
3007)
3008
3009;; Predicated floating-point ternary operations using the FMLA-like form.
;; Fallback equals the accumulator (match_dup 4), which is tied to (or
;; MOVPRFX-copied into) the destination.
3010(define_insn "*cond_<optab><mode>_4"
3011 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
3012 (unspec:SVE_F
3013 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3014 (unspec:SVE_F
3015 [(match_operand:SVE_F 2 "register_operand" "w, w")
3016 (match_operand:SVE_F 3 "register_operand" "w, w")
3017 (match_operand:SVE_F 4 "register_operand" "0, w")]
3018 SVE_COND_FP_TERNARY)
3019 (match_dup 4)]
3020 UNSPEC_SEL))]
3021 "TARGET_SVE"
3022 "@
3023 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
0f958fe3 3024 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6682fc02 3025 [(set_attr "movprfx" "*,yes")]
3026)
3027
3028;; Predicated floating-point ternary operations in which the value for
3029;; inactive lanes is distinct from the other inputs.
;; Alternatives: fallback is zero (zeroing MOVPRFX), fallback already in
;; the destination (merging MOVPRFX), or a distinct register, which is
;; split after reload into a SEL of the accumulator with the fallback
;; followed by the predicated FMLA-form operation in place.
3030(define_insn_and_split "*cond_<optab><mode>_any"
3031 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
3032 (unspec:SVE_F
3033 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3034 (unspec:SVE_F
3035 [(match_operand:SVE_F 2 "register_operand" "w, w, w")
3036 (match_operand:SVE_F 3 "register_operand" "w, w, w")
3037 (match_operand:SVE_F 4 "register_operand" "w, w, w")]
3038 SVE_COND_FP_TERNARY)
3039 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
3040 UNSPEC_SEL))]
3041 "TARGET_SVE
3042 && !rtx_equal_p (operands[2], operands[5])
3043 && !rtx_equal_p (operands[3], operands[5])
3044 && !rtx_equal_p (operands[4], operands[5])"
3045 "@
3046 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3047 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
3048 #"
3049 "&& reload_completed
3050 && !CONSTANT_P (operands[5])
3051 && !rtx_equal_p (operands[0], operands[5])"
3052 [(set (match_dup 0)
3053 (unspec:SVE_F [(match_dup 1) (match_dup 4) (match_dup 5)] UNSPEC_SEL))
3054 (set (match_dup 0)
3055 (unspec:SVE_F
3056 [(match_dup 1)
3057 (unspec:SVE_F [(match_dup 2) (match_dup 3) (match_dup 0)]
3058 SVE_COND_FP_TERNARY)
3059 (match_dup 0)]
3060 UNSPEC_SEL))]
3061 ""
3062 [(set_attr "movprfx" "yes")]
3063)
3064
633af029 3065;; Shift an SVE vector left and insert a scalar into element 0.
;; INSR takes the scalar (operand 2) either from a general register
;; ("rZ", allowing the zero register) or from an FP/SIMD register ("w");
;; operand 1 is tied to the destination since INSR updates in place.
3066(define_insn "vec_shl_insert_<mode>"
3067 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
3068 (unspec:SVE_ALL
3069 [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
3070 (match_operand:<VEL> 2 "register_operand" "rZ, w")]
3071 UNSPEC_INSR))]
3072 "TARGET_SVE"
3073 "@
3074 insr\t%0.<Vetype>, %<vwcore>2
3075 insr\t%0.<Vetype>, %<Vetype>2"
3076)