;; Machine description for AArch64 SVE.
;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Note on the handling of big-endian SVE
;; --------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;;
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
;; logically, a mov<mode> load must be indistinguishable from a
;; maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.

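;; As a concrete illustration (a worked example rather than anything
;; normative): take memory bytes m0 m1 ... m15 and a VNx4SI vector on
;; big-endian.  LDR would fill the register as one large integer, with
;; m0 in the most significant byte.  LD1W instead loads m0..m3 as the
;; big-endian value of element 0, which the architecture keeps at the
;; least significant end of the register.  The two layouts thus differ
;; by a reordering of the elements, which is why the subreg pattern
;; below can convert between views of a register with REV[BHW] rather
;; than a round trip through memory.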

;; SVE data moves.
(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && can_create_pseudo_p ())
      {
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1],
                                      gen_vec_duplicate<mode>);
        DONE;
      }

    /* Optimize subregs on big-endian targets: we can use REV[BHW]
       instead of going through memory.  */
    if (BYTES_BIG_ENDIAN
        && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
      DONE;
  }
)

;; A pattern for optimizing SUBREGs that have a reinterpreting effect
;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
;; for details.  We use a special predicate for operand 2 to reduce
;; the number of patterns.
(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 2 "aarch64_any_register_operand" "w")]
          UNSPEC_REV_SUBREG))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
    DONE;
  }
)

;; Unpredicated moves (little-endian).  Only allow memory operations
;; during and after RA; before RA we want the predicated load and
;; store patterns to be used instead.
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE
   && !BYTES_BIG_ENDIAN
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
           && nonmemory_operand (operands[1], <MODE>mode)))"
  "@
   ldr\t%0, %1
   str\t%1, %0
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Unpredicated moves (big-endian).  Memory accesses require secondary
;; reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
        (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "@
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Handle big-endian memory reloads.  We use byte PTRUE for all modes
;; to try to encourage reuse.
(define_expand "aarch64_sve_reload_be"
  [(parallel
     [(set (match_operand 0)
           (match_operand 1))
      (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  {
    /* Create a PTRUE.  */
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));

    /* Refer to the PTRUE in the appropriate mode for this move.  */
    machine_mode mode = GET_MODE (operands[0]);
    machine_mode pred_mode
      = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
    rtx pred = gen_lowpart (pred_mode, operands[2]);

    /* Emit a predicated load or store.  */
    aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
    DONE;
  }
)
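
;; For reference (a summary of what aarch64_sve_pred_mode does, not new
;; behavior): the lowpart taken above maps the byte PTRUE to the
;; predicate mode matching the element size of the move, i.e. 1-byte
;; elements use VNx16BI, 2-byte VNx8BI, 4-byte VNx4BI and 8-byte VNx2BI,
;; so a single all-true VNx16BI value can serve every reloaded mode.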

;; A predicated load or store for which the predicate is known to be
;; all-true.  Note that this pattern is generated directly by
;; aarch64_emit_sve_pred_move, so changes to this pattern will
;; need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   #
   ld1<Vesize>\t%0.<Vetype>, %1/z, %2
   st1<Vesize>\t%2.<Vetype>, %1, %0"
  "&& register_operand (operands[0], <MODE>mode)
   && register_operand (operands[2], <MODE>mode)"
  [(set (match_dup 0) (match_dup 2))]
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)

(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "memory_operand" "m")]
          UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vetype>, %2, %0"
)

;; Unpredicated gather loads.
(define_expand "gather_load<mode>"
  [(set (match_operand:SVE_SD 0 "register_operand")
        (unspec:SVE_SD
          [(match_dup 5)
           (match_operand:DI 1 "aarch64_reg_or_zero")
           (match_operand:<V_INT_EQUIV> 2 "register_operand")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_gather_load<mode>"
  [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
        (unspec:SVE_S
          [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
           (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
           (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1w\t%0.s, %5/z, [%2.s]
   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)
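
;; An informal reading of the alternatives above (a restatement, not
;; new semantics): operand 3 selects between sign extension of the
;; 32-bit offsets (constraint Z, printed as sxtw) and zero extension
;; (constraint Ui1, printed as uxtw), while the %p4 modifier prints the
;; base-2 logarithm of the scale, so a scale equal to the element size
;; is printed as e.g. "sxtw 2" for .s elements.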

;; Predicated gather loads for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "mask_gather_load<mode>"
  [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
        (unspec:SVE_D
          [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1d\t%0.d, %5/z, [%2.d]
   ld1d\t%0.d, %5/z, [%1, %2.d]
   ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)

;; Unpredicated scatter store.
(define_expand "scatter_store<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5)
           (match_operand:DI 0 "aarch64_reg_or_zero")
           (match_operand:<V_INT_EQUIV> 1 "register_operand")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
           (match_operand:SVE_SD 4 "register_operand")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_scatter_store<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
           (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
           (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
           (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
           (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1w\t%4.s, %5, [%1.s]
   st1w\t%4.s, %5, [%0, %1.s, sxtw]
   st1w\t%4.s, %5, [%0, %1.s, uxtw]
   st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)

;; Predicated scatter stores for 64-bit elements.  The value of operand 2
;; doesn't matter in this case.
(define_insn "mask_scatter_store<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
           (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
           (match_operand:SVE_D 4 "register_operand" "w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1d\t%4.d, %5, [%1.d]
   st1d\t%4.d, %5, [%0, %1.d]
   st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
)

;; SVE structure moves.
(define_expand "mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
        (match_operand:SVE_STRUCT 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Big-endian loads and stores need to be done via LD1 and ST1;
       see the comment at the head of the file for details.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && BYTES_BIG_ENDIAN)
      {
        gcc_assert (can_create_pseudo_p ());
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }
  }
)

;; Unpredicated structure moves (little-endian).
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Unpredicated structure moves (big-endian).  Memory accesses require
;; secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Split unpredicated structure moves into pieces.  This is the same
;; for both big-endian and little-endian code, although it only needs
;; to handle memory operands for little-endian code.
(define_split
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
  "TARGET_SVE && reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    if (REG_P (dest) && REG_P (src))
      aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
    else
      for (unsigned int i = 0; i < <vector_count>; ++i)
        {
          rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
                                             i * BYTES_PER_SVE_VECTOR);
          rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
                                            i * BYTES_PER_SVE_VECTOR);
          emit_insn (gen_rtx_SET (subdest, subsrc));
        }
    DONE;
  }
)

;; Predicated structure moves.  This works for both endiannesses but in
;; practice is only useful for big-endian.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    for (unsigned int i = 0; i < <vector_count>; ++i)
      {
        rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
                                           <MODE>mode,
                                           i * BYTES_PER_SVE_VECTOR);
        rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
                                          <MODE>mode,
                                          i * BYTES_PER_SVE_VECTOR);
        aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
      }
    DONE;
  }
  [(set_attr "length" "<insn_length>")]
)

(define_expand "mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
        (match_operand:PRED_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);
  }
)

(define_insn "*aarch64_sve_mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
        (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   mov\t%0.b, %1.b
   str\t%1, %0
   ldr\t%0, %1
   pfalse\t%0.b
   * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
)

;; Handle extractions from a predicate by converting to an integer vector
;; and extracting from there.
(define_expand "vec_extract<vpred><Vel>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:<VPRED> 1 "register_operand")
   (match_operand:SI 2 "nonmemory_operand")
   ;; Dummy operand to which we can attach the iterator.
   (reg:SVE_I V0_REGNUM)]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
                                                CONST1_RTX (<MODE>mode),
                                                CONST0_RTX (<MODE>mode)));
    emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
    DONE;
  }
)

(define_expand "vec_extract<mode><Vel>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand")
          (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
  "TARGET_SVE"
  {
    poly_int64 val;
    if (poly_int_rtx_p (operands[2], &val)
        && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
      {
        /* The last element can be extracted with a LASTB and a false
           predicate.  */
        rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
    if (!CONST_INT_P (operands[2]))
      {
        /* Create an index with operand[2] as the base and -1 as the step.
           It will then be zero for the element we care about.  */
        rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
        index = force_reg (<VEL_INT>mode, index);
        rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
        emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));

        /* Get a predicate that is true for only that element.  */
        rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
        rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
        rtx sel = gen_reg_rtx (<VPRED>mode);
        emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));

        /* Select the element using LASTB.  */
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
  }
)
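
;; A worked example of the variable-index case above (illustrative
;; only): extracting element 2 of a .s vector creates the series
;; {2, 1, 0, -1, ...}, whose value at lane I is 2 - I.  The compare
;; with zero is therefore true only for lane 2, and LASTB then returns
;; that lane as the last (and only) active element.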

;; Extract element zero.  This is a special case because we want to force
;; the registers to be the same for the second alternative, and then
;; split the instruction into nothing after RA.
(define_insn_and_split "*vec_extract<mode><Vel>_0"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
          (parallel [(const_int 0)])))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
      case 0:
        return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
      case 1:
        return "#";
      case 2:
        return "st1\\t{%1.<Vetype>}[0], %0";
      default:
        gcc_unreachable ();
      }
  }
  "&& reload_completed
   && REG_P (operands[0])
   && REGNO (operands[0]) == REGNO (operands[1])"
  [(const_int 0)]
  {
    emit_note (NOTE_INSN_DELETED);
    DONE;
  }
  [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
)

;; Extract an element from the Advanced SIMD portion of the register.
;; We don't just reuse the aarch64-simd.md pattern because we don't
;; want any change in lane number on big-endian targets.
(define_insn "*vec_extract<mode><Vel>_v128"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
      case 0:
        return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
      case 1:
        return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
      case 2:
        return "st1\\t{%1.<Vetype>}[%2], %0";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)

;; Extract an element in the range of DUP.  This pattern allows the
;; source and destination to be different.
(define_insn "*vec_extract<mode><Vel>_dup"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
  }
)

;; Extract an element outside the range of DUP.  This pattern requires the
;; source and destination to be the same.
(define_insn "*vec_extract<mode><Vel>_ext"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "0")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\t%0.b, %0.b, %0.b, #%2";
  }
)
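
;; For example (informally): extracting lane 17 of a .s vector is a
;; byte offset of 68, which is beyond both the Advanced SIMD portion
;; and the indexed range of DUP, so it is emitted as
;; "ext\tz0.b, z0.b, z0.b, #68", rotating the required element down to
;; lane 0.  Tying the source to the destination keeps this a
;; single-register operation.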

;; Extract the last active element of operand 1 into operand 0.
;; If no elements are active, extract the last inactive element instead.
(define_insn "extract_last_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
        (unspec:<VEL>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_ALL 2 "register_operand" "w, w")]
          UNSPEC_LASTB))]
  "TARGET_SVE"
  "@
   lastb\t%<vwcore>0, %1, %2.<Vetype>
   lastb\t%<Vetype>0, %1, %2.<Vetype>"
)

(define_expand "vec_duplicate<mode>"
  [(parallel
     [(set (match_operand:SVE_ALL 0 "register_operand")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
      (clobber (scratch:<VPRED>))])]
  "TARGET_SVE"
  {
    if (MEM_P (operands[1]))
      {
        rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
        emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
                                       CONST0_RTX (<MODE>mode)));
        DONE;
      }
  }
)

;; Accept memory operands for the benefit of combine, and also in case
;; the scalar input gets spilled to memory during RA.  We want to split
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
        (vec_duplicate:SVE_ALL
          (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
   (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %<vwcore>1
   mov\t%0.<Vetype>, %<Vetype>1
   #"
  "&& MEM_P (operands[1])"
  [(const_int 0)]
  {
    if (GET_CODE (operands[2]) == SCRATCH)
      operands[2] = gen_reg_rtx (<VPRED>mode);
    emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
    emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
                                   CONST0_RTX (<MODE>mode)));
    DONE;
  }
  [(set_attr "length" "4,4,8")]
)

;; This is used for vec_duplicate<mode>s from memory, but can also
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.
(define_insn "sve_ld1r<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
           (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Load 128 bits from memory and duplicate to fill a vector.  Since there
;; are so few operations on 128-bit "elements", we don't define a VNx1TI
;; and simply use vectors of bytes instead.
(define_insn "*sve_ld1rq<Vesize>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
          UNSPEC_LD1RQ))]
  "TARGET_SVE"
  "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Implement a predicate broadcast by shifting the low bit of the scalar
;; input into the top bit and using a WHILELO.  An alternative would be to
;; duplicate the input and do a compare with zero.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (DImode);
    rtx op1 = gen_lowpart (DImode, operands[1]);
    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
    DONE;
  }
)
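
;; To spell out the trick above (a restatement, not new behavior):
;; after the shift, bit 63 of TMP holds the original boolean, so TMP is
;; either 0 or 0x8000000000000000.  WHILELO from 0 to TMP then yields
;; an all-false predicate in the first case and, since the vector
;; length can never approach 2^63 elements, an all-true predicate in
;; the second.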

(define_insn "vec_series<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
        (vec_series:SVE_I
          (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
          (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
  "TARGET_SVE"
  "@
   index\t%0.<Vetype>, #%1, %<vw>2
   index\t%0.<Vetype>, %<vw>1, #%2
   index\t%0.<Vetype>, %<vw>1, %<vw>2"
)

;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (plus:SVE_I
          (vec_duplicate:SVE_I
            (match_operand:<VEL> 1 "register_operand" "r"))
          (match_operand:SVE_I 2 "immediate_operand")))]
  "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
  {
    operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
    return "index\t%0.<Vetype>, %<vw>1, #%2";
  }
)
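
;; As an example of the fold above (illustrative): adding the constant
;; series {0, 3, 6, ...} to a duplicated X produces {X, X+3, X+6, ...},
;; which is exactly "index\t%0.<Vetype>, %<vw>1, #3", provided the step
;; is within the immediate range that
;; aarch64_check_zero_based_sve_index_immediate accepts.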

;; Unpredicated LD[234].
(define_expand "vec_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "memory_operand")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated LD[234].
(define_insn "vec_mask_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"
)

;; Unpredicated ST[234].  This is always a full update, so the dependence
;; on the old value of the memory location (via (match_dup 0)) is redundant.
;; There doesn't seem to be any obvious benefit to treating the all-true
;; case differently though.  In particular, it's very unlikely that we'll
;; only find out during RTL that a store_lanes is dead.
(define_expand "vec_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "register_operand")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated ST[234].
(define_insn "vec_mask_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  "st<vector_count><Vesize>\t%1, %2, %0"
)

(define_expand "vec_perm<mode>"
  [(match_operand:SVE_ALL 0 "register_operand")
   (match_operand:SVE_ALL 1 "register_operand")
   (match_operand:SVE_ALL 2 "register_operand")
   (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
  "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
  {
    aarch64_expand_sve_vec_perm (operands[0], operands[1],
                                 operands[2], operands[3]);
    DONE;
  }
)

(define_insn "*aarch64_sve_tbl<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
          UNSPEC_TBL))]
  "TARGET_SVE"
  "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL
          [(match_operand:PRED_ALL 1 "register_operand" "Upa")
           (match_operand:PRED_ALL 2 "register_operand" "Upa")]
          PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:SVE_ALL 2 "register_operand" "w")]
          PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "*aarch64_sve_rev64<mode>"
  [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
        (unspec:SVE_BHS
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
                           UNSPEC_REV64)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.d, %1/m, %2.d"
)

(define_insn "*aarch64_sve_rev32<mode>"
  [(set (match_operand:SVE_BH 0 "register_operand" "=w")
        (unspec:SVE_BH
          [(match_operand:VNx4BI 1 "register_operand" "Upl")
           (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
                          UNSPEC_REV32)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.s, %1/m, %2.s"
)

(define_insn "*aarch64_sve_rev16vnx16qi"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(match_operand:VNx8BI 1 "register_operand" "Upl")
           (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
                           UNSPEC_REV16)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "revb\t%0.h, %1/m, %2.h"
)

(define_insn "@aarch64_sve_rev<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
                        UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vetype>, %1.<Vetype>")

(define_insn "*aarch64_sve_dup_lane<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (vec_duplicate:SVE_ALL
          (vec_select:<VEL>
            (match_operand:SVE_ALL 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "const_int_operand")]))))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
  "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
)

;; Note that the immediate (third) operand is the lane index not
;; the byte index.
(define_insn "*aarch64_sve_ext<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
                         (match_operand:SVE_ALL 2 "register_operand" "w")
                         (match_operand:SI 3 "const_int_operand")]
                        UNSPEC_EXT))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
  {
    operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\\t%0.b, %0.b, %2.b, #%3";
  }
)

(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
        (plus:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
          (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "sub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (minus:SVE_I
          (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
          (match_operand:SVE_I 2 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)

;; Unpredicated multiplication.
(define_expand "mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (mult:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Multiplication predicated with a PTRUE.  We don't actually need the
;; predicate for the first alternative, but using Upa or X isn't likely
;; to gain much and would make the instruction seem less uniform to the
;; register allocator.
(define_insn_and_split "*mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (mult:SVE_I
             (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
             (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   #
   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)

;; Unpredicated multiplications by a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (mult:SVE_I
          (match_operand:SVE_I 1 "register_operand" "0")
          (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
  "TARGET_SVE && reload_completed"
  "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
)

(define_insn "*madd<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (plus:SVE_I
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
            UNSPEC_MERGE_PTRUE)
          (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

(define_insn "*msub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (minus:SVE_I
          (match_operand:SVE_I 4 "register_operand" "w, 0, w")
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
            UNSPEC_MERGE_PTRUE)))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Unpredicated highpart multiplication.
(define_expand "<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
                          (match_operand:SVE_I 2 "register_operand")]
                         MUL_HIGHPART)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated highpart multiplication.
(define_insn "*<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
                          (match_operand:SVE_I 3 "register_operand" "w, w")]
                         MUL_HIGHPART)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated division.
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_SDI 0 "register_operand")
        (unspec:SVE_SDI
          [(match_dup 3)
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 1 "register_operand")
             (match_operand:SVE_SDI 2 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Division predicated with a PTRUE.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (SVE_INT_BINARY_SD:SVE_SDI
             (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
             (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Unpredicated NEG, NOT and POPCOUNT.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 2)
           (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; NEG, NOT and POPCOUNT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (SVE_INT_UNARY:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)

;; Vector AND, ORR and XOR.
(define_insn "<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (LOGICAL:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, w")
          (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
  "TARGET_SVE"
  "@
   <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
   <logical>\t%0.d, %1.d, %2.d"
)

;; Vector AND, ORR and XOR on floating-point modes.  We avoid subregs
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
                       (match_operand:SVE_F 2 "register_operand" "w")]
                      LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)

;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
;; this pattern even though the NOT instruction itself is predicated.
(define_insn "bic<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (and:SVE_I
          (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
          (match_operand:SVE_I 2 "register_operand" "w")))]
  "TARGET_SVE"
  "bic\t%0.d, %2.d, %1.d"
)

;; Predicate AND.  We can reuse one of the inputs as the GP.
(define_insn "and<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
                      (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
  "TARGET_SVE"
  "and\t%0.b, %1/z, %1.b, %2.b"
)
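
;; The reuse works because the AND form above is idempotent in its
;; governing predicate: "and\t%0.b, %1/z, %1.b, %2.b" computes
;; %1 & %1 & %2, which is simply %1 & %2, so no separate PTRUE is
;; needed.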

;; Unpredicated predicate ORR and XOR.
(define_expand "<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (LOGICAL_OR:PRED_ALL
            (match_operand:PRED_ALL 1 "register_operand")
            (match_operand:PRED_ALL 2 "register_operand"))
          (match_dup 3)))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<MODE>mode);
  }
)

;; Predicated predicate ORR and XOR.
(define_insn "pred_<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (LOGICAL:PRED_ALL
            (match_operand:PRED_ALL 2 "register_operand" "Upa")
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical>\t%0.b, %1/z, %2.b, %3.b"
)

;; Perform a logical operation on operands 2 and 3, using operand 1 as
;; the GP (which is known to be a PTRUE).  Store the result in operand 0
;; and set the flags in the same way as for PTEST.  The (and ...) in the
;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
;; value is structurally equivalent to rhs of the second set.
(define_insn "*<optab><mode>3_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
                      (and:PRED_ALL
                        (LOGICAL:PRED_ALL
                          (match_operand:PRED_ALL 2 "register_operand" "Upa")
                          (match_operand:PRED_ALL 3 "register_operand" "Upa"))
                        (match_dup 1))]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
                      (match_dup 1)))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Unpredicated predicate inverse.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<MODE>mode);
  }
)

;; Predicated predicate inverse.
(define_insn "*one_cmpl<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "not\t%0.b, %1/z, %2.b"
)

;; Predicated predicate BIC and ORN.
(define_insn "*<nlogical><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
)

;; Predicated predicate NAND and NOR.
(define_insn "*<logical_nn><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)

;; Unpredicated LSL, LSR and ASR by a vector.
(define_expand "v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (ASHIFT:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; LSL, LSR and ASR by a vector, predicated with a PTRUE.  We don't
;; actually need the predicate for the first alternative, but using Upa
;; or X isn't likely to gain much and would make the instruction seem
;; less uniform to the register allocator.
(define_insn_and_split "*v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (ASHIFT:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w, 0, w")
             (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   #
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes")]
)

;; Unpredicated shift operations by a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (ASHIFT:SVE_I
          (match_operand:SVE_I 1 "register_operand" "w")
          (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
  "TARGET_SVE && reload_completed"
  "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)

;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; shifts above.
(define_expand "<ASHIFT:optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
                      (match_operand:<VEL> 2 "general_operand")))]
  "TARGET_SVE"
  {
    rtx amount;
    if (CONST_INT_P (operands[2]))
      {
        amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
        if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
          amount = force_reg (<MODE>mode, amount);
      }
    else
      {
        amount = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_vec_duplicate<mode> (amount,
                                            convert_to_mode (<VEL>mode,
                                                             operands[2], 0)));
      }
    emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
    DONE;
  }
)
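
;; As a sketch of what the expander above produces for a variable
;; amount (illustrative, assuming a .s shift by a scalar in w0): the
;; amount is converted to the element mode, duplicated into a vector,
;; and handed to the vector-shift pattern, giving something like
;;   mov	z1.s, w0
;;   lsl	z0.s, p0/m, z0.s, z1.s
;; whereas constant amounts within the element-size range stay as
;; immediates and use the post-RA form.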

;; Test all bits of operand 1.  Operand 0 is a GP that is known to hold PTRUE.
;;
;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
;; is a PTRUE even if the optimizers haven't yet been able to propagate
;; the constant.  We would use a separate unspec code for PTESTs involving
;; GPs that might not be PTRUEs.
(define_insn "ptest_ptrue<mode>"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
                      (match_operand:PRED_ALL 1 "register_operand" "Upa")]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))]
  "TARGET_SVE"
  "ptest\t%0, %1.b"
)

;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
;; with the comparison being unsigned.
(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
                          (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
                         UNSPEC_WHILE_LO))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
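
;; For example (an informal reading of the pattern above): after
;; "whilelo\tp0.s, w0, w1", element I of p0 is true exactly when
;; W0 + I < W1 as an unsigned comparison, so a loop counter in W0 and a
;; bound in W1 produce a predicate covering the remaining iterations.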
1344
1345 ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
1346 ;; Handle the case in which both results are useful. The GP operand
1347 ;; to the PTEST isn't needed, so we allow it to be anything.
1348 (define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
1349 [(set (reg:CC CC_REGNUM)
1350 (compare:CC
1351 (unspec:SI [(match_operand:PRED_ALL 1)
1352 (unspec:PRED_ALL
1353 [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
1354 (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
1355 UNSPEC_WHILE_LO)]
1356 UNSPEC_PTEST_PTRUE)
1357 (const_int 0)))
1358 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1359 (unspec:PRED_ALL [(match_dup 2)
1360 (match_dup 3)]
1361 UNSPEC_WHILE_LO))]
1362 "TARGET_SVE"
1363 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
1364 ;; Force the compiler to drop the unused predicate operand, so that we
1365 ;; don't have an unnecessary PTRUE.
1366 "&& !CONSTANT_P (operands[1])"
1367 {
1368 operands[1] = CONSTM1_RTX (<MODE>mode);
1369 }
1370 )
1371
1372 ;; Integer comparisons predicated with a PTRUE.
1373 (define_insn "*cmp<cmp_op><mode>"
1374 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1375 (unspec:<VPRED>
1376 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1377 (SVE_INT_CMP:<VPRED>
1378 (match_operand:SVE_I 2 "register_operand" "w, w")
1379 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1380 UNSPEC_MERGE_PTRUE))
1381 (clobber (reg:CC CC_REGNUM))]
1382 "TARGET_SVE"
1383 "@
1384 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1385 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1386 )
1387
1388 ;; Integer comparisons predicated with a PTRUE in which only the flags result
1389 ;; is interesting.
1390 (define_insn "*cmp<cmp_op><mode>_ptest"
1391 [(set (reg:CC CC_REGNUM)
1392 (compare:CC
1393 (unspec:SI
1394 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1395 (unspec:<VPRED>
1396 [(match_dup 1)
1397 (SVE_INT_CMP:<VPRED>
1398 (match_operand:SVE_I 2 "register_operand" "w, w")
1399 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1400 UNSPEC_MERGE_PTRUE)]
1401 UNSPEC_PTEST_PTRUE)
1402 (const_int 0)))
1403 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
1404 "TARGET_SVE"
1405 "@
1406 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1407 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1408 )
1409
1410 ;; Integer comparisons predicated with a PTRUE in which both the flag and
1411 ;; predicate results are interesting.
1412 (define_insn "*cmp<cmp_op><mode>_cc"
1413 [(set (reg:CC CC_REGNUM)
1414 (compare:CC
1415 (unspec:SI
1416 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1417 (unspec:<VPRED>
1418 [(match_dup 1)
1419 (SVE_INT_CMP:<VPRED>
1420 (match_operand:SVE_I 2 "register_operand" "w, w")
1421 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1422 UNSPEC_MERGE_PTRUE)]
1423 UNSPEC_PTEST_PTRUE)
1424 (const_int 0)))
1425 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1426 (unspec:<VPRED>
1427 [(match_dup 1)
1428 (SVE_INT_CMP:<VPRED>
1429 (match_dup 2)
1430 (match_dup 3))]
1431 UNSPEC_MERGE_PTRUE))]
1432 "TARGET_SVE"
1433 "@
1434 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1435 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1436 )
1437
1438 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
1439 ;; comparison with an AND. Split the instruction into its preferred form
1440 ;; (below) at the earliest opportunity, in order to get rid of the
1441 ;; redundant operand 1.
1442 (define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
1443 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1444 (and:<VPRED>
1445 (unspec:<VPRED>
1446 [(match_operand:<VPRED> 1)
1447 (SVE_INT_CMP:<VPRED>
1448 (match_operand:SVE_I 2 "register_operand" "w, w")
1449 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1450 UNSPEC_MERGE_PTRUE)
1451 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
1452 (clobber (reg:CC CC_REGNUM))]
1453 "TARGET_SVE"
1454 "#"
1455 "&& 1"
1456 [(parallel
1457 [(set (match_dup 0)
1458 (and:<VPRED>
1459 (SVE_INT_CMP:<VPRED>
1460 (match_dup 2)
1461 (match_dup 3))
1462 (match_dup 4)))
1463 (clobber (reg:CC CC_REGNUM))])]
1464 )
1465
1466 ;; Predicated integer comparisons.
1467 (define_insn "*pred_cmp<cmp_op><mode>"
1468 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1469 (and:<VPRED>
1470 (SVE_INT_CMP:<VPRED>
1471 (match_operand:SVE_I 2 "register_operand" "w, w")
1472 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
1473 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
1474 (clobber (reg:CC CC_REGNUM))]
1475 "TARGET_SVE"
1476 "@
1477 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1478 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1479 )
1480
1481 ;; Floating-point comparisons predicated with a PTRUE.
1482 (define_insn "*fcm<cmp_op><mode>"
1483 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1484 (unspec:<VPRED>
1485 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1486 (SVE_FP_CMP:<VPRED>
1487 (match_operand:SVE_F 2 "register_operand" "w, w")
1488 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
1489 UNSPEC_MERGE_PTRUE))]
1490 "TARGET_SVE"
1491 "@
1492 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1493 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1494 )
1495
1496 (define_insn "*fcmuo<mode>"
1497 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1498 (unspec:<VPRED>
1499 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1500 (unordered:<VPRED>
1501 (match_operand:SVE_F 2 "register_operand" "w")
1502 (match_operand:SVE_F 3 "register_operand" "w"))]
1503 UNSPEC_MERGE_PTRUE))]
1504 "TARGET_SVE"
1505 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1506 )
1507
1508 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
1509 ;; with another predicate P. This does not have the same trapping behavior
1510 ;; as predicating the comparison itself on P, but it's a legitimate fold,
1511 ;; since we can drop any potentially-trapping operations whose results
1512 ;; are not needed.
1513 ;;
1514 ;; Split the instruction into its preferred form (below) at the earliest
1515 ;; opportunity, in order to get rid of the redundant operand 1.
1516 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
1517 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1518 (and:<VPRED>
1519 (unspec:<VPRED>
1520 [(match_operand:<VPRED> 1)
1521 (SVE_FP_CMP
1522 (match_operand:SVE_F 2 "register_operand" "w, w")
1523 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
1524 UNSPEC_MERGE_PTRUE)
1525 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
1526 "TARGET_SVE"
1527 "#"
1528 "&& 1"
1529 [(set (match_dup 0)
1530 (and:<VPRED>
1531 (SVE_FP_CMP:<VPRED>
1532 (match_dup 2)
1533 (match_dup 3))
1534 (match_dup 4)))]
1535 )
1536
1537 (define_insn_and_split "*fcmuo<mode>_and_combine"
1538 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1539 (and:<VPRED>
1540 (unspec:<VPRED>
1541 [(match_operand:<VPRED> 1)
1542 (unordered
1543 (match_operand:SVE_F 2 "register_operand" "w")
1544 (match_operand:SVE_F 3 "register_operand" "w"))]
1545 UNSPEC_MERGE_PTRUE)
1546 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
1547 "TARGET_SVE"
1548 "#"
1549 "&& 1"
1550 [(set (match_dup 0)
1551 (and:<VPRED>
1552 (unordered:<VPRED>
1553 (match_dup 2)
1554 (match_dup 3))
1555 (match_dup 4)))]
1556 )
1557
1558 ;; Unpredicated floating-point comparisons, with the results ANDed
1559 ;; with another predicate. This is a valid fold for the same reasons
1560 ;; as above.
1561 (define_insn "*fcm<cmp_op><mode>_and"
1562 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1563 (and:<VPRED>
1564 (SVE_FP_CMP:<VPRED>
1565 (match_operand:SVE_F 2 "register_operand" "w, w")
1566 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
1567 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
1568 "TARGET_SVE"
1569 "@
1570 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1571 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1572 )
1573
1574 (define_insn "*fcmuo<mode>_and"
1575 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1576 (and:<VPRED>
1577 (unordered:<VPRED>
1578 (match_operand:SVE_F 2 "register_operand" "w")
1579 (match_operand:SVE_F 3 "register_operand" "w"))
1580 (match_operand:<VPRED> 1 "register_operand" "Upl")))]
1581 "TARGET_SVE"
1582 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1583 )
1584
1585 ;; Predicated floating-point comparisons. We don't need a version
1586 ;; of this for unordered comparisons.
1587 (define_insn "*pred_fcm<cmp_op><mode>"
1588 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1589 (unspec:<VPRED>
1590 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1591 (match_operand:SVE_F 2 "register_operand" "w, w")
1592 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
1593 SVE_COND_FP_CMP))]
1594 "TARGET_SVE"
1595 "@
1596 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1597 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1598 )
1599
1600 ;; vcond_mask operand order: true, false, mask
1601 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
1602 ;; SEL operand order: mask, true, false
1603 (define_insn "vcond_mask_<mode><vpred>"
1604 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1605 (unspec:SVE_ALL
1606 [(match_operand:<VPRED> 3 "register_operand" "Upa")
1607 (match_operand:SVE_ALL 1 "register_operand" "w")
1608 (match_operand:SVE_ALL 2 "register_operand" "w")]
1609 UNSPEC_SEL))]
1610 "TARGET_SVE"
1611 "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
1612 )
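
;; For example (arbitrary registers):
;;   sel     z0.s, p3, z1.s, z2.s
;; takes each lane of z1.s for which p3 is true and each lane of z2.s
;; for which it is false.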
1613
1614 ;; Selects between a duplicated immediate and zero.
1615 (define_insn "aarch64_sve_dup<mode>_const"
1616 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1617 (unspec:SVE_I
1618 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1619 (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
1620 (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
1621 UNSPEC_SEL))]
1622 "TARGET_SVE"
1623 "mov\t%0.<Vetype>, %1/z, #%2"
1624 )
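
;; For example, duplicating #1 while zeroing the inactive lanes might
;; assemble to (arbitrary registers):
;;   mov     z0.s, p0/z, #1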
1625
1626 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
1627 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1628 (define_expand "vcond<mode><v_int_equiv>"
1629 [(set (match_operand:SVE_ALL 0 "register_operand")
1630 (if_then_else:SVE_ALL
1631 (match_operator 3 "comparison_operator"
1632 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1633 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1634 (match_operand:SVE_ALL 1 "register_operand")
1635 (match_operand:SVE_ALL 2 "register_operand")))]
1636 "TARGET_SVE"
1637 {
1638 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1639 DONE;
1640 }
1641 )
1642
1643 ;; Integer vcondu. Don't enforce an immediate range here, since it
1644 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1645 (define_expand "vcondu<mode><v_int_equiv>"
1646 [(set (match_operand:SVE_ALL 0 "register_operand")
1647 (if_then_else:SVE_ALL
1648 (match_operator 3 "comparison_operator"
1649 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1650 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1651 (match_operand:SVE_ALL 1 "register_operand")
1652 (match_operand:SVE_ALL 2 "register_operand")))]
1653 "TARGET_SVE"
1654 {
1655 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1656 DONE;
1657 }
1658 )
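
;; As a rough illustration of the interface, a scalar loop such as:
;;   for (int i = 0; i < n; ++i)
;;     r[i] = a[i] < b[i] ? x[i] : y[i];
;; reaches these expanders with operand 3 being the LT comparison of
;; operands 4 and 5 and with operands 1 and 2 being the vectorized
;; x[i] and y[i].  aarch64_expand_sve_vcond emits the comparison into
;; a predicate and then selects between the two data inputs with SEL.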
1659
1660 ;; Floating-point vcond. All comparisons except FCMUO allow a zero
1661 ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
1662 ;; with zero.
1663 (define_expand "vcond<mode><v_fp_equiv>"
1664 [(set (match_operand:SVE_SD 0 "register_operand")
1665 (if_then_else:SVE_SD
1666 (match_operator 3 "comparison_operator"
1667 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
1668 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
1669 (match_operand:SVE_SD 1 "register_operand")
1670 (match_operand:SVE_SD 2 "register_operand")))]
1671 "TARGET_SVE"
1672 {
1673 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
1674 DONE;
1675 }
1676 )
1677
1678 ;; Signed integer comparisons. Don't enforce an immediate range here, since
1679 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
1680 ;; instead.
1681 (define_expand "vec_cmp<mode><vpred>"
1682 [(parallel
1683 [(set (match_operand:<VPRED> 0 "register_operand")
1684 (match_operator:<VPRED> 1 "comparison_operator"
1685 [(match_operand:SVE_I 2 "register_operand")
1686 (match_operand:SVE_I 3 "nonmemory_operand")]))
1687 (clobber (reg:CC CC_REGNUM))])]
1688 "TARGET_SVE"
1689 {
1690 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1691 operands[2], operands[3]);
1692 DONE;
1693 }
1694 )
1695
1696 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
1697 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
1698 ;; instead.
1699 (define_expand "vec_cmpu<mode><vpred>"
1700 [(parallel
1701 [(set (match_operand:<VPRED> 0 "register_operand")
1702 (match_operator:<VPRED> 1 "comparison_operator"
1703 [(match_operand:SVE_I 2 "register_operand")
1704 (match_operand:SVE_I 3 "nonmemory_operand")]))
1705 (clobber (reg:CC CC_REGNUM))])]
1706 "TARGET_SVE"
1707 {
1708 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1709 operands[2], operands[3]);
1710 DONE;
1711 }
1712 )
1713
1714 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
1715 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
1716 ;; with zero.
1717 (define_expand "vec_cmp<mode><vpred>"
1718 [(set (match_operand:<VPRED> 0 "register_operand")
1719 (match_operator:<VPRED> 1 "comparison_operator"
1720 [(match_operand:SVE_F 2 "register_operand")
1721 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
1722 "TARGET_SVE"
1723 {
1724 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
1725 operands[2], operands[3], false);
1726 DONE;
1727 }
1728 )
1729
1730 ;; Branch based on predicate equality or inequality.
1731 (define_expand "cbranch<mode>4"
1732 [(set (pc)
1733 (if_then_else
1734 (match_operator 0 "aarch64_equality_operator"
1735 [(match_operand:PRED_ALL 1 "register_operand")
1736 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
1737 (label_ref (match_operand 3 ""))
1738 (pc)))]
1739 ""
1740 {
1741 rtx ptrue = aarch64_ptrue_reg (<MODE>mode);
1742 rtx pred;
1743 if (operands[2] == CONST0_RTX (<MODE>mode))
1744 pred = operands[1];
1745 else
1746 {
1747 pred = gen_reg_rtx (<MODE>mode);
1748 emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
1749 operands[2]));
1750 }
1751 emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
1752 operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
1753 operands[2] = const0_rtx;
1754 }
1755 )
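
;; For a byte predicate inequality the expansion is roughly (arbitrary
;; registers; the branch condition comes from operator 0):
;;   ptrue   p3.b
;;   eor     p2.b, p3/z, p0.b, p1.b
;;   ptest   p3, p2.b
;;   b.ne    .Ldest
;; since the EOR has an active lane iff the two predicates differ.
;; When operand 2 is zero, the EOR is omitted and operand 1 is tested
;; directly.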
1756
1757 ;; Unpredicated integer MIN/MAX.
1758 (define_expand "<su><maxmin><mode>3"
1759 [(set (match_operand:SVE_I 0 "register_operand")
1760 (unspec:SVE_I
1761 [(match_dup 3)
1762 (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
1763 (match_operand:SVE_I 2 "register_operand"))]
1764 UNSPEC_MERGE_PTRUE))]
1765 "TARGET_SVE"
1766 {
1767 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1768 }
1769 )
1770
1771 ;; Integer MIN/MAX predicated with a PTRUE.
1772 (define_insn "*<su><maxmin><mode>3"
1773 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1774 (unspec:SVE_I
1775 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1776 (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
1777 (match_operand:SVE_I 3 "register_operand" "w, w"))]
1778 UNSPEC_MERGE_PTRUE))]
1779 "TARGET_SVE"
1780 "@
1781 <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1782 movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1783 [(set_attr "movprfx" "*,yes")]
1784 )
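
;; For example, the second alternative above allows the destination to
;; differ from both inputs, at the cost of a MOVPRFX (arbitrary
;; registers):
;;   movprfx z0, z2
;;   smax    z0.s, p0/m, z0.s, z3.s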
1785
1786 ;; Unpredicated floating-point MIN/MAX.
1787 (define_expand "<su><maxmin><mode>3"
1788 [(set (match_operand:SVE_F 0 "register_operand")
1789 (unspec:SVE_F
1790 [(match_dup 3)
1791 (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
1792 (match_operand:SVE_F 2 "register_operand"))]
1793 UNSPEC_MERGE_PTRUE))]
1794 "TARGET_SVE"
1795 {
1796 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1797 }
1798 )
1799
1800 ;; Floating-point MIN/MAX predicated with a PTRUE.
1801 (define_insn "*<su><maxmin><mode>3"
1802 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
1803 (unspec:SVE_F
1804 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1805 (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w")
1806 (match_operand:SVE_F 3 "register_operand" "w, w"))]
1807 UNSPEC_MERGE_PTRUE))]
1808 "TARGET_SVE"
1809 "@
1810 f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1811 movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1812 [(set_attr "movprfx" "*,yes")]
1813 )
1814
1815 ;; Unpredicated fmin/fmax.
1816 (define_expand "<maxmin_uns><mode>3"
1817 [(set (match_operand:SVE_F 0 "register_operand")
1818 (unspec:SVE_F
1819 [(match_dup 3)
1820 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
1821 (match_operand:SVE_F 2 "register_operand")]
1822 FMAXMIN_UNS)]
1823 UNSPEC_MERGE_PTRUE))]
1824 "TARGET_SVE"
1825 {
1826 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1827 }
1828 )
1829
1830 ;; fmin/fmax predicated with a PTRUE.
1831 (define_insn "*<maxmin_uns><mode>3"
1832 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
1833 (unspec:SVE_F
1834 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1835 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w")
1836 (match_operand:SVE_F 3 "register_operand" "w, w")]
1837 FMAXMIN_UNS)]
1838 UNSPEC_MERGE_PTRUE))]
1839 "TARGET_SVE"
1840 "@
1841 <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1842 movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1843 [(set_attr "movprfx" "*,yes")]
1844 )
1845
1846 ;; Predicated integer operations with select.
1847 (define_expand "cond_<optab><mode>"
1848 [(set (match_operand:SVE_I 0 "register_operand")
1849 (unspec:SVE_I
1850 [(match_operand:<VPRED> 1 "register_operand")
1851 (SVE_INT_BINARY:SVE_I
1852 (match_operand:SVE_I 2 "register_operand")
1853 (match_operand:SVE_I 3 "register_operand"))
1854 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
1855 UNSPEC_SEL))]
1856 "TARGET_SVE"
1857 )
1858
1859 (define_expand "cond_<optab><mode>"
1860 [(set (match_operand:SVE_SDI 0 "register_operand")
1861 (unspec:SVE_SDI
1862 [(match_operand:<VPRED> 1 "register_operand")
1863 (SVE_INT_BINARY_SD:SVE_SDI
1864 (match_operand:SVE_SDI 2 "register_operand")
1865 (match_operand:SVE_SDI 3 "register_operand"))
1866 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
1867 UNSPEC_SEL))]
1868 "TARGET_SVE"
1869 )
1870
1871 ;; Predicated integer operations with select matching the first operand.
1872 (define_insn "*cond_<optab><mode>_2"
1873 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1874 (unspec:SVE_I
1875 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1876 (SVE_INT_BINARY:SVE_I
1877 (match_operand:SVE_I 2 "register_operand" "0, w")
1878 (match_operand:SVE_I 3 "register_operand" "w, w"))
1879 (match_dup 2)]
1880 UNSPEC_SEL))]
1881 "TARGET_SVE"
1882 "@
1883 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1884 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1885 [(set_attr "movprfx" "*,yes")]
1886 )
1887
1888 (define_insn "*cond_<optab><mode>_2"
1889 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1890 (unspec:SVE_SDI
1891 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1892 (SVE_INT_BINARY_SD:SVE_SDI
1893 (match_operand:SVE_SDI 2 "register_operand" "0, w")
1894 (match_operand:SVE_SDI 3 "register_operand" "w, w"))
1895 (match_dup 2)]
1896 UNSPEC_SEL))]
1897 "TARGET_SVE"
1898 "@
1899 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1900 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1901 [(set_attr "movprfx" "*,yes")]
1902 )
1903
1904 ;; Predicated integer operations with select matching the second operand.
1905 (define_insn "*cond_<optab><mode>_3"
1906 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1907 (unspec:SVE_I
1908 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1909 (SVE_INT_BINARY:SVE_I
1910 (match_operand:SVE_I 2 "register_operand" "w, w")
1911 (match_operand:SVE_I 3 "register_operand" "0, w"))
1912 (match_dup 3)]
1913 UNSPEC_SEL))]
1914 "TARGET_SVE"
1915 "@
1916 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1917 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1918 [(set_attr "movprfx" "*,yes")]
1919 )
1920
1921 (define_insn "*cond_<optab><mode>_3"
1922 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1923 (unspec:SVE_SDI
1924 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1925 (SVE_INT_BINARY_SD:SVE_SDI
1926 (match_operand:SVE_SDI 2 "register_operand" "w, w")
1927 (match_operand:SVE_SDI 3 "register_operand" "0, w"))
1928 (match_dup 3)]
1929 UNSPEC_SEL))]
1930 "TARGET_SVE"
1931 "@
1932 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1933 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1934 [(set_attr "movprfx" "*,yes")]
1935 )
1936
1937 ;; Predicated integer binary operations in which the values of inactive
1938 ;; lanes are distinct from the other inputs.
1939 (define_insn_and_rewrite "*cond_<optab><mode>_any"
1940 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
1941 (unspec:SVE_I
1942 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
1943 (SVE_INT_BINARY:SVE_I
1944 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
1945 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
1946 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
1947 UNSPEC_SEL))]
1948 "TARGET_SVE
1949 && !rtx_equal_p (operands[2], operands[4])
1950 && !rtx_equal_p (operands[3], operands[4])"
1951 "@
1952 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1953 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1954 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1955 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1956 #"
1957 "&& reload_completed
1958 && register_operand (operands[4], <MODE>mode)
1959 && !rtx_equal_p (operands[0], operands[4])"
1960 {
1961 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1962 operands[4], operands[1]));
1963 operands[4] = operands[2] = operands[0];
1964 }
1965 [(set_attr "movprfx" "yes")]
1966 )
1967
1968 (define_insn_and_rewrite "*cond_<optab><mode>_any"
1969 [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
1970 (unspec:SVE_SDI
1971 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
1972 (SVE_INT_BINARY_SD:SVE_SDI
1973 (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
1974 (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
1975 (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
1976 UNSPEC_SEL))]
1977 "TARGET_SVE
1978 && !rtx_equal_p (operands[2], operands[4])
1979 && !rtx_equal_p (operands[3], operands[4])"
1980 "@
1981 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1982 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1983 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1984 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1985 #"
1986 "&& reload_completed
1987 && register_operand (operands[4], <MODE>mode)
1988 && !rtx_equal_p (operands[0], operands[4])"
1989 {
1990 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1991 operands[4], operands[1]));
1992 operands[4] = operands[2] = operands[0];
1993 }
1994 [(set_attr "movprfx" "yes")]
1995 )
1996
1997 ;; Set operand 0 to the last active element in operand 3, or to tied
1998 ;; operand 1 if no elements are active.
1999 (define_insn "fold_extract_last_<mode>"
2000 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
2001 (unspec:<VEL>
2002 [(match_operand:<VEL> 1 "register_operand" "0, 0")
2003 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
2004 (match_operand:SVE_ALL 3 "register_operand" "w, w")]
2005 UNSPEC_CLASTB))]
2006 "TARGET_SVE"
2007 "@
2008 clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
2009 clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
2010 )
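
;; For example (arbitrary registers):
;;   clastb  w0, p0, w0, z1.s
;; replaces w0 with the last active .s element of z1, or leaves w0
;; untouched if p0 has no active lanes; this is how live-out values
;; are extracted from fully-masked loops.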
2011
2012 ;; Unpredicated integer add reduction.
2013 (define_expand "reduc_plus_scal_<mode>"
2014 [(set (match_operand:<VEL> 0 "register_operand")
2015 (unspec:<VEL> [(match_dup 2)
2016 (match_operand:SVE_I 1 "register_operand")]
2017 UNSPEC_ADDV))]
2018 "TARGET_SVE"
2019 {
2020 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2021 }
2022 )
2023
2024 ;; Predicated integer add reduction.  The result is always 64 bits.
2025 (define_insn "*reduc_plus_scal_<mode>"
2026 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2027 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2028 (match_operand:SVE_I 2 "register_operand" "w")]
2029 UNSPEC_ADDV))]
2030 "TARGET_SVE"
2031 "uaddv\t%d0, %1, %2.<Vetype>"
2032 )
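
;; For example, summing the active byte lanes (arbitrary registers):
;;   uaddv   d0, p0, z1.b
;; produces a 64-bit result regardless of the element size.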
2033
2034 ;; Unpredicated floating-point add reduction.
2035 (define_expand "reduc_plus_scal_<mode>"
2036 [(set (match_operand:<VEL> 0 "register_operand")
2037 (unspec:<VEL> [(match_dup 2)
2038 (match_operand:SVE_F 1 "register_operand")]
2039 UNSPEC_FADDV))]
2040 "TARGET_SVE"
2041 {
2042 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2043 }
2044 )
2045
2046 ;; Predicated floating-point add reduction.
2047 (define_insn "*reduc_plus_scal_<mode>"
2048 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2049 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2050 (match_operand:SVE_F 2 "register_operand" "w")]
2051 UNSPEC_FADDV))]
2052 "TARGET_SVE"
2053 "faddv\t%<Vetype>0, %1, %2.<Vetype>"
2054 )
2055
2056 ;; Unpredicated integer MIN/MAX reduction.
2057 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2058 [(set (match_operand:<VEL> 0 "register_operand")
2059 (unspec:<VEL> [(match_dup 2)
2060 (match_operand:SVE_I 1 "register_operand")]
2061 MAXMINV))]
2062 "TARGET_SVE"
2063 {
2064 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2065 }
2066 )
2067
2068 ;; Predicated integer MIN/MAX reduction.
2069 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
2070 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2071 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2072 (match_operand:SVE_I 2 "register_operand" "w")]
2073 MAXMINV))]
2074 "TARGET_SVE"
2075 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
2076 )
2077
2078 ;; Unpredicated floating-point MIN/MAX reduction.
2079 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2080 [(set (match_operand:<VEL> 0 "register_operand")
2081 (unspec:<VEL> [(match_dup 2)
2082 (match_operand:SVE_F 1 "register_operand")]
2083 FMAXMINV))]
2084 "TARGET_SVE"
2085 {
2086 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2087 }
2088 )
2089
2090 ;; Predicated floating-point MIN/MAX reduction.
2091 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
2092 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2093 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2094 (match_operand:SVE_F 2 "register_operand" "w")]
2095 FMAXMINV))]
2096 "TARGET_SVE"
2097 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
2098 )
2099
2100 (define_expand "reduc_<optab>_scal_<mode>"
2101 [(set (match_operand:<VEL> 0 "register_operand")
2102 (unspec:<VEL> [(match_dup 2)
2103 (match_operand:SVE_I 1 "register_operand")]
2104 BITWISEV))]
2105 "TARGET_SVE"
2106 {
2107 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2108 }
2109 )
2110
2111 (define_insn "*reduc_<optab>_scal_<mode>"
2112 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2113 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2114 (match_operand:SVE_I 2 "register_operand" "w")]
2115 BITWISEV))]
2116 "TARGET_SVE"
2117 "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
2118 )
2119
2120 ;; Unpredicated in-order FP reductions.
2121 (define_expand "fold_left_plus_<mode>"
2122 [(set (match_operand:<VEL> 0 "register_operand")
2123 (unspec:<VEL> [(match_dup 3)
2124 (match_operand:<VEL> 1 "register_operand")
2125 (match_operand:SVE_F 2 "register_operand")]
2126 UNSPEC_FADDA))]
2127 "TARGET_SVE"
2128 {
2129 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2130 }
2131 )
2132
2133 ;; In-order FP reductions predicated with a PTRUE.
2134 (define_insn "mask_fold_left_plus_<mode>"
2135 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2136 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
2137 (match_operand:<VEL> 1 "register_operand" "0")
2138 (match_operand:SVE_F 2 "register_operand" "w")]
2139 UNSPEC_FADDA))]
2140 "TARGET_SVE"
2141 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
2142 )
2143
2144 ;; Predicated form of the above in-order reduction.
2145 (define_insn "*pred_fold_left_plus_<mode>"
2146 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2147 (unspec:<VEL>
2148 [(match_operand:<VEL> 1 "register_operand" "0")
2149 (unspec:SVE_F
2150 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2151 (match_operand:SVE_F 3 "register_operand" "w")
2152 (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
2153 UNSPEC_SEL)]
2154 UNSPEC_FADDA))]
2155 "TARGET_SVE"
2156 "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
2157 )
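
;; For example (arbitrary registers):
;;   fadda   s0, p0, s0, z1.s
;; adds the active .s lanes of z1 into s0 strictly from lowest to
;; highest lane, preserving the original scalar association.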
2158
2159 ;; Unpredicated floating-point addition.
2160 (define_expand "add<mode>3"
2161 [(set (match_operand:SVE_F 0 "register_operand")
2162 (unspec:SVE_F
2163 [(match_dup 3)
2164 (plus:SVE_F
2165 (match_operand:SVE_F 1 "register_operand")
2166 (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
2167 UNSPEC_MERGE_PTRUE))]
2168 "TARGET_SVE"
2169 {
2170 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2171 }
2172 )
2173
2174 ;; Floating-point addition predicated with a PTRUE.
2175 (define_insn_and_split "*add<mode>3"
2176 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
2177 (unspec:SVE_F
2178 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2179 (plus:SVE_F
2180 (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
2181 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
2182 UNSPEC_MERGE_PTRUE))]
2183 "TARGET_SVE"
2184 "@
2185 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2186 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2187 #"
2188 ; Split the unpredicated form after reload, so that we don't have
2189 ; the unnecessary PTRUE.
2190 "&& reload_completed
2191 && register_operand (operands[3], <MODE>mode)"
2192 [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
2193 )
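
;; Once split, the plain (plus ...) above is matched by the
;; *post_ra_<sve_fp_op><mode>3 pattern further down, giving the
;; unpredicated form, e.g. (arbitrary registers):
;;   fadd    z0.s, z1.s, z2.s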
2194
2195 ;; Unpredicated floating-point subtraction.
2196 (define_expand "sub<mode>3"
2197 [(set (match_operand:SVE_F 0 "register_operand")
2198 (unspec:SVE_F
2199 [(match_dup 3)
2200 (minus:SVE_F
2201 (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
2202 (match_operand:SVE_F 2 "register_operand"))]
2203 UNSPEC_MERGE_PTRUE))]
2204 "TARGET_SVE"
2205 {
2206 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2207 }
2208 )
2209
2210 ;; Floating-point subtraction predicated with a PTRUE.
2211 (define_insn_and_split "*sub<mode>3"
2212 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
2213 (unspec:SVE_F
2214 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
2215 (minus:SVE_F
2216 (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
2217 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
2218 UNSPEC_MERGE_PTRUE))]
2219 "TARGET_SVE
2220 && (register_operand (operands[2], <MODE>mode)
2221 || register_operand (operands[3], <MODE>mode))"
2222 "@
2223 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2224 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2225 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
2226 #"
2227 ; Split the unpredicated form after reload, so that we don't have
2228 ; the unnecessary PTRUE.
2229 "&& reload_completed
2230 && register_operand (operands[2], <MODE>mode)
2231 && register_operand (operands[3], <MODE>mode)"
2232 [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
2233 )
2234
2235 ;; Unpredicated floating-point multiplication.
2236 (define_expand "mul<mode>3"
2237 [(set (match_operand:SVE_F 0 "register_operand")
2238 (unspec:SVE_F
2239 [(match_dup 3)
2240 (mult:SVE_F
2241 (match_operand:SVE_F 1 "register_operand")
2242 (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
2243 UNSPEC_MERGE_PTRUE))]
2244 "TARGET_SVE"
2245 {
2246 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2247 }
2248 )
2249
2250 ;; Floating-point multiplication predicated with a PTRUE.
2251 (define_insn_and_split "*mul<mode>3"
2252 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2253 (unspec:SVE_F
2254 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2255 (mult:SVE_F
2256 (match_operand:SVE_F 2 "register_operand" "%0, w")
2257 (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
2258 UNSPEC_MERGE_PTRUE))]
2259 "TARGET_SVE"
2260 "@
2261 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2262 #"
2263 ; Split the unpredicated form after reload, so that we don't have
2264 ; the unnecessary PTRUE.
2265 "&& reload_completed
2266 && register_operand (operands[3], <MODE>mode)"
2267 [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
2268 )
2269
2270 ;; Unpredicated floating-point binary operations (post-RA only).
2271 ;; These are generated by splitting a predicated instruction whose
2272 ;; predicate is unused.
2273 (define_insn "*post_ra_<sve_fp_op><mode>3"
2274 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2275 (SVE_UNPRED_FP_BINARY:SVE_F
2276 (match_operand:SVE_F 1 "register_operand" "w")
2277 (match_operand:SVE_F 2 "register_operand" "w")))]
2278 "TARGET_SVE && reload_completed"
2279 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
2280
2281 ;; Unpredicated fma (%0 = (%1 * %2) + %3).
2282 (define_expand "fma<mode>4"
2283 [(set (match_operand:SVE_F 0 "register_operand")
2284 (unspec:SVE_F
2285 [(match_dup 4)
2286 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2287 (match_operand:SVE_F 2 "register_operand")
2288 (match_operand:SVE_F 3 "register_operand"))]
2289 UNSPEC_MERGE_PTRUE))]
2290 "TARGET_SVE"
2291 {
2292 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2293 }
2294 )
2295
2296 ;; fma predicated with a PTRUE.
2297 (define_insn "*fma<mode>4"
2298 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2299 (unspec:SVE_F
2300 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2301 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
2302 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2303 (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
2304 UNSPEC_MERGE_PTRUE))]
2305 "TARGET_SVE"
2306 "@
2307 fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2308 fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2309 movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2310 [(set_attr "movprfx" "*,*,yes")]
2311 )
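
;; The two non-MOVPRFX alternatives pick whichever form ties the reused
;; operand: FMAD overwrites a multiplicand and FMLA the addend, e.g.
;; (arbitrary registers):
;;   fmad    z0.s, p0/m, z1.s, z2.s    // z0 = z0 * z1 + z2
;;   fmla    z0.s, p0/m, z1.s, z2.s    // z0 = z0 + z1 * z2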
2312
2313 ;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
2314 (define_expand "fnma<mode>4"
2315 [(set (match_operand:SVE_F 0 "register_operand")
2316 (unspec:SVE_F
2317 [(match_dup 4)
2318 (fma:SVE_F (neg:SVE_F
2319 (match_operand:SVE_F 1 "register_operand"))
2320 (match_operand:SVE_F 2 "register_operand")
2321 (match_operand:SVE_F 3 "register_operand"))]
2322 UNSPEC_MERGE_PTRUE))]
2323 "TARGET_SVE"
2324 {
2325 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2326 }
2327 )
2328
2329 ;; fnma predicated with a PTRUE.
2330 (define_insn "*fnma<mode>4"
2331 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2332 (unspec:SVE_F
2333 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2334 (fma:SVE_F (neg:SVE_F
2335 (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
2336 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2337 (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
2338 UNSPEC_MERGE_PTRUE))]
2339 "TARGET_SVE"
2340 "@
2341 fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2342 fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2343 movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2344 [(set_attr "movprfx" "*,*,yes")]
2345 )
2346
2347 ;; Unpredicated fms (%0 = (%1 * %2) - %3).
2348 (define_expand "fms<mode>4"
2349 [(set (match_operand:SVE_F 0 "register_operand")
2350 (unspec:SVE_F
2351 [(match_dup 4)
2352 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2353 (match_operand:SVE_F 2 "register_operand")
2354 (neg:SVE_F
2355 (match_operand:SVE_F 3 "register_operand")))]
2356 UNSPEC_MERGE_PTRUE))]
2357 "TARGET_SVE"
2358 {
2359 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2360 }
2361 )
2362
2363 ;; fms predicated with a PTRUE.
2364 (define_insn "*fms<mode>4"
2365 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2366 (unspec:SVE_F
2367 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2368 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
2369 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2370 (neg:SVE_F
2371 (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
2372 UNSPEC_MERGE_PTRUE))]
2373 "TARGET_SVE"
2374 "@
2375 fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2376 fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2377 movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2378 [(set_attr "movprfx" "*,*,yes")]
2379 )
2380
2381 ;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
2382 (define_expand "fnms<mode>4"
2383 [(set (match_operand:SVE_F 0 "register_operand")
2384 (unspec:SVE_F
2385 [(match_dup 4)
2386 (fma:SVE_F (neg:SVE_F
2387 (match_operand:SVE_F 1 "register_operand"))
2388 (match_operand:SVE_F 2 "register_operand")
2389 (neg:SVE_F
2390 (match_operand:SVE_F 3 "register_operand")))]
2391 UNSPEC_MERGE_PTRUE))]
2392 "TARGET_SVE"
2393 {
2394 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2395 }
2396 )
2397
2398 ;; fnms predicated with a PTRUE.
2399 (define_insn "*fnms<mode>4"
2400 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2401 (unspec:SVE_F
2402 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2403 (fma:SVE_F (neg:SVE_F
2404 (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
2405 (match_operand:SVE_F 4 "register_operand" "w, w, w")
2406 (neg:SVE_F
2407 (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
2408 UNSPEC_MERGE_PTRUE))]
2409 "TARGET_SVE"
2410 "@
2411 fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2412 fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2413 movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2414 [(set_attr "movprfx" "*,*,yes")]
2415 )
2416
2417 ;; Unpredicated floating-point division.
2418 (define_expand "div<mode>3"
2419 [(set (match_operand:SVE_F 0 "register_operand")
2420 (unspec:SVE_F
2421 [(match_dup 3)
2422 (div:SVE_F (match_operand:SVE_F 1 "register_operand")
2423 (match_operand:SVE_F 2 "register_operand"))]
2424 UNSPEC_MERGE_PTRUE))]
2425 "TARGET_SVE"
2426 {
2427 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2428 }
2429 )
2430
2431 ;; Floating-point division predicated with a PTRUE.
2432 (define_insn "*div<mode>3"
2433 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2434 (unspec:SVE_F
2435 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2436 (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
2437 (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
2438 UNSPEC_MERGE_PTRUE))]
2439 "TARGET_SVE"
2440 "@
2441 fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2442 fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2443 movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2444 [(set_attr "movprfx" "*,*,yes")]
2445 )
2446
2447 ;; Unpredicated FNEG, FABS and FSQRT.
2448 (define_expand "<optab><mode>2"
2449 [(set (match_operand:SVE_F 0 "register_operand")
2450 (unspec:SVE_F
2451 [(match_dup 2)
2452 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
2453 UNSPEC_MERGE_PTRUE))]
2454 "TARGET_SVE"
2455 {
2456 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2457 }
2458 )
2459
2460 ;; FNEG, FABS and FSQRT predicated with a PTRUE.
2461 (define_insn "*<optab><mode>2"
2462 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2463 (unspec:SVE_F
2464 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2465 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
2466 UNSPEC_MERGE_PTRUE))]
2467 "TARGET_SVE"
2468 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2469 )
2470
2471 (define_insn "*fabd<mode>3"
2472 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2473 (unspec:SVE_F
2474 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2475 (abs:SVE_F
2476 (minus:SVE_F
2477 (match_operand:SVE_F 2 "register_operand" "0")
2478 (match_operand:SVE_F 3 "register_operand" "w")))]
2479 UNSPEC_MERGE_PTRUE))]
2480 "TARGET_SVE"
2481 "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
2482 )
2483
2484 ;; Unpredicated FRINTy.
2485 (define_expand "<frint_pattern><mode>2"
2486 [(set (match_operand:SVE_F 0 "register_operand")
2487 (unspec:SVE_F
2488 [(match_dup 2)
2489 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
2490 FRINT)]
2491 UNSPEC_MERGE_PTRUE))]
2492 "TARGET_SVE"
2493 {
2494 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2495 }
2496 )
2497
2498 ;; FRINTy predicated with a PTRUE.
2499 (define_insn "*<frint_pattern><mode>2"
2500 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2501 (unspec:SVE_F
2502 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2503 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
2504 FRINT)]
2505 UNSPEC_MERGE_PTRUE))]
2506 "TARGET_SVE"
2507 "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2508 )
2509
2510 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
2511 ;; SF to SI or DF to DI).
2512 (define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
2513 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2514 (unspec:<V_INT_EQUIV>
2515 [(match_dup 2)
2516 (FIXUORS:<V_INT_EQUIV>
2517 (match_operand:SVE_F 1 "register_operand"))]
2518 UNSPEC_MERGE_PTRUE))]
2519 "TARGET_SVE"
2520 {
2521 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2522 }
2523 )
2524
2525 ;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
2526 (define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
2527 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2528 (unspec:SVE_HSDI
2529 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2530 (FIXUORS:SVE_HSDI
2531 (match_operand:VNx8HF 2 "register_operand" "w"))]
2532 UNSPEC_MERGE_PTRUE))]
2533 "TARGET_SVE"
2534 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
2535 )
2536
2537 ;; Conversion of SF to DI or SI, predicated with a PTRUE.
2538 (define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
2539 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2540 (unspec:SVE_SDI
2541 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2542 (FIXUORS:SVE_SDI
2543 (match_operand:VNx4SF 2 "register_operand" "w"))]
2544 UNSPEC_MERGE_PTRUE))]
2545 "TARGET_SVE"
2546 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
2547 )
2548
2549 ;; Conversion of DF to DI or SI, predicated with a PTRUE.
2550 (define_insn "*<fix_trunc_optab>vnx2df<mode>2"
2551 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2552 (unspec:SVE_SDI
2553 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2554 (FIXUORS:SVE_SDI
2555 (match_operand:VNx2DF 2 "register_operand" "w"))]
2556 UNSPEC_MERGE_PTRUE))]
2557 "TARGET_SVE"
2558 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
2559 )
2560
2561 ;; Unpredicated conversion of integers to floats of the same size
2562 ;; (HI to HF, SI to SF or DI to DF).
2563 (define_expand "<optab><v_int_equiv><mode>2"
2564 [(set (match_operand:SVE_F 0 "register_operand")
2565 (unspec:SVE_F
2566 [(match_dup 2)
2567 (FLOATUORS:SVE_F
2568 (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
2569 UNSPEC_MERGE_PTRUE))]
2570 "TARGET_SVE"
2571 {
2572 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2573 }
2574 )
2575
2576 ;; Conversion of DI, SI or HI to the same number of HFs, predicated
2577 ;; with a PTRUE.
2578 (define_insn "*<optab><mode>vnx8hf2"
2579 [(set (match_operand:VNx8HF 0 "register_operand" "=w")
2580 (unspec:VNx8HF
2581 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2582 (FLOATUORS:VNx8HF
2583 (match_operand:SVE_HSDI 2 "register_operand" "w"))]
2584 UNSPEC_MERGE_PTRUE))]
2585 "TARGET_SVE"
2586 "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
2587 )
2588
2589 ;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
2590 (define_insn "*<optab><mode>vnx4sf2"
2591 [(set (match_operand:VNx4SF 0 "register_operand" "=w")
2592 (unspec:VNx4SF
2593 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2594 (FLOATUORS:VNx4SF
2595 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2596 UNSPEC_MERGE_PTRUE))]
2597 "TARGET_SVE"
2598 "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
2599 )
2600
2601 ;; Conversion of DI or SI to DF, predicated with a PTRUE.
2602 (define_insn "aarch64_sve_<optab><mode>vnx2df2"
2603 [(set (match_operand:VNx2DF 0 "register_operand" "=w")
2604 (unspec:VNx2DF
2605 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2606 (FLOATUORS:VNx2DF
2607 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2608 UNSPEC_MERGE_PTRUE))]
2609 "TARGET_SVE"
2610 "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
2611 )
2612
2613 ;; Conversion of DFs to the same number of SFs, or SFs to the same number
2614 ;; of HFs.
2615 (define_insn "*trunc<Vwide><mode>2"
2616 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
2617 (unspec:SVE_HSF
2618 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2619 (unspec:SVE_HSF
2620 [(match_operand:<VWIDE> 2 "register_operand" "w")]
2621 UNSPEC_FLOAT_CONVERT)]
2622 UNSPEC_MERGE_PTRUE))]
2623 "TARGET_SVE"
2624 "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
2625 )
2626
2627 ;; Conversion of SFs to the same number of DFs, or HFs to the same number
2628 ;; of SFs.
2629 (define_insn "aarch64_sve_extend<mode><Vwide>2"
2630 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2631 (unspec:<VWIDE>
2632 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2633 (unspec:<VWIDE>
2634 [(match_operand:SVE_HSF 2 "register_operand" "w")]
2635 UNSPEC_FLOAT_CONVERT)]
2636 UNSPEC_MERGE_PTRUE))]
2637 "TARGET_SVE"
2638 "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
2639 )
2640
2641 ;; Unpack the low or high half of a predicate, where "high" refers to
2642 ;; the low-numbered lanes for big-endian and the high-numbered lanes
2643 ;; for little-endian.
2644 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
2645 [(match_operand:<VWIDE> 0 "register_operand")
2646 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
2647 UNPACK)]
2648 "TARGET_SVE"
2649 {
2650 emit_insn ((<hi_lanes_optab>
2651 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
2652 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
2653 (operands[0], operands[1]));
2654 DONE;
2655 }
2656 )
2657
2658 ;; PUNPKHI and PUNPKLO.
2659 (define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
2660 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
2661 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
2662 UNPACK_UNSIGNED))]
2663 "TARGET_SVE"
2664 "punpk<perm_hilo>\t%0.h, %1.b"
2665 )
2666
2667 ;; Unpack the low or high half of a vector, where "high" refers to
2668 ;; the low-numbered lanes for big-endian and the high-numbered lanes
2669 ;; for little-endian.
2670 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
2671 [(match_operand:<VWIDE> 0 "register_operand")
2672 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
2673 "TARGET_SVE"
2674 {
2675 emit_insn ((<hi_lanes_optab>
2676 ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
2677 : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
2678 (operands[0], operands[1]));
2679 DONE;
2680 }
2681 )
2682
2683 ;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
2684 (define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
2685 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2686 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
2687 UNPACK))]
2688 "TARGET_SVE"
2689 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
2690 )
2691
2692 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
2693 ;; First unpack the source without conversion, then float-convert the
2694 ;; unpacked source.
2695 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
2696 [(match_operand:<VWIDE> 0 "register_operand")
2697 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
2698 UNPACK_UNSIGNED)]
2699 "TARGET_SVE"
2700 {
2701 /* Use ZIP to do the unpack, since we don't care about the upper halves
2702 and since it has the nice property of not needing any subregs.
2703 If using UUNPK* turns out to be preferable, we could model it as
2704 a ZIP whose first operand is zero. */
2705 rtx temp = gen_reg_rtx (<MODE>mode);
2706 emit_insn ((<hi_lanes_optab>
2707 ? gen_aarch64_sve_zip2<mode>
2708 : gen_aarch64_sve_zip1<mode>)
2709 (temp, operands[1], operands[1]));
2710 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
2711 emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
2712 ptrue, temp));
2713 DONE;
2714 }
2715 )
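
;; For the VNx4SF -> VNx2DF case the expansion is roughly (arbitrary
;; registers):
;;   zip1    z1.s, z0.s, z0.s          // zip2 for the high half
;;   ptrue   p3.d
;;   fcvt    z2.d, p3/m, z1.s
;; Each .d container then holds two copies of the same .s value and the
;; conversion simply ignores the extra copy.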
2716
2717 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
2718 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
2719 ;; unpacked VNx4SI to VNx2DF.
2720 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
2721 [(match_operand:VNx2DF 0 "register_operand")
2722 (FLOATUORS:VNx2DF
2723 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
2724 UNPACK_UNSIGNED))]
2725 "TARGET_SVE"
2726 {
2727 /* Use ZIP to do the unpack, since we don't care about the upper halves
2728 and since it has the nice property of not needing any subregs.
2729 If using UUNPK* turns out to be preferable, we could model it as
2730 a ZIP whose first operand is zero. */
2731 rtx temp = gen_reg_rtx (VNx4SImode);
2732 emit_insn ((<hi_lanes_optab>
2733 ? gen_aarch64_sve_zip2vnx4si
2734 : gen_aarch64_sve_zip1vnx4si)
2735 (temp, operands[1], operands[1]));
2736 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
2737 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
2738 ptrue, temp));
2739 DONE;
2740 }
2741 )
2742
2743 ;; Predicate pack. Use UZP1 on the narrower type, which discards
2744 ;; the high part of each wide element.
2745 (define_insn "vec_pack_trunc_<Vwide>"
2746 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
2747 (unspec:PRED_BHS
2748 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
2749 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
2750 UNSPEC_PACK))]
2751 "TARGET_SVE"
2752 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2753 )
2754
2755 ;; Integer pack. Use UZP1 on the narrower type, which discards
2756 ;; the high part of each wide element.
2757 (define_insn "vec_pack_trunc_<Vwide>"
2758 [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
2759 (unspec:SVE_BHSI
2760 [(match_operand:<VWIDE> 1 "register_operand" "w")
2761 (match_operand:<VWIDE> 2 "register_operand" "w")]
2762 UNSPEC_PACK))]
2763 "TARGET_SVE"
2764 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2765 )
2766
2767 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
2768 ;; the results into a single vector.
2769 (define_expand "vec_pack_trunc_<Vwide>"
2770 [(set (match_dup 4)
2771 (unspec:SVE_HSF
2772 [(match_dup 3)
2773 (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
2774 UNSPEC_FLOAT_CONVERT)]
2775 UNSPEC_MERGE_PTRUE))
2776 (set (match_dup 5)
2777 (unspec:SVE_HSF
2778 [(match_dup 3)
2779 (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
2780 UNSPEC_FLOAT_CONVERT)]
2781 UNSPEC_MERGE_PTRUE))
2782 (set (match_operand:SVE_HSF 0 "register_operand")
2783 (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2784 "TARGET_SVE"
2785 {
2786 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
2787 operands[4] = gen_reg_rtx (<MODE>mode);
2788 operands[5] = gen_reg_rtx (<MODE>mode);
2789 }
2790 )
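
;; For two VNx2DF inputs the expansion is roughly (arbitrary registers):
;;   ptrue   p3.d
;;   fcvt    z4.s, p3/m, z1.d
;;   fcvt    z5.s, p3/m, z2.d
;;   uzp1    z0.s, z4.s, z5.s
;; The narrowing FCVTs write the even-indexed .s lanes, which are
;; exactly the lanes that UZP1 concatenates.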
2791
2792 ;; Convert two vectors of DF to SI and pack the results into a single vector.
2793 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
2794 [(set (match_dup 4)
2795 (unspec:VNx4SI
2796 [(match_dup 3)
2797 (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
2798 UNSPEC_MERGE_PTRUE))
2799 (set (match_dup 5)
2800 (unspec:VNx4SI
2801 [(match_dup 3)
2802 (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
2803 UNSPEC_MERGE_PTRUE))
2804 (set (match_operand:VNx4SI 0 "register_operand")
2805 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2806 "TARGET_SVE"
2807 {
2808 operands[3] = aarch64_ptrue_reg (VNx2BImode);
2809 operands[4] = gen_reg_rtx (VNx4SImode);
2810 operands[5] = gen_reg_rtx (VNx4SImode);
2811 }
2812 )
2813
2814 ;; Predicated floating-point operations with select.
2815 (define_expand "cond_<optab><mode>"
2816 [(set (match_operand:SVE_F 0 "register_operand")
2817 (unspec:SVE_F
2818 [(match_operand:<VPRED> 1 "register_operand")
2819 (unspec:SVE_F
2820 [(match_operand:SVE_F 2 "register_operand")
2821 (match_operand:SVE_F 3 "register_operand")]
2822 SVE_COND_FP_BINARY)
2823 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
2824 UNSPEC_SEL))]
2825 "TARGET_SVE"
2826 )
2827
2828 ;; Predicated floating-point operations with select matching first operand.
2829 (define_insn "*cond_<optab><mode>_2"
2830 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2831 (unspec:SVE_F
2832 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2833 (unspec:SVE_F
2834 [(match_operand:SVE_F 2 "register_operand" "0, w")
2835 (match_operand:SVE_F 3 "register_operand" "w, w")]
2836 SVE_COND_FP_BINARY)
2837 (match_dup 2)]
2838 UNSPEC_SEL))]
2839 "TARGET_SVE"
2840 "@
2841 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2842 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2843 [(set_attr "movprfx" "*,yes")]
2844 )
2845
2846 ;; Predicated floating-point operations with select matching second operand.
2847 (define_insn "*cond_<optab><mode>_3"
2848 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2849 (unspec:SVE_F
2850 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2851 (unspec:SVE_F
2852 [(match_operand:SVE_F 2 "register_operand" "w, w")
2853 (match_operand:SVE_F 3 "register_operand" "0, w")]
2854 SVE_COND_FP_BINARY)
2855 (match_dup 3)]
2856 UNSPEC_SEL))]
2857 "TARGET_SVE"
2858 "@
2859 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2860 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
2861 [(set_attr "movprfx" "*,yes")]
2862 )
2863
2864 ;; Predicated floating-point binary operations in which the values of
2865 ;; inactive lanes are distinct from the other inputs.
2866 (define_insn_and_rewrite "*cond_<optab><mode>_any"
2867 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
2868 (unspec:SVE_F
2869 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
2870 (unspec:SVE_F
2871 [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
2872 (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
2873 SVE_COND_FP_BINARY)
2874 (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
2875 UNSPEC_SEL))]
2876 "TARGET_SVE
2877 && !rtx_equal_p (operands[2], operands[4])
2878 && !rtx_equal_p (operands[3], operands[4])"
2879 "@
2880 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2881 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2882 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2883 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2884 #"
2885 "&& reload_completed
2886 && register_operand (operands[4], <MODE>mode)
2887 && !rtx_equal_p (operands[0], operands[4])"
2888 {
2889 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2890 operands[4], operands[1]));
2891 operands[4] = operands[2] = operands[0];
2892 }
2893 [(set_attr "movprfx" "yes")]
2894 )
2895
2896 ;; Predicated floating-point ternary operations with select.
2897 (define_expand "cond_<optab><mode>"
2898 [(set (match_operand:SVE_F 0 "register_operand")
2899 (unspec:SVE_F
2900 [(match_operand:<VPRED> 1 "register_operand")
2901 (unspec:SVE_F
2902 [(match_operand:SVE_F 2 "register_operand")
2903 (match_operand:SVE_F 3 "register_operand")
2904 (match_operand:SVE_F 4 "register_operand")]
2905 SVE_COND_FP_TERNARY)
2906 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
2907 UNSPEC_SEL))]
2908 "TARGET_SVE"
2909 {
2910 /* Swap the multiplication operands if the fallback value is the
2911 second of the two. */
2912 if (rtx_equal_p (operands[3], operands[5]))
2913 std::swap (operands[2], operands[3]);
2914 })
2915
2916 ;; Predicated floating-point ternary operations using the FMAD-like form.
2917 (define_insn "*cond_<optab><mode>_2"
2918 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2919 (unspec:SVE_F
2920 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2921 (unspec:SVE_F
2922 [(match_operand:SVE_F 2 "register_operand" "0, w")
2923 (match_operand:SVE_F 3 "register_operand" "w, w")
2924 (match_operand:SVE_F 4 "register_operand" "w, w")]
2925 SVE_COND_FP_TERNARY)
2926 (match_dup 2)]
2927 UNSPEC_SEL))]
2928 "TARGET_SVE"
2929 "@
2930 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2931 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2932 [(set_attr "movprfx" "*,yes")]
2933 )
2934
2935 ;; Predicated floating-point ternary operations using the FMLA-like form.
2936 (define_insn "*cond_<optab><mode>_4"
2937 [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2938 (unspec:SVE_F
2939 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2940 (unspec:SVE_F
2941 [(match_operand:SVE_F 2 "register_operand" "w, w")
2942 (match_operand:SVE_F 3 "register_operand" "w, w")
2943 (match_operand:SVE_F 4 "register_operand" "0, w")]
2944 SVE_COND_FP_TERNARY)
2945 (match_dup 4)]
2946 UNSPEC_SEL))]
2947 "TARGET_SVE"
2948 "@
2949 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2950 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
2951 [(set_attr "movprfx" "*,yes")]
2952 )
2953
2954 ;; Predicated floating-point ternary operations in which the value for
2955 ;; inactive lanes is distinct from the other inputs.
2956 (define_insn_and_rewrite "*cond_<optab><mode>_any"
2957 [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
2958 (unspec:SVE_F
2959 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2960 (unspec:SVE_F
2961 [(match_operand:SVE_F 2 "register_operand" "w, w, w")
2962 (match_operand:SVE_F 3 "register_operand" "w, w, w")
2963 (match_operand:SVE_F 4 "register_operand" "w, w, w")]
2964 SVE_COND_FP_TERNARY)
2965 (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
2966 UNSPEC_SEL))]
2967 "TARGET_SVE
2968 && !rtx_equal_p (operands[2], operands[5])
2969 && !rtx_equal_p (operands[3], operands[5])
2970 && !rtx_equal_p (operands[4], operands[5])"
2971 "@
2972 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2973 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2974 #"
2975 "&& reload_completed
2976 && !CONSTANT_P (operands[5])
2977 && !rtx_equal_p (operands[0], operands[5])"
2978 {
2979 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
2980 operands[5], operands[1]));
2981 operands[5] = operands[4] = operands[0];
2982 }
2983 [(set_attr "movprfx" "yes")]
2984 )
2985
2986 ;; Shift an SVE vector left and insert a scalar into element 0.
2987 (define_insn "vec_shl_insert_<mode>"
2988 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
2989 (unspec:SVE_ALL
2990 [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
2991 (match_operand:<VEL> 2 "register_operand" "rZ, w")]
2992 UNSPEC_INSR))]
2993 "TARGET_SVE"
2994 "@
2995 insr\t%0.<Vetype>, %<vwcore>2
2996 insr\t%0.<Vetype>, %<Vetype>2"
2997 )
2998
2999 (define_expand "copysign<mode>3"
3000 [(match_operand:SVE_F 0 "register_operand")
3001 (match_operand:SVE_F 1 "register_operand")
3002 (match_operand:SVE_F 2 "register_operand")]
3003 "TARGET_SVE"
3004 {
3005 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
3006 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
3007 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
3008 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
3009
3010 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
3011 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
3012
3013 emit_insn (gen_and<v_int_equiv>3
3014 (sign, arg2,
3015 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3016 HOST_WIDE_INT_M1U
3017 << bits)));
3018 emit_insn (gen_and<v_int_equiv>3
3019 (mant, arg1,
3020 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3021 ~(HOST_WIDE_INT_M1U
3022 << bits))));
3023 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
3024 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
3025 DONE;
3026 }
3027 )
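
;; For example, for VNx4SF the value of "bits" is 31, so the two AND
;; masks are 0x80000000 (the sign bit, taken from operand 2) and
;; 0x7fffffff (the magnitude, taken from operand 1); ORing the two
;; gives the copysign result.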
3028
3029 (define_expand "xorsign<mode>3"
3030 [(match_operand:SVE_F 0 "register_operand")
3031 (match_operand:SVE_F 1 "register_operand")
3032 (match_operand:SVE_F 2 "register_operand")]
3033 "TARGET_SVE"
3034 {
3035 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
3036 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
3037 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
3038
3039 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
3040 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
3041
3042 emit_insn (gen_and<v_int_equiv>3
3043 (sign, arg2,
3044 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3045 HOST_WIDE_INT_M1U
3046 << bits)));
3047 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
3048 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
3049 DONE;
3050 }
3051 )
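
;; Unlike copysign, a single XOR with the sign of operand 2 is enough
;; here: xorsign only has to flip the sign of operand 1 when operand 2
;; is negative, as when expanding x * copysign (1.0, y).  For VNx2DF
;; the mask is 0x8000000000000000, for example.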
3052
3053 ;; Unpredicated DOT product.
3054 (define_insn "<sur>dot_prod<vsi2qi>"
3055 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
3056 (plus:SVE_SDI
3057 (unspec:SVE_SDI
3058 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
3059 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
3060 DOTPROD)
3061 (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
3062 "TARGET_SVE"
3063 "@
3064 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
3065 movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
3066 [(set_attr "movprfx" "*,yes")]
3067 )
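
;; For example (arbitrary registers):
;;   sdot    z0.s, z1.b, z2.b
;; multiplies each group of four .b elements of z1 by the corresponding
;; group in z2 and adds the four products to the .s accumulator that
;; overlaps the group.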
3068
3069 ;; Unpredicated integer absolute difference.
3070 (define_expand "<su>abd<mode>_3"
3071 [(use (match_operand:SVE_I 0 "register_operand"))
3072 (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
3073 (match_operand:SVE_I 2 "register_operand"))]
3074 "TARGET_SVE"
3075 {
3076 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3077 emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
3078 operands[2]));
3079 DONE;
3080 }
3081 )
3082
3083 ;; Predicated integer absolute difference.
3084 (define_insn "aarch64_<su>abd<mode>_3"
3085 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3086 (unspec:SVE_I
3087 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3088 (minus:SVE_I
3089 (USMAX:SVE_I
3090 (match_operand:SVE_I 2 "register_operand" "0, w")
3091 (match_operand:SVE_I 3 "register_operand" "w, w"))
3092 (<max_opp>:SVE_I
3093 (match_dup 2)
3094 (match_dup 3)))]
3095 UNSPEC_MERGE_PTRUE))]
3096 "TARGET_SVE"
3097 "@
3098 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3099 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3100 [(set_attr "movprfx" "*,yes")]
3101 )
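
;; Writing |a - b| as max (a, b) - min (a, b) avoids the overflow that
;; a plain subtraction could incur; e.g. for bytes (arbitrary
;; registers):
;;   sabd    z0.b, p0/m, z0.b, z1.b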
3102
3103 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
3104 ;; operands 1 and 2.  The sequence also has to widen the absolute differences
3105 ;; into vector elements, accumulate them into operand 3, and copy the
3106 ;; result into operand 0.
3107 ;; Perform that with a sequence of:
3108 ;; MOV ones.b, #1
3109 ;; [SU]ABD diff.b, p0/m, op1.b, op2.b
3110 ;; MOVPRFX op0, op3 // If necessary
3111 ;; UDOT op0.s, diff.b, ones.b
3112
3113 (define_expand "<sur>sad<vsi2qi>"
3114 [(use (match_operand:SVE_SDI 0 "register_operand"))
3115 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
3116 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
3117 (use (match_operand:SVE_SDI 3 "register_operand"))]
3118 "TARGET_SVE"
3119 {
3120 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
3121 rtx diff = gen_reg_rtx (<VSI2QI>mode);
3122 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
3123 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
3124 DONE;
3125 }
3126 )
3127
3128 ;; Standard pattern name vec_init<mode><Vel>.
3129 (define_expand "vec_init<mode><Vel>"
3130 [(match_operand:SVE_ALL 0 "register_operand" "")
3131 (match_operand 1 "" "")]
3132 "TARGET_SVE"
3133 {
3134 aarch64_sve_expand_vector_init (operands[0], operands[1]);
3135 DONE;
3136 }
3137 )