;; Machine description for AArch64 SVE.
;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Note on the handling of big-endian SVE
;; --------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;;
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
;; logically, a mov<mode> load must be indistinguishable from a
;; maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.
;;
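;; As a concrete example of the difference: take a big-endian target,
;; memory bytes b0, b1, ... (in increasing address order) and a vector
;; of 32-bit elements.  LD1W puts { b0, b1, b2, b3 } in lane 0, at the
;; least significant end of the register, with b0 as the most
;; significant byte of that lane.  LDR instead puts b0 in the most
;; significant byte of the whole register, so the lanes end up in the
;; opposite order.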

;; SVE data moves.
(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && can_create_pseudo_p ())
      {
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1],
                                      gen_vec_duplicate<mode>);
        DONE;
      }
  }
)

;; Unpredicated moves (little-endian).  Only allow memory operations
;; during and after RA; before RA we want the predicated load and
;; store patterns to be used instead.
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE
   && !BYTES_BIG_ENDIAN
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
           && nonmemory_operand (operands[1], <MODE>mode)))"
  "@
   ldr\t%0, %1
   str\t%1, %0
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Unpredicated moves (big-endian).  Memory accesses require secondary
;; reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
        (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "@
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Handle big-endian memory reloads.  We use byte PTRUE for all modes
;; to try to encourage reuse.  This is safe because an all-true byte
;; predicate is also all-true when reinterpreted for wider elements,
;; which only look at the first predicate bit of each element.
(define_expand "aarch64_sve_reload_be"
  [(parallel
     [(set (match_operand 0)
           (match_operand 1))
      (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  {
    /* Create a PTRUE.  */
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));

    /* Refer to the PTRUE in the appropriate mode for this move.  */
    machine_mode mode = GET_MODE (operands[0]);
    machine_mode pred_mode
      = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
    rtx pred = gen_lowpart (pred_mode, operands[2]);

    /* Emit a predicated load or store.  */
    aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
    DONE;
  }
)

;; A predicated load or store for which the predicate is known to be
;; all-true.  Note that this pattern is generated directly by
;; aarch64_emit_sve_pred_move, so changes to this pattern will
;; need changes there as well.
(define_insn "*pred_mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   ld1<Vesize>\t%0.<Vetype>, %1/z, %2
   st1<Vesize>\t%2.<Vetype>, %1, %0"
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)

(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "memory_operand" "m")]
          UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
        (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
                         (match_operand:SVE_ALL 1 "register_operand" "w")
                         (match_dup 0)]
                        UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vetype>, %2, %0"
)

(define_expand "mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
        (match_operand:PRED_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);
  }
)

(define_insn "*aarch64_sve_mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
        (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   mov\t%0.b, %1.b
   str\t%1, %0
   ldr\t%0, %1
   pfalse\t%0.b
   * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
)

;; Handle extractions from a predicate by converting to an integer vector
;; and extracting from there.
(define_expand "vec_extract<vpred><Vel>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:<VPRED> 1 "register_operand")
   (match_operand:SI 2 "nonmemory_operand")
   ;; Dummy operand to which we can attach the iterator.
   (reg:SVE_I V0_REGNUM)]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
                                                CONST1_RTX (<MODE>mode),
                                                CONST0_RTX (<MODE>mode)));
    emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
    DONE;
  }
)

(define_expand "vec_extract<mode><Vel>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand")
          (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
  "TARGET_SVE"
  {
    poly_int64 val;
    if (poly_int_rtx_p (operands[2], &val)
        && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
      {
        /* The last element can be extracted with a LASTB and a false
           predicate.  */
        rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
        emit_insn (gen_aarch64_sve_lastb<mode> (operands[0], sel,
                                                operands[1]));
        DONE;
      }
    if (!CONST_INT_P (operands[2]))
      {
        /* Create an index with operand[2] as the base and -1 as the step.
           It will then be zero for the element we care about.  */
        rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
        index = force_reg (<VEL_INT>mode, index);
        rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
        emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));

        /* Get a predicate that is true for only that element.  */
        rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
        rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
        rtx sel = gen_reg_rtx (<VPRED>mode);
        emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));

        /* Select the element using LASTB.  */
        emit_insn (gen_aarch64_sve_lastb<mode> (operands[0], sel,
                                                operands[1]));
        DONE;
      }
  }
)

;; Extract an element from the Advanced SIMD portion of the register.
;; We don't just reuse the aarch64-simd.md pattern because we don't
;; want any change in lane number on big-endian targets.
(define_insn "*vec_extract<mode><Vel>_v128"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 15)"
  {
    operands[1] = gen_lowpart (<V128>mode, operands[1]);
    switch (which_alternative)
      {
        case 0:
          return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
        case 1:
          return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
        case 2:
          return "st1\\t{%1.<Vetype>}[%2], %0";
        default:
          gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)

;; Extract an element in the range of DUP.  This pattern allows the
;; source and destination to be different.
(define_insn "*vec_extract<mode><Vel>_dup"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
  }
)

;; Extract an element outside the range of DUP.  This pattern requires the
;; source and destination to be the same.
(define_insn "*vec_extract<mode><Vel>_ext"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (vec_select:<VEL>
          (match_operand:SVE_ALL 1 "register_operand" "0")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\t%0.b, %0.b, %0.b, #%2";
  }
)
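
;; For example, extracting 32-bit element 20 becomes
;; "ext %0.b, %0.b, %0.b, #80", which rotates the selected element
;; down to lane 0, where the scalar result is read from.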

;; Extract the last active element of operand 1 into operand 0.
;; If no elements are active, extract the last inactive element instead.
(define_insn "aarch64_sve_lastb<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
        (unspec:<VEL>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_ALL 2 "register_operand" "w, w")]
          UNSPEC_LASTB))]
  "TARGET_SVE"
  "@
   lastb\t%<vwcore>0, %1, %2.<Vetype>
   lastb\t%<Vetype>0, %1, %2.<Vetype>"
)

(define_expand "vec_duplicate<mode>"
  [(parallel
     [(set (match_operand:SVE_ALL 0 "register_operand")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
      (clobber (scratch:<VPRED>))])]
  "TARGET_SVE"
  {
    if (MEM_P (operands[1]))
      {
        rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
        emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
                                       CONST0_RTX (<MODE>mode)));
        DONE;
      }
  }
)

;; Accept memory operands for the benefit of combine, and also in case
;; the scalar input gets spilled to memory during RA.  We want to split
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
        (vec_duplicate:SVE_ALL
          (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
   (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %<vwcore>1
   mov\t%0.<Vetype>, %<Vetype>1
   #"
  "&& MEM_P (operands[1])"
  [(const_int 0)]
  {
    if (GET_CODE (operands[2]) == SCRATCH)
      operands[2] = gen_reg_rtx (<VPRED>mode);
    emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
    emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
                                   CONST0_RTX (<MODE>mode)));
    DONE;
  }
  [(set_attr "length" "4,4,8")]
)

;; This is used for vec_duplicate<mode>s from memory, but can also
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.
(define_insn "sve_ld1r<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
           (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Load 128 bits from memory and duplicate to fill a vector.  Since there
;; are so few operations on 128-bit "elements", we don't define a VNx1TI
;; and simply use vectors of bytes instead.
(define_insn "sve_ld1rq"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
          UNSPEC_LD1RQ))]
  "TARGET_SVE"
  "ld1rqb\t%0.b, %1/z, %2"
)

;; Implement a predicate broadcast by shifting the low bit of the scalar
;; input into the top bit and using a WHILELO.  An alternative would be to
;; duplicate the input and do a compare with zero.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (DImode);
    rtx op1 = gen_lowpart (DImode, operands[1]);
    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
    DONE;
  }
)
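
;; For a boolean input X, the shift leaves TMP equal to either 0 or
;; 1 << 63.  WHILELO (0, TMP) then enables element I whenever I < TMP
;; as an unsigned comparison: every element when X is true, and no
;; elements when X is false.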

(define_insn "vec_series<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
        (vec_series:SVE_I
          (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
          (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
  "TARGET_SVE"
  "@
   index\t%0.<Vetype>, #%1, %<vw>2
   index\t%0.<Vetype>, %<vw>1, #%2
   index\t%0.<Vetype>, %<vw>1, %<vw>2"
)

;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (plus:SVE_I
          (vec_duplicate:SVE_I
            (match_operand:<VEL> 1 "register_operand" "r"))
          (match_operand:SVE_I 2 "immediate_operand")))]
  "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
  {
    operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
    return "index\t%0.<Vetype>, %<vw>1, #%2";
  }
)
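
;; For example, duplicating X and adding { 0, 3, 6, ... } collapses to
;; a single "index %0.<Vetype>, %<vw>1, #3", provided that the step is
;; within the immediate range of INDEX.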

(define_expand "vec_perm<mode>"
  [(match_operand:SVE_ALL 0 "register_operand")
   (match_operand:SVE_ALL 1 "register_operand")
   (match_operand:SVE_ALL 2 "register_operand")
   (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
  "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
  {
    aarch64_expand_sve_vec_perm (operands[0], operands[1],
                                 operands[2], operands[3]);
    DONE;
  }
)

(define_insn "*aarch64_sve_tbl<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
          UNSPEC_TBL))]
  "TARGET_SVE"
  "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
                          (match_operand:PRED_ALL 2 "register_operand" "Upa")]
                         PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
                         (match_operand:SVE_ALL 2 "register_operand" "w")]
                        PERMUTE))]
  "TARGET_SVE"
  "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "*aarch64_sve_rev64<mode>"
  [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
        (unspec:SVE_BHS
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
           (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
                           UNSPEC_REV64)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.d, %1/m, %2.d"
)

(define_insn "*aarch64_sve_rev32<mode>"
  [(set (match_operand:SVE_BH 0 "register_operand" "=w")
        (unspec:SVE_BH
          [(match_operand:VNx4BI 1 "register_operand" "Upl")
           (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
                          UNSPEC_REV32)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "rev<Vesize>\t%0.s, %1/m, %2.s"
)

(define_insn "*aarch64_sve_rev16vnx16qi"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(match_operand:VNx8BI 1 "register_operand" "Upl")
           (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
                           UNSPEC_REV16)]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "revb\t%0.h, %1/m, %2.h"
)

(define_insn "*aarch64_sve_rev<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
                        UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vetype>, %1.<Vetype>")

(define_insn "*aarch64_sve_dup_lane<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (vec_duplicate:SVE_ALL
          (vec_select:<VEL>
            (match_operand:SVE_ALL 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "const_int_operand")]))))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
  "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
)

;; Note that the immediate (third) operand is the lane index, not
;; the byte index.
(define_insn "*aarch64_sve_ext<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
                         (match_operand:SVE_ALL 2 "register_operand" "w")
                         (match_operand:SI 3 "const_int_operand")]
                        UNSPEC_EXT))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
  {
    operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
    return "ext\\t%0.b, %0.b, %2.b, #%3";
  }
)

(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
        (plus:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
          (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "sub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (minus:SVE_I
          (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
          (match_operand:SVE_I 2 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)

;; Unpredicated multiplication.
(define_expand "mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (mult:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Multiplication predicated with a PTRUE.  We don't actually need the
;; predicate for the first alternative, but using Upa or X isn't likely
;; to gain much and would make the instruction seem less uniform to the
;; register allocator.
(define_insn "*mul<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (mult:SVE_I
             (match_operand:SVE_I 2 "register_operand" "%0, 0")
             (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   mul\t%0.<Vetype>, %0.<Vetype>, #%3
   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

(define_insn "*madd<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (plus:SVE_I
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w"))]
            UNSPEC_MERGE_PTRUE)
          (match_operand:SVE_I 4 "register_operand" "w, 0")))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
)
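
;; MAD overwrites the multiplicand (%0 = %0 * %3 + %4), while MLA
;; overwrites the addend (%0 = %0 + %2 * %3); the two alternatives
;; differ only in which input is tied to the output.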

(define_insn "*msub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (minus:SVE_I
          (match_operand:SVE_I 4 "register_operand" "w, 0")
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
                         (match_operand:SVE_I 3 "register_operand" "w, w"))]
            UNSPEC_MERGE_PTRUE)))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
)

;; Unpredicated NEG, NOT and POPCOUNT.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 2)
           (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; NEG, NOT and POPCOUNT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (SVE_INT_UNARY:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)

;; Vector AND, ORR and XOR.
(define_insn "<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (LOGICAL:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, w")
          (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
  "TARGET_SVE"
  "@
   <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
   <logical>\t%0.d, %1.d, %2.d"
)

;; Vector AND, ORR and XOR on floating-point modes.  We avoid subregs
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
                       (match_operand:SVE_F 2 "register_operand" "w")]
                      LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)

;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
;; this pattern even though the NOT instruction itself is predicated.
(define_insn "bic<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (and:SVE_I
          (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
          (match_operand:SVE_I 2 "register_operand" "w")))]
  "TARGET_SVE"
  "bic\t%0.d, %2.d, %1.d"
)

;; Predicate AND.  We can reuse one of the inputs as the GP.
(define_insn "and<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
                      (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
  "TARGET_SVE"
  "and\t%0.b, %1/z, %1.b, %2.b"
)
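
;; Reusing %1 as the GP works because the zeroing form computes
;; %1 ? (%1 & %2) : 0 for each bit, which is just %1 & %2.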

;; Unpredicated predicate ORR and XOR.
(define_expand "<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (LOGICAL_OR:PRED_ALL
            (match_operand:PRED_ALL 1 "register_operand")
            (match_operand:PRED_ALL 2 "register_operand"))
          (match_dup 3)))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
  }
)

;; Predicated predicate ORR and XOR.
(define_insn "pred_<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (LOGICAL:PRED_ALL
            (match_operand:PRED_ALL 2 "register_operand" "Upa")
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical>\t%0.b, %1/z, %2.b, %3.b"
)

;; Perform a logical operation on operands 2 and 3, using operand 1 as
;; the GP (which is known to be a PTRUE).  Store the result in operand 0
;; and set the flags in the same way as for PTEST.  The (and ...) in the
;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
;; value is structurally equivalent to the rhs of the second set.
(define_insn "*<optab><mode>3_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
                      (and:PRED_ALL
                        (LOGICAL:PRED_ALL
                          (match_operand:PRED_ALL 2 "register_operand" "Upa")
                          (match_operand:PRED_ALL 3 "register_operand" "Upa"))
                        (match_dup 1))]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
                      (match_dup 1)))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Unpredicated predicate inverse.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
  }
)

;; Predicated predicate inverse.
(define_insn "*one_cmpl<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "not\t%0.b, %1/z, %2.b"
)

;; Predicated predicate BIC and ORN.
(define_insn "*<nlogical><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
)

;; Predicated predicate NAND and NOR.
(define_insn "*<logical_nn><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)

;; Unpredicated LSL, LSR and ASR by a vector.
(define_expand "v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (ASHIFT:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; LSL, LSR and ASR by a vector, predicated with a PTRUE.  We don't
;; actually need the predicate for the first alternative, but using Upa
;; or X isn't likely to gain much and would make the instruction seem
;; less uniform to the register allocator.
(define_insn "*v<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (ASHIFT:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w, 0")
             (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w"))]
          UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
)

;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; shifts above.
(define_expand "<ASHIFT:optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
                      (match_operand:<VEL> 2 "general_operand")))]
  "TARGET_SVE"
  {
    rtx amount;
    if (CONST_INT_P (operands[2]))
      {
        amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
        if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
          amount = force_reg (<MODE>mode, amount);
      }
    else
      {
        amount = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_vec_duplicate<mode> (amount,
                                            convert_to_mode (<VEL>mode,
                                                             operands[2], 0)));
      }
    emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
    DONE;
  }
)
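
;; For example, a shift by the constant 2 becomes a shift by the
;; constant vector { 2, 2, ... }, which matches the immediate
;; alternative of the vector shift above; a variable amount is
;; broadcast to a vector and uses the predicated register form.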

;; Test all bits of operand 1.  Operand 0 is a GP that is known to hold PTRUE.
;;
;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
;; is a PTRUE even if the optimizers haven't yet been able to propagate
;; the constant.  We would use a separate unspec code for PTESTs involving
;; GPs that might not be PTRUEs.
(define_insn "ptest_ptrue<mode>"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
                      (match_operand:PRED_ALL 1 "register_operand" "Upa")]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))]
  "TARGET_SVE"
  "ptest\t%0, %1.b"
)

;; Set element I of the result if operand1 + J < operand2 for all J
;; in [0, I], with the comparison being unsigned.
(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
                          (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
                         UNSPEC_WHILE_LO))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)

;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
;; Handle the case in which both results are useful.  The GP operand
;; to the PTEST isn't needed, so we allow it to be anything.
(define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI [(match_operand:PRED_ALL 1)
                      (unspec:PRED_ALL
                        [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
                         (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
                        UNSPEC_WHILE_LO)]
                     UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_dup 2)
                          (match_dup 3)]
                         UNSPEC_WHILE_LO))]
  "TARGET_SVE"
  "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& !CONSTANT_P (operands[1])"
  [(const_int 0)]
  {
    emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc
               (operands[0], CONSTM1_RTX (<PRED_ALL:MODE>mode),
                operands[2], operands[3]));
    DONE;
  }
)

;; Predicated integer comparison.
(define_insn "*vec_cmp<cmp_op>_<mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_I 2 "register_operand" "w, w")
           (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
          SVE_COND_INT_CMP))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated integer comparison in which only the flags result is interesting.
(define_insn "*vec_cmp<cmp_op>_<mode>_ptest"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (unspec:<VPRED>
               [(match_dup 1)
                (match_operand:SVE_I 2 "register_operand" "w, w")
                (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
               SVE_COND_INT_CMP)]
            UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated comparison in which both the flag and predicate results
;; are interesting.
(define_insn "*vec_cmp<cmp_op>_<mode>_cc"
  [(set (reg:CC CC_REGNUM)
        (compare:CC
          (unspec:SI
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
             (unspec:<VPRED>
               [(match_dup 1)
                (match_operand:SVE_I 2 "register_operand" "w, w")
                (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
               SVE_COND_INT_CMP)]
            UNSPEC_PTEST_PTRUE)
          (const_int 0)))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          SVE_COND_INT_CMP))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated floating-point comparison (excluding FCMUO, which doesn't
;; allow #0.0 as an operand).
(define_insn "*vec_fcm<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_F 2 "register_operand" "w, w")
           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
          SVE_COND_FP_CMP))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; Predicated FCMUO.
(define_insn "*vec_fcmuo<mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_F 2 "register_operand" "w")
           (match_operand:SVE_F 3 "register_operand" "w")]
          UNSPEC_COND_UO))]
  "TARGET_SVE"
  "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; vcond_mask operand order: true, false, mask
;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
;; SEL operand order: mask, true, false
(define_insn "vcond_mask_<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 3 "register_operand" "Upa")
           (match_operand:SVE_ALL 1 "register_operand" "w")
           (match_operand:SVE_ALL 2 "register_operand" "w")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
)

;; Selects between a duplicated immediate and zero.
(define_insn "aarch64_sve_dup<mode>_const"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
           (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "mov\t%0.<Vetype>, %1/z, #%2"
)

;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcond<mode><v_int_equiv>"
  [(set (match_operand:SVE_ALL 0 "register_operand")
        (if_then_else:SVE_ALL
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
          (match_operand:SVE_ALL 1 "register_operand")
          (match_operand:SVE_ALL 2 "register_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Integer vcondu.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcondu<mode><v_int_equiv>"
  [(set (match_operand:SVE_ALL 0 "register_operand")
        (if_then_else:SVE_ALL
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
          (match_operand:SVE_ALL 1 "register_operand")
          (match_operand:SVE_ALL 2 "register_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Floating-point vcond.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
;; with zero.
(define_expand "vcond<mode><v_fp_equiv>"
  [(set (match_operand:SVE_SD 0 "register_operand")
        (if_then_else:SVE_SD
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_FP_EQUIV> 4 "register_operand")
             (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
          (match_operand:SVE_SD 1 "register_operand")
          (match_operand:SVE_SD 2 "register_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
    DONE;
  }
)

;; Signed integer comparisons.  Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
(define_expand "vec_cmp<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_I 2 "register_operand")
              (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
                                    operands[2], operands[3]);
    DONE;
  }
)

;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
(define_expand "vec_cmpu<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_I 2 "register_operand")
              (match_operand:SVE_I 3 "nonmemory_operand")]))
      (clobber (reg:CC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
                                    operands[2], operands[3]);
    DONE;
  }
)

;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; with zero.
(define_expand "vec_cmp<mode><vpred>"
  [(set (match_operand:<VPRED> 0 "register_operand")
        (match_operator:<VPRED> 1 "comparison_operator"
          [(match_operand:SVE_F 2 "register_operand")
           (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
                                      operands[2], operands[3], false);
    DONE;
  }
)

;; Branch based on predicate equality or inequality.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else
          (match_operator 0 "aarch64_equality_operator"
            [(match_operand:PRED_ALL 1 "register_operand")
             (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
          (label_ref (match_operand 3 ""))
          (pc)))]
  ""
  {
    rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
    rtx pred;
    if (operands[2] == CONST0_RTX (<MODE>mode))
      pred = operands[1];
    else
      {
        pred = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
                                        operands[2]));
      }
    emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
    operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
    operands[2] = const0_rtx;
  }
)
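
;; This works because P1 == P2 if and only if P1 EOR P2 has no set
;; bits: the XOR is computed under an all-true GP and then tested
;; with a PTEST, whose flags feed the branch.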
1186
1187;; Unpredicated integer MIN/MAX.
1188(define_expand "<su><maxmin><mode>3"
1189 [(set (match_operand:SVE_I 0 "register_operand")
1190 (unspec:SVE_I
1191 [(match_dup 3)
1192 (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
1193 (match_operand:SVE_I 2 "register_operand"))]
1194 UNSPEC_MERGE_PTRUE))]
1195 "TARGET_SVE"
1196 {
1197 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1198 }
1199)
1200
1201;; Integer MIN/MAX predicated with a PTRUE.
1202(define_insn "*<su><maxmin><mode>3"
1203 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1204 (unspec:SVE_I
1205 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1206 (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0")
1207 (match_operand:SVE_I 3 "register_operand" "w"))]
1208 UNSPEC_MERGE_PTRUE))]
1209 "TARGET_SVE"
1210 "<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1211)
1212
1213;; Unpredicated floating-point MIN/MAX.
1214(define_expand "<su><maxmin><mode>3"
1215 [(set (match_operand:SVE_F 0 "register_operand")
1216 (unspec:SVE_F
1217 [(match_dup 3)
1218 (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
1219 (match_operand:SVE_F 2 "register_operand"))]
1220 UNSPEC_MERGE_PTRUE))]
1221 "TARGET_SVE"
1222 {
1223 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1224 }
1225)
1226
1227;; Floating-point MIN/MAX predicated with a PTRUE.
1228(define_insn "*<su><maxmin><mode>3"
1229 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1230 (unspec:SVE_F
1231 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1232 (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0")
1233 (match_operand:SVE_F 3 "register_operand" "w"))]
1234 UNSPEC_MERGE_PTRUE))]
1235 "TARGET_SVE"
1236 "f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1237)
1238
1239;; Unpredicated fmin/fmax.
1240(define_expand "<maxmin_uns><mode>3"
1241 [(set (match_operand:SVE_F 0 "register_operand")
1242 (unspec:SVE_F
1243 [(match_dup 3)
1244 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
1245 (match_operand:SVE_F 2 "register_operand")]
1246 FMAXMIN_UNS)]
1247 UNSPEC_MERGE_PTRUE))]
1248 "TARGET_SVE"
1249 {
1250 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1251 }
1252)
1253
1254;; fmin/fmax predicated with a PTRUE.
1255(define_insn "*<maxmin_uns><mode>3"
1256 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1257 (unspec:SVE_F
1258 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1259 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0")
1260 (match_operand:SVE_F 3 "register_operand" "w")]
1261 FMAXMIN_UNS)]
1262 UNSPEC_MERGE_PTRUE))]
1263 "TARGET_SVE"
1264 "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1265)
1266
1267;; Unpredicated integer add reduction.
1268(define_expand "reduc_plus_scal_<mode>"
1269 [(set (match_operand:<VEL> 0 "register_operand")
1270 (unspec:<VEL> [(match_dup 2)
1271 (match_operand:SVE_I 1 "register_operand")]
1272 UNSPEC_ADDV))]
1273 "TARGET_SVE"
1274 {
1275 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1276 }
1277)
1278
1279;; Predicated integer add reduction. The result is always 64-bits.
1280(define_insn "*reduc_plus_scal_<mode>"
1281 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1282 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1283 (match_operand:SVE_I 2 "register_operand" "w")]
1284 UNSPEC_ADDV))]
1285 "TARGET_SVE"
1286 "uaddv\t%d0, %1, %2.<Vetype>"
1287)
1288
1289;; Unpredicated floating-point add reduction.
1290(define_expand "reduc_plus_scal_<mode>"
1291 [(set (match_operand:<VEL> 0 "register_operand")
1292 (unspec:<VEL> [(match_dup 2)
1293 (match_operand:SVE_F 1 "register_operand")]
1294 UNSPEC_FADDV))]
1295 "TARGET_SVE"
1296 {
1297 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1298 }
1299)
1300
1301;; Predicated floating-point add reduction.
1302(define_insn "*reduc_plus_scal_<mode>"
1303 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1304 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1305 (match_operand:SVE_F 2 "register_operand" "w")]
1306 UNSPEC_FADDV))]
1307 "TARGET_SVE"
1308 "faddv\t%<Vetype>0, %1, %2.<Vetype>"
1309)
1310
1311;; Unpredicated integer MIN/MAX reduction.
1312(define_expand "reduc_<maxmin_uns>_scal_<mode>"
1313 [(set (match_operand:<VEL> 0 "register_operand")
1314 (unspec:<VEL> [(match_dup 2)
1315 (match_operand:SVE_I 1 "register_operand")]
1316 MAXMINV))]
1317 "TARGET_SVE"
1318 {
1319 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1320 }
1321)
1322
1323;; Predicated integer MIN/MAX reduction.
1324(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
1325 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1326 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1327 (match_operand:SVE_I 2 "register_operand" "w")]
1328 MAXMINV))]
1329 "TARGET_SVE"
1330 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
1331)
1332
1333;; Unpredicated floating-point MIN/MAX reduction.
1334(define_expand "reduc_<maxmin_uns>_scal_<mode>"
1335 [(set (match_operand:<VEL> 0 "register_operand")
1336 (unspec:<VEL> [(match_dup 2)
1337 (match_operand:SVE_F 1 "register_operand")]
1338 FMAXMINV))]
1339 "TARGET_SVE"
1340 {
1341 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1342 }
1343)
1344
1345;; Predicated floating-point MIN/MAX reduction.
1346(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
1347 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1348 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1349 (match_operand:SVE_F 2 "register_operand" "w")]
1350 FMAXMINV))]
1351 "TARGET_SVE"
1352 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
1353)
1354
1355;; Unpredicated floating-point addition.
1356(define_expand "add<mode>3"
1357 [(set (match_operand:SVE_F 0 "register_operand")
1358 (unspec:SVE_F
1359 [(match_dup 3)
1360 (plus:SVE_F
1361 (match_operand:SVE_F 1 "register_operand")
1362 (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
1363 UNSPEC_MERGE_PTRUE))]
1364 "TARGET_SVE"
1365 {
1366 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1367 }
1368)
1369
1370;; Floating-point addition predicated with a PTRUE.
1371(define_insn "*add<mode>3"
1372 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
1373 (unspec:SVE_F
1374 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1375 (plus:SVE_F
1376 (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
1377 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
1378 UNSPEC_MERGE_PTRUE))]
1379 "TARGET_SVE"
1380 "@
1381 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1382 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
1383 fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
1384)
1385
1386;; Unpredicated floating-point subtraction.
1387(define_expand "sub<mode>3"
1388 [(set (match_operand:SVE_F 0 "register_operand")
1389 (unspec:SVE_F
1390 [(match_dup 3)
1391 (minus:SVE_F
1392 (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
1393 (match_operand:SVE_F 2 "register_operand"))]
1394 UNSPEC_MERGE_PTRUE))]
1395 "TARGET_SVE"
1396 {
1397 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1398 }
1399)
1400
1401;; Floating-point subtraction predicated with a PTRUE.
1402(define_insn "*sub<mode>3"
1403 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
1404 (unspec:SVE_F
1405 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
1406 (minus:SVE_F
1407 (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
1408 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
1409 UNSPEC_MERGE_PTRUE))]
1410 "TARGET_SVE
1411 && (register_operand (operands[2], <MODE>mode)
1412 || register_operand (operands[3], <MODE>mode))"
1413 "@
1414 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1415 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
1416 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
1417 fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
1418)
1419
1420;; Unpredicated floating-point multiplication.
1421(define_expand "mul<mode>3"
1422 [(set (match_operand:SVE_F 0 "register_operand")
1423 (unspec:SVE_F
1424 [(match_dup 3)
1425 (mult:SVE_F
1426 (match_operand:SVE_F 1 "register_operand")
1427 (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
1428 UNSPEC_MERGE_PTRUE))]
1429 "TARGET_SVE"
1430 {
1431 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1432 }
1433)
1434
1435;; Floating-point multiplication predicated with a PTRUE.
1436(define_insn "*mul<mode>3"
1437 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1438 (unspec:SVE_F
1439 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1440 (mult:SVE_F
1441 (match_operand:SVE_F 2 "register_operand" "%0, w")
1442 (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
1443 UNSPEC_MERGE_PTRUE))]
1444 "TARGET_SVE"
1445 "@
1446 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1447 fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
1448)
1449
1450;; Unpredicated fma (%0 = (%1 * %2) + %3).
1451(define_expand "fma<mode>4"
1452 [(set (match_operand:SVE_F 0 "register_operand")
1453 (unspec:SVE_F
1454 [(match_dup 4)
1455 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
1456 (match_operand:SVE_F 2 "register_operand")
1457 (match_operand:SVE_F 3 "register_operand"))]
1458 UNSPEC_MERGE_PTRUE))]
1459 "TARGET_SVE"
1460 {
1461 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1462 }
1463)
1464
1465;; fma predicated with a PTRUE.
1466(define_insn "*fma<mode>4"
1467 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1468 (unspec:SVE_F
1469 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1470 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
1471 (match_operand:SVE_F 4 "register_operand" "w, w")
1472 (match_operand:SVE_F 2 "register_operand" "w, 0"))]
1473 UNSPEC_MERGE_PTRUE))]
1474 "TARGET_SVE"
1475 "@
1476 fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1477 fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1478)
1479
1480;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
1481(define_expand "fnma<mode>4"
1482 [(set (match_operand:SVE_F 0 "register_operand")
1483 (unspec:SVE_F
1484 [(match_dup 4)
1485 (fma:SVE_F (neg:SVE_F
1486 (match_operand:SVE_F 1 "register_operand"))
1487 (match_operand:SVE_F 2 "register_operand")
1488 (match_operand:SVE_F 3 "register_operand"))]
1489 UNSPEC_MERGE_PTRUE))]
1490 "TARGET_SVE"
1491 {
1492 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1493 }
1494)
1495
1496;; fnma predicated with a PTRUE.
1497(define_insn "*fnma<mode>4"
1498 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1499 (unspec:SVE_F
1500 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1501 (fma:SVE_F (neg:SVE_F
1502 (match_operand:SVE_F 3 "register_operand" "%0, w"))
1503 (match_operand:SVE_F 4 "register_operand" "w, w")
1504 (match_operand:SVE_F 2 "register_operand" "w, 0"))]
1505 UNSPEC_MERGE_PTRUE))]
1506 "TARGET_SVE"
1507 "@
1508 fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1509 fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1510)
1511
1512;; Unpredicated fms (%0 = (%1 * %2) - %3).
1513(define_expand "fms<mode>4"
1514 [(set (match_operand:SVE_F 0 "register_operand")
1515 (unspec:SVE_F
1516 [(match_dup 4)
1517 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
1518 (match_operand:SVE_F 2 "register_operand")
1519 (neg:SVE_F
1520 (match_operand:SVE_F 3 "register_operand")))]
1521 UNSPEC_MERGE_PTRUE))]
1522 "TARGET_SVE"
1523 {
1524 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1525 }
1526)
1527
1528;; fms predicated with a PTRUE.
1529(define_insn "*fms<mode>4"
1530 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1531 (unspec:SVE_F
1532 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1533 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
1534 (match_operand:SVE_F 4 "register_operand" "w, w")
1535 (neg:SVE_F
1536 (match_operand:SVE_F 2 "register_operand" "w, 0")))]
1537 UNSPEC_MERGE_PTRUE))]
1538 "TARGET_SVE"
1539 "@
1540 fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1541 fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1542)
1543
1544;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
1545(define_expand "fnms<mode>4"
1546 [(set (match_operand:SVE_F 0 "register_operand")
1547 (unspec:SVE_F
1548 [(match_dup 4)
1549 (fma:SVE_F (neg:SVE_F
1550 (match_operand:SVE_F 1 "register_operand"))
1551 (match_operand:SVE_F 2 "register_operand")
1552 (neg:SVE_F
1553 (match_operand:SVE_F 3 "register_operand")))]
1554 UNSPEC_MERGE_PTRUE))]
1555 "TARGET_SVE"
1556 {
1557 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1558 }
1559)

;; fnms predicated with a PTRUE.
(define_insn "*fnms<mode>4"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (fma:SVE_F (neg:SVE_F
			(match_operand:SVE_F 3 "register_operand" "%0, w"))
		      (match_operand:SVE_F 4 "register_operand" "w, w")
		      (neg:SVE_F
			(match_operand:SVE_F 2 "register_operand" "w, 0")))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
   fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
)

;; Unpredicated floating-point division.
(define_expand "div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 3)
	   (div:SVE_F (match_operand:SVE_F 1 "register_operand")
		      (match_operand:SVE_F 2 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Floating-point division predicated with a PTRUE.
(define_insn "*div<mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w")
		      (match_operand:SVE_F 3 "register_operand" "w, 0"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "@
   fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
)
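
;; In "*div<mode>3" above, FDIVR divides in the reverse order
;; (%0 = %2 / %0), so either source operand can be tied to the
;; destination register.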

;; Unpredicated FNEG, FABS and FSQRT.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 2)
	   (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; FNEG, FABS and FSQRT predicated with a PTRUE.
(define_insn "*<optab><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
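
;; For illustration (hypothetical names; calls to sqrtf need
;; -fno-math-errno to be vectorizable), the sqrt<mode>2 instance of the
;; expander above can be reached from:
;;
;;   #include <math.h>
;;
;;   void
;;   f (float *restrict out, float *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       out[i] = sqrtf (in[i]);
;;   }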

;; Unpredicated FRINTy.
(define_expand "<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 2)
	   (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
			 FRINT)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; FRINTy predicated with a PTRUE.
(define_insn "*<frint_pattern><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
	(unspec:SVE_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
			 FRINT)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
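
;; For illustration (hypothetical names), the floor<mode>2 instance of
;; this expander maps to FRINTM and can be reached from:
;;
;;   #include <math.h>
;;
;;   void
;;   f (float *restrict out, float *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       out[i] = floorf (in[i]);
;;   }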

;; Unpredicated conversion of floats to integers of the same size (HF to HI,
;; SF to SI or DF to DI).
(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(unspec:<V_INT_EQUIV>
	  [(match_dup 2)
	   (FIXUORS:<V_INT_EQUIV>
	     (match_operand:SVE_F 1 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
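
;; An illustrative sketch (hypothetical names) of a loop that uses the
;; signed SF-to-SI instance of this expander, via FCVTZS:
;;
;;   void
;;   f (int *restrict out, float *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       out[i] = (int) in[i];
;;   }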

;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
	(unspec:SVE_HSDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FIXUORS:SVE_HSDI
	     (match_operand:VNx8HF 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
)

;; Conversion of SF to DI or SI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
	(unspec:SVE_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FIXUORS:SVE_SDI
	     (match_operand:VNx4SF 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
)

;; Conversion of DF to DI or SI, predicated with a PTRUE.
(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
	(unspec:SVE_SDI
	  [(match_operand:VNx2BI 1 "register_operand" "Upl")
	   (FIXUORS:SVE_SDI
	     (match_operand:VNx2DF 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
)

;; Unpredicated conversion of integers to floats of the same size
;; (HI to HF, SI to SF or DI to DF).
(define_expand "<optab><v_int_equiv><mode>2"
  [(set (match_operand:SVE_F 0 "register_operand")
	(unspec:SVE_F
	  [(match_dup 2)
	   (FLOATUORS:SVE_F
	     (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
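
;; The reverse direction, again as a hypothetical sketch; the signed
;; SI-to-SF instance of this expander corresponds to SCVTF:
;;
;;   void
;;   f (float *restrict out, int *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       out[i] = (float) in[i];
;;   }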

;; Conversion of DI, SI or HI to the same number of HFs, predicated
;; with a PTRUE.
(define_insn "*<optab><mode>vnx8hf2"
  [(set (match_operand:VNx8HF 0 "register_operand" "=w")
	(unspec:VNx8HF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FLOATUORS:VNx8HF
	     (match_operand:SVE_HSDI 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
)

;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
(define_insn "*<optab><mode>vnx4sf2"
  [(set (match_operand:VNx4SF 0 "register_operand" "=w")
	(unspec:VNx4SF
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (FLOATUORS:VNx4SF
	     (match_operand:SVE_SDI 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
)

;; Conversion of DI or SI to DF, predicated with a PTRUE.
(define_insn "*<optab><mode>vnx2df2"
  [(set (match_operand:VNx2DF 0 "register_operand" "=w")
	(unspec:VNx2DF
	  [(match_operand:VNx2BI 1 "register_operand" "Upl")
	   (FLOATUORS:VNx2DF
	     (match_operand:SVE_SDI 2 "register_operand" "w"))]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
)

;; Conversion of DFs to the same number of SFs, or SFs to the same number
;; of HFs.
(define_insn "*trunc<Vwide><mode>2"
  [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
	(unspec:SVE_HSF
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
	   (unspec:SVE_HSF
	     [(match_operand:<VWIDE> 2 "register_operand" "w")]
	     UNSPEC_FLOAT_CONVERT)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
)

;; Conversion of SFs to the same number of DFs, or HFs to the same number
;; of SFs.
(define_insn "*extend<mode><Vwide>2"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE>
	  [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
	   (unspec:<VWIDE>
	     [(match_operand:SVE_HSF 2 "register_operand" "w")]
	     UNSPEC_FLOAT_CONVERT)]
	  UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
)

;; PUNPKHI and PUNPKLO.
(define_insn "vec_unpack<su>_<perm_hilo>_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
	(unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
			UNPACK))]
  "TARGET_SVE"
  "punpk<perm_hilo>\t%0.h, %1.b"
)

;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
(define_insn "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
			UNPACK))]
  "TARGET_SVE"
  "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)
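
;; A hypothetical sketch of a widening loop that the vectorizer can
;; implement with SUNPKLO/SUNPKHI via these patterns:
;;
;;   void
;;   f (int *restrict out, short *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       out[i] = in[i];
;;   }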

;; Used by the vec_unpacks_<perm_hilo>_<mode> expander to unpack the bit
;; representation of a VNx4SF or VNx8HF without conversion.  The choice
;; between signed and unsigned isn't significant.
(define_insn "*vec_unpacku_<perm_hilo>_<mode>_no_convert"
  [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
	(unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand" "w")]
			UNPACK_UNSIGNED))]
  "TARGET_SVE"
  "uunpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)

;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion, then float-convert the
;; unpacked source.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(set (match_dup 2)
	(unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
			UNPACK_UNSIGNED))
   (set (match_operand:<VWIDE> 0 "register_operand")
	(unspec:<VWIDE> [(match_dup 3)
			 (unspec:<VWIDE> [(match_dup 2)] UNSPEC_FLOAT_CONVERT)]
			UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = gen_reg_rtx (<MODE>mode);
    operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
  }
)
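
;; This works because the unpack leaves each narrow value in the low half
;; of a wider container and SVE's widening FCVT reads only those low bits
;; (e.g. FCVT %0.s, %1/m, %2.h converts the half-precision value held in
;; the less significant half of each 32-bit element).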

;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
;; unpacked VNx4SI to VNx2DF.
(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
  [(set (match_dup 2)
	(unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
		       UNPACK_UNSIGNED))
   (set (match_operand:VNx2DF 0 "register_operand")
	(unspec:VNx2DF [(match_dup 3)
			(FLOATUORS:VNx2DF (match_dup 4))]
		       UNSPEC_MERGE_PTRUE))]
  "TARGET_SVE"
  {
    operands[2] = gen_reg_rtx (VNx2DImode);
    operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
    operands[4] = gen_rtx_SUBREG (VNx4SImode, operands[2], 0);
  }
)
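
;; A hypothetical sketch of an int-to-double widening loop that this
;; expander supports:
;;
;;   void
;;   f (double *restrict out, int *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       out[i] = in[i];
;;   }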

;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
	(unspec:PRED_BHS
	  [(match_operand:<VWIDE> 1 "register_operand" "Upa")
	   (match_operand:<VWIDE> 2 "register_operand" "Upa")]
	  UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Integer pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
	(unspec:SVE_BHSI
	  [(match_operand:<VWIDE> 1 "register_operand" "w")
	   (match_operand:<VWIDE> 2 "register_operand" "w")]
	  UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
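
;; A hypothetical sketch of a narrowing loop that the vectorizer can
;; implement with the integer pack above:
;;
;;   void
;;   f (short *restrict out, int *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       out[i] = (short) in[i];
;;   }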

;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
;; the results into a single vector.
(define_expand "vec_pack_trunc_<Vwide>"
  [(set (match_dup 4)
	(unspec:SVE_HSF
	  [(match_dup 3)
	   (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
			   UNSPEC_FLOAT_CONVERT)]
	  UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
	(unspec:SVE_HSF
	  [(match_dup 3)
	   (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
			   UNSPEC_FLOAT_CONVERT)]
	  UNSPEC_MERGE_PTRUE))
   (set (match_operand:SVE_HSF 0 "register_operand")
	(unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
    operands[4] = gen_reg_rtx (<MODE>mode);
    operands[5] = gen_reg_rtx (<MODE>mode);
  }
)
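
;; For example (hypothetical names), a double-to-float narrowing loop
;; that converts each half with FCVT and packs the results with UZP1:
;;
;;   void
;;   f (float *restrict out, double *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       out[i] = (float) in[i];
;;   }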

;; Convert two vectors of DF to SI and pack the results into a single vector.
(define_expand "vec_pack_<su>fix_trunc_vnx2df"
  [(set (match_dup 4)
	(unspec:VNx4SI
	  [(match_dup 3)
	   (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))
   (set (match_dup 5)
	(unspec:VNx4SI
	  [(match_dup 3)
	   (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
	  UNSPEC_MERGE_PTRUE))
   (set (match_operand:VNx4SI 0 "register_operand")
	(unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
    operands[4] = gen_reg_rtx (VNx4SImode);
    operands[5] = gen_reg_rtx (VNx4SImode);
  }
)
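
;; Similarly (hypothetical names), a double-to-int narrowing loop for
;; the expander above:
;;
;;   void
;;   f (int *restrict out, double *restrict in, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       out[i] = (int) in[i];
;;   }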