1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The file is organised into the following sections (search for the full
25 ;; ---- Note on the handling of big-endian SVE
26 ;; ---- Description of UNSPEC_PTEST
27 ;; ---- Description of UNSPEC_PRED_Z
28 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
29 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
30 ;; ---- Note on FFR handling
33 ;; ---- Moves of single vectors
34 ;; ---- Moves of multiple vectors
35 ;; ---- Moves of predicates
36 ;; ---- Moves relating to the FFR
39 ;; ---- Normal contiguous loads
40 ;; ---- Extending contiguous loads
41 ;; ---- First-faulting contiguous loads
42 ;; ---- First-faulting extending contiguous loads
43 ;; ---- Non-temporal contiguous loads
44 ;; ---- Normal gather loads
45 ;; ---- Extending gather loads
46 ;; ---- First-faulting gather loads
47 ;; ---- First-faulting extending gather loads
50 ;; ---- Contiguous prefetches
51 ;; ---- Gather prefetches
54 ;; ---- Normal contiguous stores
55 ;; ---- Truncating contiguous stores
56 ;; ---- Non-temporal contiguous stores
57 ;; ---- Normal scatter stores
58 ;; ---- Truncating scatter stores
61 ;; ---- [INT,FP] Duplicate element
62 ;; ---- [INT,FP] Initialize from individual elements
63 ;; ---- [INT] Linear series
64 ;; ---- [PRED] Duplicate element
66 ;; == Vector decomposition
67 ;; ---- [INT,FP] Extract index
68 ;; ---- [INT,FP] Extract active element
69 ;; ---- [PRED] Extract index
71 ;; == Unary arithmetic
72 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
73 ;; ---- [INT] General unary arithmetic corresponding to unspecs
74 ;; ---- [INT] Sign and zero extension
75 ;; ---- [INT] Truncation
76 ;; ---- [INT] Logical inverse
77 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
78 ;; ---- [FP] General unary arithmetic corresponding to unspecs
79 ;; ---- [PRED] Inverse
81 ;; == Binary arithmetic
82 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
83 ;; ---- [INT] Addition
84 ;; ---- [INT] Subtraction
85 ;; ---- [INT] Take address
86 ;; ---- [INT] Absolute difference
87 ;; ---- [INT] Saturating addition and subtraction
88 ;; ---- [INT] Highpart multiplication
89 ;; ---- [INT] Division
90 ;; ---- [INT] Binary logical operations
91 ;; ---- [INT] Binary logical operations (inverted second input)
92 ;; ---- [INT] Shifts (rounding towards -Inf)
93 ;; ---- [INT] Shifts (rounding towards 0)
94 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
95 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
96 ;; ---- [FP] General binary arithmetic corresponding to unspecs
98 ;; ---- [FP] Complex addition
99 ;; ---- [FP] Subtraction
100 ;; ---- [FP] Absolute difference
101 ;; ---- [FP] Multiplication
102 ;; ---- [FP] Binary logical operations
103 ;; ---- [FP] Sign copying
104 ;; ---- [FP] Maximum and minimum
105 ;; ---- [PRED] Binary logical operations
106 ;; ---- [PRED] Binary logical operations (inverted second input)
107 ;; ---- [PRED] Binary logical operations (inverted result)
109 ;; == Ternary arithmetic
110 ;; ---- [INT] MLA and MAD
111 ;; ---- [INT] MLS and MSB
112 ;; ---- [INT] Dot product
113 ;; ---- [INT] Sum of absolute differences
114 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
115 ;; ---- [FP] Complex multiply-add
116 ;; ---- [FP] Trigonometric multiply-add
118 ;; == Comparisons and selects
119 ;; ---- [INT,FP] Select based on predicates
120 ;; ---- [INT,FP] Compare and select
121 ;; ---- [INT] Comparisons
122 ;; ---- [INT] While tests
123 ;; ---- [FP] Direct comparisons
124 ;; ---- [FP] Absolute comparisons
125 ;; ---- [PRED] Select
126 ;; ---- [PRED] Test bits
129 ;; ---- [INT,FP] Conditional reductions
130 ;; ---- [INT] Tree reductions
131 ;; ---- [FP] Tree reductions
132 ;; ---- [FP] Left-to-right reductions
135 ;; ---- [INT,FP] General permutes
136 ;; ---- [INT,FP] Special-purpose unary permutes
137 ;; ---- [INT,FP] Special-purpose binary permutes
138 ;; ---- [PRED] Special-purpose unary permutes
139 ;; ---- [PRED] Special-purpose binary permutes
142 ;; ---- [INT<-INT] Packs
143 ;; ---- [INT<-INT] Unpacks
144 ;; ---- [INT<-FP] Conversions
145 ;; ---- [INT<-FP] Packs
146 ;; ---- [INT<-FP] Unpacks
147 ;; ---- [FP<-INT] Conversions
148 ;; ---- [FP<-INT] Packs
149 ;; ---- [FP<-INT] Unpacks
150 ;; ---- [FP<-FP] Packs
151 ;; ---- [FP<-FP] Unpacks
152 ;; ---- [PRED<-PRED] Packs
153 ;; ---- [PRED<-PRED] Unpacks
155 ;; == Vector partitioning
156 ;; ---- [PRED] Unary partitioning
157 ;; ---- [PRED] Binary partitioning
158 ;; ---- [PRED] Scalarization
160 ;; == Counting elements
161 ;; ---- [INT] Count elements in a pattern (scalar)
162 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
163 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
164 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
165 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
166 ;; ---- [INT] Count elements in a predicate (scalar)
167 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
168 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
169 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
170 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
172 ;; =========================================================================
174 ;; =========================================================================
176 ;; -------------------------------------------------------------------------
177 ;; ---- Note on the handling of big-endian SVE
178 ;; -------------------------------------------------------------------------
180 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
181 ;; same way as movdi or movti would: the first byte of memory goes
182 ;; into the most significant byte of the register and the last byte
183 ;; of memory goes into the least significant byte of the register.
184 ;; This is the most natural ordering for Advanced SIMD and matches
185 ;; the ABI layout for 64-bit and 128-bit vector types.
187 ;; As a result, the order of bytes within the register is what GCC
188 ;; expects for a big-endian target, and subreg offsets therefore work
189 ;; as expected, with the first element in memory having subreg offset 0
190 ;; and the last element in memory having the subreg offset associated
191 ;; with a big-endian lowpart. However, this ordering also means that
192 ;; GCC's lane numbering does not match the architecture's numbering:
193 ;; GCC always treats the element at the lowest address in memory
194 ;; (subreg offset 0) as element 0, while the architecture treats
195 ;; the least significant end of the register as element 0.
197 ;; The situation for SVE is different. We want the layout of the
198 ;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
199 ;; logically, a mov<mode> load must be indistinguishable from a
200 ;; maskload<mode> whose mask is all true. We therefore need the
201 ;; register layout to match LD1 rather than LDR. The ABI layout of
202 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
204 ;; As a result, the architecture lane numbering matches GCC's lane
205 ;; numbering, with element 0 always being the first in memory.
208 ;; - Applying a subreg offset to a register does not give the element
209 ;; that GCC expects: the first element in memory has the subreg offset
210 ;; associated with a big-endian lowpart while the last element in memory
211 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
213 ;; - We cannot use LDR and STR for spill slots that might be accessed
214 ;; via subregs, since although the elements have the order GCC expects,
215 ;; the order of the bytes within the elements is different. We instead
216 ;; access spill slots via LD1 and ST1, using secondary reloads to
217 ;; reserve a predicate register.
219 ;; -------------------------------------------------------------------------
220 ;; ---- Description of UNSPEC_PTEST
221 ;; -------------------------------------------------------------------------
223 ;; SVE provides a PTEST instruction for testing the active lanes of a
224 ;; predicate and setting the flags based on the result. The associated
225 ;; condition code tests are:
227 ;; - any (= ne): at least one active bit is set
228 ;; - none (= eq): all active bits are clear (*)
229 ;; - first (= mi): the first active bit is set
230 ;; - nfrst (= pl): the first active bit is clear (*)
231 ;; - last (= cc): the last active bit is set
232 ;; - nlast (= cs): the last active bit is clear (*)
234 ;; where the conditions marked (*) are also true when there are no active
235 ;; lanes (i.e. when the governing predicate is a PFALSE). The flags results
236 ;; of a PTEST use the condition code mode CC_NZC.
238 ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
239 ;; This means that for other predicate modes, we need a governing predicate
240 ;; in which all bits are defined.
242 ;; For example, most predicated .H operations ignore the odd bits of the
243 ;; governing predicate, so that an active lane is represented by the
244 ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
245 ;; any value. To test a .H predicate, we instead need "10" and "00"
246 ;; respectively, so that the condition only tests the even bits of the
249 ;; Several instructions set the flags as a side-effect, in the same way
250 ;; that a separate PTEST would. It's important for code quality that we
251 ;; use these flags results as often as possible, particularly in the case
252 ;; of WHILE* and RDFFR.
254 ;; Also, some of the instructions that set the flags are unpredicated
255 ;; and instead implicitly test all .B, .H, .S or .D elements, as though
256 ;; they were predicated on a PTRUE of that size. For example, a .S
257 ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
260 ;; We therefore need to represent PTEST operations in a way that
261 ;; makes it easy to combine them with both predicated and unpredicated
262 ;; operations, while using a VNx16BI governing predicate for all
263 ;; predicate modes. We do this using:
265 ;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
269 ;; - GP is the real VNx16BI governing predicate
271 ;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting
272 ;; GP to CAST_GP are guaranteed to be clear in GP.
274 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
275 ;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
276 ;; SVE_MAYBE_NOT_PTRUE otherwise.
278 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
280 ;; -------------------------------------------------------------------------
281 ;; ---- Description of UNSPEC_PRED_Z
282 ;; -------------------------------------------------------------------------
284 ;; SVE integer comparisons are predicated and return zero for inactive
285 ;; lanes. Sometimes we use them with predicates that are all-true and
286 ;; sometimes we use them with general predicates.
288 ;; The integer comparisons also set the flags and so build in the effect
289 ;; of a PTEST. We therefore want to be able to combine integer comparison
290 ;; patterns with PTESTs of the result. One difficulty with doing this is
291 ;; that (as noted above) the PTEST is always a .B operation and so can place
292 ;; stronger requirements on the governing predicate than the comparison does.
294 ;; For example, when applying a separate PTEST to the result of a full-vector
295 ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
296 ;; .B PTRUE. In contrast, the comparison might be predicated on either
297 ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
298 ;; bits don't matter for .H operations.
300 ;; We therefore can't rely on a full-vector comparison using the same
301 ;; predicate register as a following PTEST. We instead need to remember
302 ;; whether a comparison is known to be a full-vector comparison and use
303 ;; this information in addition to a check for equal predicate registers.
304 ;; At the same time, it's useful to have a common representation for all
305 ;; integer comparisons, so that they can be handled by a single set of
308 ;; We therefore take a similar approach to UNSPEC_PTEST above and use:
310 ;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
314 ;; - GP is the governing predicate, of mode <M:VPRED>
316 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
317 ;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
320 ;; - CODE is the comparison code
322 ;; - OP0 and OP1 are the values being compared, of mode M
324 ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
326 ;; -------------------------------------------------------------------------
327 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
328 ;; -------------------------------------------------------------------------
330 ;; Many SVE integer operations are predicated. We can generate them
331 ;; from four sources:
333 ;; (1) Using normal unpredicated optabs. In this case we need to create
334 ;; an all-true predicate register to act as the governing predicate
335 ;; for the SVE instruction. There are no inactive lanes, and thus
336 ;; the values of inactive lanes don't matter.
338 ;; (2) Using _x ACLE functions. In this case the function provides a
339 ;; specific predicate and some lanes might be inactive. However,
340 ;; as for (1), the values of the inactive lanes don't matter.
341 ;; We can make extra lanes active without changing the behavior
342 ;; (although for code-quality reasons we should avoid doing so
345 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
346 ;; These optabs have a predicate operand that specifies which lanes are
347 ;; active and another operand that provides the values of inactive lanes.
349 ;; (4) Using _m and _z ACLE functions. These functions map to the same
350 ;; patterns as (3), with the _z functions setting inactive lanes to zero
351 ;; and the _m functions setting the inactive lanes to one of the function
354 ;; For (1) and (2) we need a way of attaching the predicate to a normal
355 ;; unpredicated integer operation. We do this using:
357 ;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
359 ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
360 ;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE;
361 ;; it always is for (1), but might not be for (2).
363 ;; The unspec as a whole has the same value as (code:M ...) when PRED is
364 ;; all-true. It is always semantically valid to replace PRED with a PTRUE,
365 ;; but as noted above, we should only do so if there's a specific benefit.
367 ;; (The "_X" in the unspec is named after the ACLE functions in (2).)
369 ;; For (3) and (4) we can simply use the SVE port's normal representation
370 ;; of a predicate-based select:
372 ;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
374 ;; where INACTIVE specifies the values of inactive lanes.
376 ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
377 ;; than inserting the integer operation directly. This is mostly useful
378 ;; if we want the combine pass to merge an integer operation with an explicit
379 ;; vcond_mask (in other words, with a following SEL instruction). However,
380 ;; it's generally better to merge such operations at the gimple level
383 ;; -------------------------------------------------------------------------
384 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
385 ;; -------------------------------------------------------------------------
387 ;; Most SVE floating-point operations are predicated. We can generate
388 ;; them from four sources:
390 ;; (1) Using normal unpredicated optabs. In this case we need to create
391 ;; an all-true predicate register to act as the governing predicate
392 ;; for the SVE instruction. There are no inactive lanes, and thus
393 ;; the values of inactive lanes don't matter.
395 ;; (2) Using _x ACLE functions. In this case the function provides a
396 ;; specific predicate and some lanes might be inactive. However,
397 ;; as for (1), the values of the inactive lanes don't matter.
399 ;; The instruction must have the same exception behavior as the
400 ;; function call unless things like command-line flags specifically
401 ;; allow otherwise. For example, with -ffast-math, it is OK to
402 ;; raise exceptions for inactive lanes, but normally it isn't.
404 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
405 ;; These optabs have a predicate operand that specifies which lanes are
406 ;; active and another operand that provides the values of inactive lanes.
408 ;; (4) Using _m and _z ACLE functions. These functions map to the same
409 ;; patterns as (3), with the _z functions setting inactive lanes to zero
410 ;; and the _m functions setting the inactive lanes to one of the function
415 ;; - In (1), the predicate is known to be all true and the pattern can use
416 ;; unpredicated operations where available.
418 ;; - In (2), the predicate might or might not be all true. The pattern can
419 ;; use unpredicated instructions if the predicate is all-true or if things
420 ;; like command-line flags allow exceptions for inactive lanes.
422 ;; - (3) and (4) represent a native SVE predicated operation. Some lanes
423 ;; might be inactive and inactive lanes of the result must have specific
424 ;; values. There is no scope for using unpredicated instructions (and no
425 ;; reason to want to), so the question about command-line flags doesn't
428 ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
429 ;; in combination with a separate predicate operand, e.g.
431 ;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
432 ;; (sqrt:SVE_FULL_F 2 "register_operand" "w")]
435 ;; because (sqrt ...) can raise an exception for any lane, including
436 ;; inactive ones. We therefore need to use an unspec instead.
438 ;; Also, (2) requires some way of distinguishing the case in which the
439 ;; predicate might have inactive lanes and cannot be changed from the
440 ;; case in which the predicate has no inactive lanes or can be changed.
441 ;; This information is also useful when matching combined FP patterns
442 ;; in which the predicates might not be equal.
444 ;; We therefore model FP operations as an unspec of the form:
446 ;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
450 ;; - PRED is the governing predicate.
452 ;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the
453 ;; value SVE_STRICT_GP if PRED might have inactive lanes and if those
454 ;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise.
456 ;; - OP0 OP1 ... are the normal input operands to the operation.
458 ;; - MNEMONIC is the mnemonic of the associated SVE instruction.
460 ;; -------------------------------------------------------------------------
461 ;; ---- Note on FFR handling
462 ;; -------------------------------------------------------------------------
464 ;; Logically we want to divide FFR-related instructions into regions
465 ;; that contain exactly one of:
467 ;; - a single write to the FFR
468 ;; - any number of reads from the FFR (but only one read is likely)
469 ;; - any number of LDFF1 and LDNF1 instructions
471 ;; However, LDFF1 and LDNF1 instructions should otherwise behave like
472 ;; normal loads as far as possible. This means that they should be
473 ;; schedulable within a region in the same way that LD1 would be,
474 ;; and they should be deleted as dead if the result is unused. The loads
475 ;; should therefore not write to the FFR, since that would both serialize
476 ;; the loads with respect to each other and keep the loads live for any
479 ;; We get around this by using a fake "FFR token" (FFRT) to help describe
480 ;; the dependencies. Writing to the FFRT starts a new "FFRT region",
481 ;; while using the FFRT keeps the instruction within its region.
484 ;; - Writes start a new FFRT region as well as setting the FFR:
486 ;; W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
488 ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
489 ;; loads stay within the same FFRT region:
491 ;; L1: load data while using the FFRT
493 ;; In addition, any FFRT region that includes a load also has at least one
496 ;; L2: FFR = update(FFR, FFRT) [type == no_insn]
498 ;; to make it clear that the region both reads from and writes to the FFR.
500 ;; - Reads do the following:
502 ;; R1: FFRT = FFR [type == no_insn]
503 ;; R2: read from the FFRT
504 ;; R3: FFRT = update(FFRT) [type == no_insn]
506 ;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and
507 ;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
508 ;; cannot move backwards across R3.
510 ;; This way, writes are only kept alive by later loads or reads,
511 ;; and write/read pairs fold normally. For two consecutive reads,
512 ;; the first R3 is made dead by the second R1, which in turn becomes
513 ;; redundant with the first R1. We then have:
515 ;; first R1: FFRT = FFR
516 ;; first read from the FFRT
517 ;; second read from the FFRT
518 ;; second R3: FFRT = update(FFRT)
520 ;; i.e. the two FFRT regions collapse into a single one with two
521 ;; independent reads.
523 ;; The model still prevents some valid optimizations though. For example,
524 ;; if all loads in an FFRT region are deleted as dead, nothing would remove
525 ;; the L2 instructions.
527 ;; =========================================================================
529 ;; =========================================================================
531 ;; -------------------------------------------------------------------------
532 ;; ---- Moves of single vectors
533 ;; -------------------------------------------------------------------------
535 ;; - MOV (including aliases)
536 ;; - LD1B (contiguous form)
541 ;; - ST1B (contiguous form)
546 ;; -------------------------------------------------------------------------
;; Expand a move of a single SVE vector (any mode in SVE_ALL).
;; While new pseudos can still be created, moves that involve memory are
;; handed to aarch64_expand_sve_mem_move; constant sources are handed to
;; aarch64_expand_mov_immediate.
548 (define_expand "mov<mode>"
549 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
550 (match_operand:SVE_ALL 1 "general_operand"))]
553 /* Use the predicated load and store patterns where possible.
554 This is required for big-endian targets (see the comment at the
555 head of the file) and increases the addressing choices for
557 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
558 && can_create_pseudo_p ())
560 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
564 if (CONSTANT_P (operands[1]))
566 aarch64_expand_mov_immediate (operands[0], operands[1]);
570 /* Optimize subregs on big-endian targets: we can use REV[BHW]
571 instead of going through memory. */
573 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
;; Misaligned moves of SVE vectors need no special handling: the expander
;; simply emits an ordinary move of operand 1 into operand 0.
578 (define_expand "movmisalign<mode>"
579 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
580 (match_operand:SVE_ALL 1 "general_operand"))]
583 /* Equivalent to a normal move for our purposes. */
584 emit_move_insn (operands[0], operands[1]);
589 ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
590 ;; little-endian ordering is acceptable. Only allow memory operations during
591 ;; and after RA; before RA we want the predicated load and store patterns to
593 (define_insn "*aarch64_sve_mov<mode>_ldr_str"
594 [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
595 (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
597 && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
598 && ((lra_in_progress || reload_completed)
599 || (register_operand (operands[0], <MODE>mode)
600 && nonmemory_operand (operands[1], <MODE>mode)))"
605 * return aarch64_output_sve_mov_immediate (operands[1]);"
608 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
609 ;; or vectors for which little-endian ordering isn't acceptable. Memory
610 ;; accesses require secondary reloads.
611 (define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
612 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
613 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
615 && <MODE>mode != VNx16QImode
617 || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
620 * return aarch64_output_sve_mov_immediate (operands[1]);"
623 ;; Handle memory reloads for modes that can't use LDR and STR. We use
624 ;; byte PTRUE for all modes to try to encourage reuse. This pattern
625 ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
626 (define_expand "aarch64_sve_reload_mem"
628 [(set (match_operand 0)
630 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
633 /* Create a PTRUE. */
634 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
636 /* Refer to the PTRUE in the appropriate mode for this move. */
637 machine_mode mode = GET_MODE (operands[0]);
638 rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
640 /* Emit a predicated load or store. */
641 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
646 ;; A predicated move in which the predicate is known to be all-true.
647 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
648 ;; so changes to this pattern will need changes there as well.
649 (define_insn_and_split "@aarch64_pred_mov<mode>"
650 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
652 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
653 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
656 && (register_operand (operands[0], <MODE>mode)
657 || register_operand (operands[2], <MODE>mode))"
660 ld1<Vesize>\t%0.<Vctype>, %1/z, %2
661 st1<Vesize>\t%2.<Vctype>, %1, %0"
662 "&& register_operand (operands[0], <MODE>mode)
663 && register_operand (operands[2], <MODE>mode)"
664 [(set (match_dup 0) (match_dup 2))]
667 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
668 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
669 ;; for details. We use a special predicate for operand 2 to reduce
670 ;; the number of patterns.
671 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
672 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
674 [(match_operand:VNx16BI 1 "register_operand" "Upl")
675 (match_operand 2 "aarch64_any_register_operand" "w")]
677 "TARGET_SVE && BYTES_BIG_ENDIAN"
679 "&& reload_completed"
682 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
687 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
688 ;; This is equivalent to a subreg on little-endian targets but not for
689 ;; big-endian; see the comment at the head of the file for details.
690 (define_expand "@aarch64_sve_reinterpret<mode>"
691 [(set (match_operand:SVE_ALL 0 "register_operand")
693 [(match_operand 1 "aarch64_any_register_operand")]
694 UNSPEC_REINTERPRET))]
697 machine_mode src_mode = GET_MODE (operands[1]);
698 if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
700 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
706 ;; A pattern for handling type punning on big-endian targets. We use a
707 ;; special predicate for operand 1 to reduce the number of patterns.
708 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
709 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
711 [(match_operand 1 "aarch64_any_register_operand" "w")]
712 UNSPEC_REINTERPRET))]
715 "&& reload_completed"
716 [(set (match_dup 0) (match_dup 1))]
718 operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
722 ;; -------------------------------------------------------------------------
723 ;; ---- Moves of multiple vectors
724 ;; -------------------------------------------------------------------------
725 ;; All patterns in this section are synthetic and split to real
726 ;; instructions after reload.
727 ;; -------------------------------------------------------------------------
;; Expand a move of an SVE structure mode (any mode in SVE_STRUCT).
;; Memory moves selected by the condition below go through
;; aarch64_expand_sve_mem_move, which relies on new pseudos still being
;; available (hence the assertion).  Constant sources go through
;; aarch64_expand_mov_immediate.
729 (define_expand "mov<mode>"
730 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
731 (match_operand:SVE_STRUCT 1 "general_operand"))]
734 /* Big-endian loads and stores need to be done via LD1 and ST1;
735 see the comment at the head of the file for details. */
736 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
739 gcc_assert (can_create_pseudo_p ());
740 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
744 if (CONSTANT_P (operands[1]))
746 aarch64_expand_mov_immediate (operands[0], operands[1]);
752 ;; Unpredicated structure moves (little-endian).
753 (define_insn "*aarch64_sve_mov<mode>_le"
754 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
755 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
756 "TARGET_SVE && !BYTES_BIG_ENDIAN"
758 [(set_attr "length" "<insn_length>")]
761 ;; Unpredicated structure moves (big-endian). Memory accesses require
762 ;; secondary reloads.
763 (define_insn "*aarch64_sve_mov<mode>_be"
764 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
765 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
766 "TARGET_SVE && BYTES_BIG_ENDIAN"
768 [(set_attr "length" "<insn_length>")]
771 ;; Split unpredicated structure moves into pieces. This is the same
772 ;; for both big-endian and little-endian code, although it only needs
773 ;; to handle memory operands for little-endian code.
775 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
776 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
777 "TARGET_SVE && reload_completed"
780 rtx dest = operands[0];
781 rtx src = operands[1];
782 if (REG_P (dest) && REG_P (src))
783 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
785 for (unsigned int i = 0; i < <vector_count>; ++i)
787 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
788 i * BYTES_PER_SVE_VECTOR);
789 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
790 i * BYTES_PER_SVE_VECTOR);
791 emit_insn (gen_rtx_SET (subdest, subsrc));
797 ;; Predicated structure moves. This works for both endiannesses but in
798 ;; practice is only useful for big-endian.
;; Operand 1 is the <VPRED>-mode predicate register.  At least one of
;; operands 0 and 2 must be a register.  Once reload has completed the
;; move is split into <vector_count> single-vector moves, each emitted by
;; aarch64_emit_sve_pred_move on subregs at byte offset
;; i * BYTES_PER_SVE_VECTOR.
799 (define_insn_and_split "@aarch64_pred_mov<mode>"
800 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
802 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
803 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
806 && (register_operand (operands[0], <MODE>mode)
807 || register_operand (operands[2], <MODE>mode))"
809 "&& reload_completed"
812 for (unsigned int i = 0; i < <vector_count>; ++i)
814 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
816 i * BYTES_PER_SVE_VECTOR);
817 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
819 i * BYTES_PER_SVE_VECTOR);
820 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
824 [(set_attr "length" "<insn_length>")]
827 ;; -------------------------------------------------------------------------
828 ;; ---- Moves of predicates
829 ;; -------------------------------------------------------------------------
837 ;; -------------------------------------------------------------------------
;; Expand a PRED_ALL move.  When the destination is a MEM the source is
;; first forced into a register; a constant source is expanded through
;; aarch64_expand_mov_immediate.
839 (define_expand "mov<mode>"
840 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
841 (match_operand:PRED_ALL 1 "general_operand"))]
844 if (GET_CODE (operands[0]) == MEM)
845 operands[1] = force_reg (<MODE>mode, operands[1]);
847 if (CONSTANT_P (operands[1]))
849 aarch64_expand_mov_immediate (operands[0], operands[1]);
;; PRED_ALL move with four alternatives: register to register, register
;; to memory, memory to register, and a "Dn" source whose assembly is
;; produced by aarch64_output_sve_mov_immediate.  At least one of the two
;; operands must be a register.
855 (define_insn "*aarch64_sve_mov<mode>"
856 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
857 (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
859 && (register_operand (operands[0], <MODE>mode)
860 || register_operand (operands[1], <MODE>mode))"
865 * return aarch64_output_sve_mov_immediate (operands[1]);"
868 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
;; The assembly comes from aarch64_output_sve_ptrues (operands[1]).  The
;; rewrite step fires while operand 2 or operand 3 is not constant and
;; replaces both with CONSTM1_RTX (VNx16BImode).
869 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
870 [(set (reg:CC_NZC CC_REGNUM)
874 (const_int SVE_KNOWN_PTRUE)
875 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
877 [(match_operand:SI 4 "const_int_operand")
878 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
881 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
885 return aarch64_output_sve_ptrues (operands[1]);
887 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
889 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
893 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
;; The assembly comes from aarch64_output_sve_ptrues (operands[1]).  The
;; rewrite step fires while operand 2 or operand 3 is not constant; it
;; sets operand 2 to CONSTM1_RTX (VNx16BImode) and operand 3 to
;; CONSTM1_RTX (<MODE>mode).
894 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
895 [(set (reg:CC_NZC CC_REGNUM)
899 (const_int SVE_KNOWN_PTRUE)
901 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
903 [(match_operand:SI 4 "const_int_operand")
904 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
907 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
911 return aarch64_output_sve_ptrues (operands[1]);
913 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
915 operands[2] = CONSTM1_RTX (VNx16BImode);
916 operands[3] = CONSTM1_RTX (<MODE>mode);
920 ;; Match PTRUES Pn.B when only the flags result is useful (which is
921 ;; a way of testing VL).
;; Operand 0 is only a clobbered "Upa" scratch register.  The rewrite
;; step fires while operand 2 or operand 3 is not constant and replaces
;; both with CONSTM1_RTX (VNx16BImode).
922 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
923 [(set (reg:CC_NZC CC_REGNUM)
927 (const_int SVE_KNOWN_PTRUE)
928 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
930 [(match_operand:SI 4 "const_int_operand")
931 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
934 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
937 return aarch64_output_sve_ptrues (operands[1]);
939 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
941 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
945 ;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
946 ;; a way of testing VL).
;; Operand 0 is only a clobbered "Upa" scratch register.  The rewrite
;; step fires while operand 2 or operand 3 is not constant; it sets
;; operand 2 to CONSTM1_RTX (VNx16BImode) and operand 3 to
;; CONSTM1_RTX (<MODE>mode).
947 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
948 [(set (reg:CC_NZC CC_REGNUM)
952 (const_int SVE_KNOWN_PTRUE)
954 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
956 [(match_operand:SI 4 "const_int_operand")
957 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
960 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
963 return aarch64_output_sve_ptrues (operands[1]);
965 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
967 operands[2] = CONSTM1_RTX (VNx16BImode);
968 operands[3] = CONSTM1_RTX (<MODE>mode);
972 ;; -------------------------------------------------------------------------
973 ;; ---- Moves relating to the FFR
974 ;; -------------------------------------------------------------------------
979 ;; -------------------------------------------------------------------------
981 ;; [W1 in the block comment above about FFR handling]
983 ;; Write to the FFR and start a new FFRT scheduling region.
;; Operand 0 is either a "Dm" operand or a "Upa" predicate register.  The
;; pattern sets FFR_REGNUM to operand 0 and sets FFRT_REGNUM to an
;; UNSPEC_WRFFR of the same operand.
984 (define_insn "aarch64_wrffr"
985 [(set (reg:VNx16BI FFR_REGNUM)
986 (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
987 (set (reg:VNx16BI FFRT_REGNUM)
988 (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
995 ;; [L2 in the block comment above about FFR handling]
997 ;; Introduce a read from and write to the FFR in the current FFRT region,
998 ;; so that the FFR value is live on entry to the region and so that the FFR
999 ;; value visibly changes within the region. This is used (possibly multiple
1000 ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
;; Sets FFR_REGNUM to an UNSPEC_UPDATE_FFR of FFRT_REGNUM and FFR_REGNUM.
;; The insn has type "no_insn".
1001 (define_insn "aarch64_update_ffr_for_load"
1002 [(set (reg:VNx16BI FFR_REGNUM)
1003 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1004 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1007 [(set_attr "type" "no_insn")]
1010 ;; [R1 in the block comment above about FFR handling]
1012 ;; Notionally copy the FFR to the FFRT, so that the current FFR value
1013 ;; can be read from there by the RDFFR instructions below. This acts
1014 ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1015 ;; creates a natural dependency with earlier writes.
;; A plain set of FFRT_REGNUM from FFR_REGNUM.  The insn has type
;; "no_insn".
1016 (define_insn "aarch64_copy_ffr_to_ffrt"
1017 [(set (reg:VNx16BI FFRT_REGNUM)
1018 (reg:VNx16BI FFR_REGNUM))]
1021 [(set_attr "type" "no_insn")]
1024 ;; [R2 in the block comment above about FFR handling]
1026 ;; Read the FFR via the FFRT.
;; Operand 0 (a "Upa" predicate register) is set from FFRT_REGNUM.
1027 (define_insn "aarch64_rdffr"
1028 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1029 (reg:VNx16BI FFRT_REGNUM))]
1034 ;; Likewise with zero predication.
;; Operand 0 is the "Upa" destination; operand 1 is the "Upa" predicate
;; register that is combined with FFRT_REGNUM.
1035 (define_insn "aarch64_rdffr_z"
1036 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1038 (reg:VNx16BI FFRT_REGNUM)
1039 (match_operand:VNx16BI 1 "register_operand" "Upa")))]
1044 ;; Read the FFR to test for a fault, without using the predicate result.
;; Emits RDFFRS with %1 as the zeroing predicate.  Operand 0 is only a
;; clobbered scratch register, so only the CC_NZC result is kept.
;; Operand 2 must satisfy aarch64_sve_ptrue_flag.
1045 (define_insn "*aarch64_rdffr_z_ptest"
1046 [(set (reg:CC_NZC CC_REGNUM)
1048 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1050 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1052 (reg:VNx16BI FFRT_REGNUM)
1055 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1057 "rdffrs\t%0.b, %1/z"
1060 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
;; Emits RDFFRS with %1 as the zeroing predicate.  The flag operand is
;; the constant SVE_KNOWN_PTRUE, and operand 0 is only a clobbered
;; scratch register.
1061 (define_insn "*aarch64_rdffr_ptest"
1062 [(set (reg:CC_NZC CC_REGNUM)
1064 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1066 (const_int SVE_KNOWN_PTRUE)
1067 (reg:VNx16BI FFRT_REGNUM)]
1069 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1071 "rdffrs\t%0.b, %1/z"
1074 ;; Read the FFR with zero predication and test the result.
;; Emits RDFFRS with %1 as the zeroing predicate.  Both results are kept:
;; the CC_NZC flags and the predicate written to operand 0.  Operand 2
;; must satisfy aarch64_sve_ptrue_flag.
1075 (define_insn "*aarch64_rdffr_z_cc"
1076 [(set (reg:CC_NZC CC_REGNUM)
1078 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1080 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1082 (reg:VNx16BI FFRT_REGNUM)
1085 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1087 (reg:VNx16BI FFRT_REGNUM)
1090 "rdffrs\t%0.b, %1/z"
1093 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
;; Emits RDFFRS with %1 as the zeroing predicate.  The flag operand is
;; the constant SVE_KNOWN_PTRUE; operand 0 receives FFRT_REGNUM in
;; addition to the CC_NZC result.
1094 (define_insn "*aarch64_rdffr_cc"
1095 [(set (reg:CC_NZC CC_REGNUM)
1097 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1099 (const_int SVE_KNOWN_PTRUE)
1100 (reg:VNx16BI FFRT_REGNUM)]
1102 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1103 (reg:VNx16BI FFRT_REGNUM))]
1105 "rdffrs\t%0.b, %1/z"
1108 ;; [R3 in the block comment above about FFR handling]
1110 ;; Arbitrarily update the FFRT after a read from the FFR. This acts as
1111 ;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
;; Sets FFRT_REGNUM to an UNSPEC_UPDATE_FFRT of its own previous value.
;; The insn has type "no_insn".
1112 (define_insn "aarch64_update_ffrt"
1113 [(set (reg:VNx16BI FFRT_REGNUM)
1114 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1117 [(set_attr "type" "no_insn")]
1120 ;; =========================================================================
1122 ;; =========================================================================
1124 ;; -------------------------------------------------------------------------
1125 ;; ---- Normal contiguous loads
1126 ;; -------------------------------------------------------------------------
1127 ;; Includes contiguous forms of:
1144 ;; -------------------------------------------------------------------------
;; Operand 2 is the <VPRED>-mode predicate and operand 1 the memory
;; source.  Emitted as LD1 with zeroing predication (%2/z) into
;; %0.<Vctype>.
1147 (define_insn "maskload<mode><vpred>"
1148 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1150 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1151 (match_operand:SVE_ALL 1 "memory_operand" "m")]
1154 "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
1157 ;; Unpredicated LD[234].
;; The expander supplies operand 2 itself, setting it to
;; aarch64_ptrue_reg (<VPRED>mode).
1158 (define_expand "vec_load_lanes<mode><vsingle>"
1159 [(set (match_operand:SVE_STRUCT 0 "register_operand")
1162 (match_operand:SVE_STRUCT 1 "memory_operand")]
1166 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1170 ;; Predicated LD[234].
;; Operand 2 is the <VPRED>-mode predicate and operand 1 the memory
;; source.  The mnemonic is ld<vector_count><Vesize>, with zeroing
;; predication (%2/z).
1171 (define_insn "vec_mask_load_lanes<mode><vsingle>"
1172 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1174 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1175 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
1178 "ld<vector_count><Vesize>\t%0, %2/z, %1"
1181 ;; -------------------------------------------------------------------------
1182 ;; ---- Extending contiguous loads
1183 ;; -------------------------------------------------------------------------
1184 ;; Includes contiguous forms of:
1191 ;; -------------------------------------------------------------------------
1193 ;; Predicated load and extend.
;; Operand 2 predicates the load itself (%2/z in the template).  Operand 3
;; is rewritten to CONSTM1_RTX (<SVE_HSDI:VPRED>mode) whenever it is not
;; already constant.  The pattern is only enabled when
;; (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0.
1194 (define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1195 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1197 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1198 (ANY_EXTEND:SVE_HSDI
1199 (unspec:SVE_PARTIAL_I
1200 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1201 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1204 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1205 "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1206 "&& !CONSTANT_P (operands[3])"
1208 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1212 ;; -------------------------------------------------------------------------
1213 ;; ---- First-faulting contiguous loads
1214 ;; -------------------------------------------------------------------------
1215 ;; Includes contiguous forms of:
1224 ;; -------------------------------------------------------------------------
1226 ;; Contiguous non-extending first-faulting or non-faulting loads.
;; Operand 2 is the <VPRED>-mode predicate and operand 1 the address,
;; which must satisfy aarch64_sve_ld<fn>f1_operand.  The pattern also
;; reads FFRT_REGNUM.
1227 (define_insn "@aarch64_ld<fn>f1<mode>"
1228 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1230 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1231 (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1232 (reg:VNx16BI FFRT_REGNUM)]
1235 "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1238 ;; -------------------------------------------------------------------------
1239 ;; ---- First-faulting extending contiguous loads
1240 ;; -------------------------------------------------------------------------
1241 ;; Includes contiguous forms of:
1254 ;; -------------------------------------------------------------------------
1256 ;; Predicated first-faulting or non-faulting load and extend.
;; Operand 2 predicates the load itself and the inner unspec also reads
;; FFRT_REGNUM.  Operand 3 is rewritten to
;; CONSTM1_RTX (<SVE_HSDI:VPRED>mode) whenever it is not already constant.
;; The pattern is only enabled when
;; (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0.
1257 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1258 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1260 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1261 (ANY_EXTEND:SVE_HSDI
1262 (unspec:SVE_PARTIAL_I
1263 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1264 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1265 (reg:VNx16BI FFRT_REGNUM)]
1268 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1269 "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1270 "&& !CONSTANT_P (operands[3])"
1272 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1276 ;; -------------------------------------------------------------------------
1277 ;; ---- Non-temporal contiguous loads
1278 ;; -------------------------------------------------------------------------
1284 ;; -------------------------------------------------------------------------
1286 ;; Predicated contiguous non-temporal load.
;; Operand 2 is the <VPRED>-mode predicate and operand 1 the memory
;; source.  Emitted as LDNT1 with zeroing predication (%2/z).
1287 (define_insn "@aarch64_ldnt1<mode>"
1288 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1290 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1291 (match_operand:SVE_FULL 1 "memory_operand" "m")]
1294 "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1297 ;; -------------------------------------------------------------------------
1298 ;; ---- Normal gather loads
1299 ;; -------------------------------------------------------------------------
1300 ;; Includes gather forms of:
1303 ;; -------------------------------------------------------------------------
1305 ;; Unpredicated gather loads.
;; Operands 1 to 4 are the DI offset, the <V_INT_CONTAINER> vector, a
;; constant integer and the scale.  The expander supplies operand 5
;; itself, setting it to aarch64_ptrue_reg (<VPRED>mode).
1306 (define_expand "gather_load<mode><v_int_container>"
1307 [(set (match_operand:SVE_24 0 "register_operand")
1310 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1311 (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1312 (match_operand:DI 3 "const_int_operand")
1313 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1314 (mem:BLK (scratch))]
1315 UNSPEC_LD1_GATHER))]
1318 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1322 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
1323 ;; unsigned extension and false for signed extension.
;; Operand 5 is the VNx4BI predicate.  The six alternatives correspond,
;; in order, to the address forms in the output templates: vector base
;; %2 alone, vector base plus immediate %1, scalar base %1 with sxtw or
;; uxtw vector offsets, and the same sxtw/uxtw forms scaled by %p4.
1324 (define_insn "mask_gather_load<mode><v_int_container>"
1325 [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
1327 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1328 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
1329 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1330 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1331 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1332 (mem:BLK (scratch))]
1333 UNSPEC_LD1_GATHER))]
1336 ld1<Vesize>\t%0.s, %5/z, [%2.s]
1337 ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1338 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1339 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1340 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1341 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1344 ;; Predicated gather loads for 64-bit elements. The value of operand 3
1345 ;; doesn't matter in this case.
;; Operand 5 is the VNx2BI predicate.  The four alternatives correspond,
;; in order, to the address forms in the output templates: vector base
;; %2 alone, vector base plus immediate %1, scalar base %1 plus vector
;; offsets, and the same form with the offsets scaled by "lsl %p4".
1346 (define_insn "mask_gather_load<mode><v_int_container>"
1347 [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
1349 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1350 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
1351 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1352 (match_operand:DI 3 "const_int_operand")
1353 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
1354 (mem:BLK (scratch))]
1355 UNSPEC_LD1_GATHER))]
1358 ld1<Vesize>\t%0.d, %5/z, [%2.d]
1359 ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1360 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1361 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1364 ;; Likewise, but with the offset being extended from 32 bits.
;; Operand 2 is a VNx2SI register here.  The two alternatives are the
;; unscaled and the %p4-scaled <su>xtw address forms.  Operand 6 is
;; rewritten to CONSTM1_RTX (VNx2BImode) whenever it is not already
;; constant.
1365 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1366 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1368 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1369 (match_operand:DI 1 "register_operand" "rk, rk")
1373 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1375 (match_operand:DI 3 "const_int_operand")
1376 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1377 (mem:BLK (scratch))]
1378 UNSPEC_LD1_GATHER))]
1381 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1382 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
1383 "&& !CONSTANT_P (operands[6])"
1385 operands[6] = CONSTM1_RTX (VNx2BImode);
1389 ;; Likewise, but with the offset being truncated to 32 bits and then
;; The two alternatives are the unscaled and the %p4-scaled sxtw address
;; forms.  Operand 6 is rewritten to CONSTM1_RTX (VNx2BImode) whenever it
;; is not already constant.
1391 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1392 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1394 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1395 (match_operand:DI 1 "register_operand" "rk, rk")
1400 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1402 (match_operand:DI 3 "const_int_operand")
1403 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1404 (mem:BLK (scratch))]
1405 UNSPEC_LD1_GATHER))]
1408 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1409 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1410 "&& !CONSTANT_P (operands[6])"
1412 operands[6] = CONSTM1_RTX (VNx2BImode);
1416 ;; Likewise, but with the offset being truncated to 32 bits and then
;; Operand 6 must satisfy aarch64_sve_uxtw_immediate.  The two
;; alternatives are the unscaled and the %p4-scaled uxtw address forms.
1418 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1419 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1421 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1422 (match_operand:DI 1 "register_operand" "rk, rk")
1424 (match_operand:VNx2DI 2 "register_operand" "w, w")
1425 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1426 (match_operand:DI 3 "const_int_operand")
1427 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1428 (mem:BLK (scratch))]
1429 UNSPEC_LD1_GATHER))]
1432 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1433 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1436 ;; -------------------------------------------------------------------------
1437 ;; ---- Extending gather loads
1438 ;; -------------------------------------------------------------------------
1439 ;; Includes gather forms of:
1446 ;; -------------------------------------------------------------------------
1448 ;; Predicated extending gather loads for 32-bit elements. Operand 3 is
1449 ;; true for unsigned extension and false for signed extension.
;; Operand 5 predicates the load (%5/z).  The six alternatives follow the
;; address forms in the output templates: vector base %2, vector base
;; plus immediate %1, and scalar base %1 with sxtw/uxtw vector offsets,
;; unscaled or scaled by %p4.  Operand 6 is rewritten to
;; CONSTM1_RTX (VNx4BImode) whenever it is not already constant.
1450 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1451 [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
1453 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1454 (ANY_EXTEND:SVE_4HSI
1456 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1457 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
1458 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1459 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1460 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1461 (mem:BLK (scratch))]
1462 UNSPEC_LD1_GATHER))]
1464 "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1466 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1467 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1468 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1469 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1470 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1471 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1472 "&& !CONSTANT_P (operands[6])"
1474 operands[6] = CONSTM1_RTX (VNx4BImode);
1478 ;; Predicated extending gather loads for 64-bit elements. The value of
1479 ;; operand 3 doesn't matter in this case.
;; Operand 5 predicates the load (%5/z).  The four alternatives follow
;; the address forms in the output templates: vector base %2, vector base
;; plus immediate %1, scalar base %1 plus vector offsets, and the same
;; form scaled by "lsl %p4".  Operand 6 is rewritten to
;; CONSTM1_RTX (VNx2BImode) whenever it is not already constant.
1480 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1481 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
1483 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1484 (ANY_EXTEND:SVE_2HSDI
1486 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1487 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
1488 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1489 (match_operand:DI 3 "const_int_operand")
1490 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
1491 (mem:BLK (scratch))]
1492 UNSPEC_LD1_GATHER))]
1494 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1496 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1497 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1498 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1499 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1500 "&& !CONSTANT_P (operands[6])"
1502 operands[6] = CONSTM1_RTX (VNx2BImode);
1506 ;; Likewise, but with the offset being extended from 32 bits.
;; Operand 2 is a VNx2SI register here.  The two alternatives are the
;; unscaled and the %p4-scaled <ANY_EXTEND2:su>xtw address forms.  If
;; either operand 6 or operand 7 is not constant, both are rewritten to
;; CONSTM1_RTX (VNx2BImode).
1507 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1508 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1511 (ANY_EXTEND:SVE_2HSDI
1513 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1514 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1518 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1520 (match_operand:DI 3 "const_int_operand")
1521 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1522 (mem:BLK (scratch))]
1523 UNSPEC_LD1_GATHER))]
1525 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1527 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1528 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
1529 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1531 operands[6] = CONSTM1_RTX (VNx2BImode);
1532 operands[7] = CONSTM1_RTX (VNx2BImode);
1536 ;; Likewise, but with the offset being truncated to 32 bits and then
;; The two alternatives are the unscaled and the %p4-scaled sxtw address
;; forms.  If either operand 6 or operand 7 is not constant, both are
;; rewritten to CONSTM1_RTX (VNx2BImode).
1538 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1539 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1542 (ANY_EXTEND:SVE_2HSDI
1544 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1545 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1550 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1552 (match_operand:DI 3 "const_int_operand")
1553 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1554 (mem:BLK (scratch))]
1555 UNSPEC_LD1_GATHER))]
1557 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1559 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1560 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1561 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1563 operands[6] = CONSTM1_RTX (VNx2BImode);
1564 operands[7] = CONSTM1_RTX (VNx2BImode);
1568 ;; Likewise, but with the offset being truncated to 32 bits and then
;; Operand 6 must satisfy aarch64_sve_uxtw_immediate.  The two
;; alternatives are the unscaled and the %p4-scaled uxtw address forms.
;; Operand 7 is rewritten to CONSTM1_RTX (VNx2BImode) whenever it is not
;; already constant.
1570 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1571 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1574 (ANY_EXTEND:SVE_2HSDI
1576 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1577 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1579 (match_operand:VNx2DI 2 "register_operand" "w, w")
1580 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1581 (match_operand:DI 3 "const_int_operand")
1582 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1583 (mem:BLK (scratch))]
1584 UNSPEC_LD1_GATHER))]
1586 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1588 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1589 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1590 "&& !CONSTANT_P (operands[7])"
1592 operands[7] = CONSTM1_RTX (VNx2BImode);
1596 ;; -------------------------------------------------------------------------
1597 ;; ---- First-faulting gather loads
1598 ;; -------------------------------------------------------------------------
1599 ;; Includes gather forms of:
1602 ;; -------------------------------------------------------------------------
1604 ;; Predicated first-faulting gather loads for 32-bit elements. Operand
1605 ;; 3 is true for unsigned extension and false for signed extension.
;; Operand 5 is the VNx4BI predicate and the unspec also reads
;; FFRT_REGNUM.  The six alternatives follow the address forms in the
;; output templates: vector base %2, vector base plus immediate %1, and
;; scalar base %1 with sxtw/uxtw vector offsets, unscaled or scaled by
;; %p4.
1606 (define_insn "@aarch64_ldff1_gather<mode>"
1607 [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
1609 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1610 (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
1611 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1612 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1613 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
1615 (reg:VNx16BI FFRT_REGNUM)]
1616 UNSPEC_LDFF1_GATHER))]
1619 ldff1w\t%0.s, %5/z, [%2.s]
1620 ldff1w\t%0.s, %5/z, [%2.s, #%1]
1621 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1622 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1623 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1624 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1627 ;; Predicated first-faulting gather loads for 64-bit elements. The value
1628 ;; of operand 3 doesn't matter in this case.
;; Operand 5 is the VNx2BI predicate and the unspec also reads
;; FFRT_REGNUM.  The four alternatives follow the address forms in the
;; output templates: vector base %2, vector base plus immediate %1,
;; scalar base %1 plus vector offsets, and the same form scaled by
;; "lsl %p4".
1629 (define_insn "@aarch64_ldff1_gather<mode>"
1630 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
1632 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1633 (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
1634 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1635 (match_operand:DI 3 "const_int_operand")
1636 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
1638 (reg:VNx16BI FFRT_REGNUM)]
1639 UNSPEC_LDFF1_GATHER))]
1642 ldff1d\t%0.d, %5/z, [%2.d]
1643 ldff1d\t%0.d, %5/z, [%2.d, #%1]
1644 ldff1d\t%0.d, %5/z, [%1, %2.d]
1645 ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1648 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; The two alternatives are the unscaled and the %p4-scaled sxtw address
;; forms.  Operand 6 is rewritten to CONSTM1_RTX (VNx2BImode) whenever it
;; is not already constant.
1649 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1650 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1652 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1653 (match_operand:DI 1 "register_operand" "rk, rk")
1658 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1660 (match_operand:DI 3 "const_int_operand")
1661 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1663 (reg:VNx16BI FFRT_REGNUM)]
1664 UNSPEC_LDFF1_GATHER))]
1667 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1668 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1669 "&& !CONSTANT_P (operands[6])"
1671 operands[6] = CONSTM1_RTX (VNx2BImode);
1675 ;; Likewise, but with the offset being zero-extended from 32 bits.
;; Operand 6 must satisfy aarch64_sve_uxtw_immediate.  The two
;; alternatives are the unscaled and the %p4-scaled uxtw address forms.
1676 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1677 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1679 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1680 (match_operand:DI 1 "register_operand" "rk, rk")
1682 (match_operand:VNx2DI 2 "register_operand" "w, w")
1683 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1684 (match_operand:DI 3 "const_int_operand")
1685 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1687 (reg:VNx16BI FFRT_REGNUM)]
1688 UNSPEC_LDFF1_GATHER))]
1691 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1692 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1695 ;; -------------------------------------------------------------------------
1696 ;; ---- First-faulting extending gather loads
1697 ;; -------------------------------------------------------------------------
1698 ;; Includes gather forms of:
1705 ;; -------------------------------------------------------------------------
1707 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1708 ;; Operand 3 is true for unsigned extension and false for signed extension.
;; Operand 5 predicates the load (%5/z) and the inner unspec also reads
;; FFRT_REGNUM.  The six alternatives follow the address forms in the
;; output templates: vector base %2, vector base plus immediate %1, and
;; scalar base %1 with sxtw/uxtw vector offsets, unscaled or scaled by
;; %p4.  Operand 6 is rewritten to CONSTM1_RTX (VNx4BImode) whenever it
;; is not already constant.
1709 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1710 [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
1712 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1713 (ANY_EXTEND:VNx4_WIDE
1715 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1716 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
1717 (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
1718 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1719 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1721 (reg:VNx16BI FFRT_REGNUM)]
1722 UNSPEC_LDFF1_GATHER))]
1726 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1727 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1728 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1729 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1730 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1731 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1732 "&& !CONSTANT_P (operands[6])"
1734 operands[6] = CONSTM1_RTX (VNx4BImode);
1738 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1739 ;; The value of operand 3 doesn't matter in this case.
;; Operand 5 predicates the load (%5/z) and the inner unspec also reads
;; FFRT_REGNUM.  The four alternatives follow the address forms in the
;; output templates: vector base %2, vector base plus immediate %1,
;; scalar base %1 plus vector offsets, and the same form scaled by
;; "lsl %p4".  Operand 6 is rewritten to CONSTM1_RTX (VNx2BImode)
;; whenever it is not already constant.
1740 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
1741 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
1743 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1744 (ANY_EXTEND:VNx2_WIDE
1746 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1747 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
1748 (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
1749 (match_operand:DI 3 "const_int_operand")
1750 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
1752 (reg:VNx16BI FFRT_REGNUM)]
1753 UNSPEC_LDFF1_GATHER))]
1757 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
1758 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1759 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
1760 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1761 "&& !CONSTANT_P (operands[6])"
1763 operands[6] = CONSTM1_RTX (VNx2BImode);
1767 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; The two alternatives are the unscaled and the %p4-scaled sxtw address
;; forms; the inner unspec also reads FFRT_REGNUM.  If either operand 6
;; or operand 7 is not constant, both are rewritten to
;; CONSTM1_RTX (VNx2BImode).
1768 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
1769 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1772 (ANY_EXTEND:VNx2_WIDE
1774 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1775 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1780 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1782 (match_operand:DI 3 "const_int_operand")
1783 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1785 (reg:VNx16BI FFRT_REGNUM)]
1786 UNSPEC_LDFF1_GATHER))]
1790 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1791 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1792 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1794 operands[6] = CONSTM1_RTX (VNx2BImode);
1795 operands[7] = CONSTM1_RTX (VNx2BImode);
1799 ;; Likewise, but with the offset being zero-extended from 32 bits.
;; Operand 6 must satisfy aarch64_sve_uxtw_immediate and the inner unspec
;; also reads FFRT_REGNUM.  The two alternatives are the unscaled and the
;; %p4-scaled uxtw address forms.  Operand 7 is rewritten to
;; CONSTM1_RTX (VNx2BImode) whenever it is not already constant.
1800 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
1801 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1804 (ANY_EXTEND:VNx2_WIDE
1806 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1807 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1809 (match_operand:VNx2DI 2 "register_operand" "w, w")
1810 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1811 (match_operand:DI 3 "const_int_operand")
1812 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1814 (reg:VNx16BI FFRT_REGNUM)]
1815 UNSPEC_LDFF1_GATHER))]
1819 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1820 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1821 "&& !CONSTANT_P (operands[7])"
1823 operands[7] = CONSTM1_RTX (VNx2BImode);
1827 ;; =========================================================================
1829 ;; =========================================================================
1831 ;; -------------------------------------------------------------------------
1832 ;; ---- Contiguous prefetches
1833 ;; -------------------------------------------------------------------------
1834 ;; Includes contiguous forms of:
1839 ;; -------------------------------------------------------------------------
1841 ;; Contiguous predicated prefetches. Operand 2 gives the real prefetch
1842 ;; operation (as an svprfop), with operands 3 and 4 providing distilled
;; Operand 0 is the governing predicate and operand 1 the prefetch address.
;; The output code wraps operand 1 in a <MODE>mode MEM so that it can be
;; printed as %1, then passes the "prf<Vesize>" mnemonic, operand 2 and the
;; operand string "%0, %1" to aarch64_output_sve_prefetch.
1844 (define_insn "@aarch64_sve_prefetch<mode>"
1845 [(prefetch (unspec:DI
1846 [(match_operand:<VPRED> 0 "register_operand" "Upl")
1847 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
1848 (match_operand:DI 2 "const_int_operand")]
1849 UNSPEC_SVE_PREFETCH)
1850 (match_operand:DI 3 "const_int_operand")
1851 (match_operand:DI 4 "const_int_operand"))]
1854 operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
1855 return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
1859 ;; -------------------------------------------------------------------------
1860 ;; ---- Gather prefetches
1861 ;; -------------------------------------------------------------------------
1862 ;; Includes gather forms of:
1867 ;; -------------------------------------------------------------------------
1869 ;; Predicated gather prefetches for 32-bit bases and offsets. The operands
1871 ;; 0: the governing predicate
1872 ;; 1: the scalar component of the address
1873 ;; 2: the vector component of the address
1874 ;; 3: 1 for zero extension, 0 for sign extension
1875 ;; 4: the scale multiplier
1876 ;; 5: a vector zero that identifies the mode of data being accessed
1877 ;; 6: the prefetch operator (an svprfop)
1878 ;; 7: the normal RTL prefetch rw flag
1879 ;; 8: the normal RTL prefetch locality value
;; The pattern has six alternatives.  which_alternative indexes the
;; mnemonic/operand-string table in the output code, whose rows are in the
;; same order as the constraint columns: vector base only, vector base plus
;; immediate, then scalar base plus vector offset with sxtw/uxtw, first
;; without a scale (hard-coded "prfb") and then with the scale printed as
;; %p4.  Operand 6 is passed on to aarch64_output_sve_prefetch.
1880 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
1881 [(prefetch (unspec:DI
1882 [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1883 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
1884 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
1885 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1886 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1887 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1888 (match_operand:DI 6 "const_int_operand")]
1889 UNSPEC_SVE_PREFETCH_GATHER)
1890 (match_operand:DI 7 "const_int_operand")
1891 (match_operand:DI 8 "const_int_operand"))]
1894 static const char *const insns[][2] = {
1895 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
1896 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
1897 "prfb", "%0, [%1, %2.s, sxtw]",
1898 "prfb", "%0, [%1, %2.s, uxtw]",
1899 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
1900 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
1902 const char *const *parts = insns[which_alternative];
1903 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1907 ;; Predicated gather prefetches for 64-bit elements. The value of operand 3
1908 ;; doesn't matter in this case.
;; The pattern has four alternatives, selected through which_alternative
;; from the table in the output code: vector base only, vector base plus
;; immediate, scalar base plus unscaled vector offset (hard-coded "prfb"),
;; and scalar base plus vector offset with "lsl %p4".  Operand 6 is passed
;; on to aarch64_output_sve_prefetch.
1909 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
1910 [(prefetch (unspec:DI
1911 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
1912 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
1913 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
1914 (match_operand:DI 3 "const_int_operand")
1915 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
1916 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1917 (match_operand:DI 6 "const_int_operand")]
1918 UNSPEC_SVE_PREFETCH_GATHER)
1919 (match_operand:DI 7 "const_int_operand")
1920 (match_operand:DI 8 "const_int_operand"))]
1923 static const char *const insns[][2] = {
1924 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
1925 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
1926 "prfb", "%0, [%1, %2.d]",
1927 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
1929 const char *const *parts = insns[which_alternative];
1930 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1934 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; Two alternatives: an unscaled form that always uses "prfb" and a scaled
;; form that prints the scale as %p4; both use the "sxtw" modifier on the
;; vector offset (operand 2).  The rewrite replaces operand 9 with a copy
;; of operand 0 (the governing predicate) whenever the two are not
;; rtx_equal_p.
1935 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
1936 [(prefetch (unspec:DI
1937 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1938 (match_operand:DI 1 "register_operand" "rk, rk")
1943 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1945 (match_operand:DI 3 "const_int_operand")
1946 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1947 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1948 (match_operand:DI 6 "const_int_operand")]
1949 UNSPEC_SVE_PREFETCH_GATHER)
1950 (match_operand:DI 7 "const_int_operand")
1951 (match_operand:DI 8 "const_int_operand"))]
1954 static const char *const insns[][2] = {
1955 "prfb", "%0, [%1, %2.d, sxtw]",
1956 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
1958 const char *const *parts = insns[which_alternative];
1959 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1961 "&& !rtx_equal_p (operands[0], operands[9])"
1963 operands[9] = copy_rtx (operands[0]);
1967 ;; Likewise, but with the offset being zero-extended from 32 bits.
;; Operand 9 must satisfy aarch64_sve_uxtw_immediate and is paired with
;; the vector offset (operand 2).  This is a plain define_insn with no
;; rewrite step.  The two alternatives print the "uxtw" address without a
;; scale (hard-coded "prfb") and with the scale as %p4.
1968 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
1969 [(prefetch (unspec:DI
1970 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1971 (match_operand:DI 1 "register_operand" "rk, rk")
1973 (match_operand:VNx2DI 2 "register_operand" "w, w")
1974 (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
1975 (match_operand:DI 3 "const_int_operand")
1976 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1977 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1978 (match_operand:DI 6 "const_int_operand")]
1979 UNSPEC_SVE_PREFETCH_GATHER)
1980 (match_operand:DI 7 "const_int_operand")
1981 (match_operand:DI 8 "const_int_operand"))]
1984 static const char *const insns[][2] = {
1985 "prfb", "%0, [%1, %2.d, uxtw]",
1986 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
1988 const char *const *parts = insns[which_alternative];
1989 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1993 ;; =========================================================================
1995 ;; =========================================================================
1997 ;; -------------------------------------------------------------------------
1998 ;; ---- Normal contiguous stores
1999 ;; -------------------------------------------------------------------------
2000 ;; Includes contiguous forms of:
2017 ;; -------------------------------------------------------------------------
;; Predicated ST1: stores vector register operand 1 to memory operand 0
;; under governing predicate operand 2.  Operand 0 uses the read-write
;; "+m" constraint.
2020 (define_insn "maskstore<mode><vpred>"
2021 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2023 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2024 (match_operand:SVE_ALL 1 "register_operand" "w")
2028 "st1<Vesize>\t%1.<Vctype>, %2, %0"
2031 ;; Unpredicated ST[234]. This is always a full update, so the dependence
2032 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2033 ;; There doesn't seem to be any obvious benefit to treating the all-true
2034 ;; case differently though. In particular, it's very unlikely that we'll
2035 ;; only find out during RTL that a store_lanes is dead.
;; Callers supply only the memory destination (operand 0) and the
;; structure register (operand 1); the expander creates operand 2 itself
;; with aarch64_ptrue_reg (<VPRED>mode).
2036 (define_expand "vec_store_lanes<mode><vsingle>"
2037 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2040 (match_operand:SVE_STRUCT 1 "register_operand")
2045 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2049 ;; Predicated ST[234].
;; Emits st<vector_count><Vesize>, storing structure register operand 1 to
;; memory operand 0 under governing predicate operand 2.
2050 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2051 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2053 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2054 (match_operand:SVE_STRUCT 1 "register_operand" "w")
2058 "st<vector_count><Vesize>\t%1, %2, %0"
2061 ;; -------------------------------------------------------------------------
2062 ;; ---- Truncating contiguous stores
2063 ;; -------------------------------------------------------------------------
2068 ;; -------------------------------------------------------------------------
2070 ;; Predicated truncate and store, with 8 elements per 128-bit block.
;; Operand 1 (VNx8_WIDE) is truncated to the VNx8_NARROW mode of memory
;; operand 0 under predicate operand 2.  The mnemonic suffix comes from
;; the narrow mode's Vesize and the register suffix from the wide mode's
;; Vetype.
2071 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2072 [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2074 [(match_operand:VNx8BI 2 "register_operand" "Upl")
2075 (truncate:VNx8_NARROW
2076 (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2080 "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2083 ;; Predicated truncate and store, with 4 elements per 128-bit block.
;; Operand 1 (VNx4_WIDE) is truncated to the VNx4_NARROW mode of memory
;; operand 0 under predicate operand 2.  The mnemonic suffix comes from
;; the narrow mode's Vesize and the register suffix from the wide mode's
;; Vetype.
2084 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2085 [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2087 [(match_operand:VNx4BI 2 "register_operand" "Upl")
2088 (truncate:VNx4_NARROW
2089 (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2093 "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2096 ;; Predicated truncate and store, with 2 elements per 128-bit block.
;; Operand 1 (VNx2_WIDE) is truncated to the VNx2_NARROW mode of memory
;; operand 0 under predicate operand 2.  The mnemonic suffix comes from
;; the narrow mode's Vesize and the register suffix from the wide mode's
;; Vetype.
2097 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2098 [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2100 [(match_operand:VNx2BI 2 "register_operand" "Upl")
2101 (truncate:VNx2_NARROW
2102 (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2106 "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2109 ;; -------------------------------------------------------------------------
2110 ;; ---- Non-temporal contiguous stores
2111 ;; -------------------------------------------------------------------------
2117 ;; -------------------------------------------------------------------------
;; Predicated STNT1: stores vector register operand 1 to memory operand 0
;; under governing predicate operand 2.
2119 (define_insn "@aarch64_stnt1<mode>"
2120 [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2122 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2123 (match_operand:SVE_FULL 1 "register_operand" "w")
2127 "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2130 ;; -------------------------------------------------------------------------
2131 ;; ---- Normal scatter stores
2132 ;; -------------------------------------------------------------------------
2133 ;; Includes scatter forms of:
2136 ;; -------------------------------------------------------------------------
2138 ;; Unpredicated scatter stores.
;; Callers pass operands 0-4 (scalar address component, vector address
;; component, extension flag, scale and the data to store).  The expander
;; creates operand 5 itself with aarch64_ptrue_reg (<VPRED>mode).
2139 (define_expand "scatter_store<mode><v_int_container>"
2140 [(set (mem:BLK (scratch))
2143 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2144 (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2145 (match_operand:DI 2 "const_int_operand")
2146 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2147 (match_operand:SVE_24 4 "register_operand")]
2148 UNSPEC_ST1_SCATTER))]
2151 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2155 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
2156 ;; unsigned extension and false for signed extension.
;; Operand 5 is the governing predicate and operand 4 the data.  The six
;; alternatives print, in order: vector base only; vector base plus
;; immediate #%0; scalar base %0 plus vector offset with sxtw, then uxtw;
;; and the same two forms again with the scale printed as %p3.
2157 (define_insn "mask_scatter_store<mode><v_int_container>"
2158 [(set (mem:BLK (scratch))
2160 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2161 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
2162 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2163 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2164 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2165 (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
2166 UNSPEC_ST1_SCATTER))]
2169 st1<Vesize>\t%4.s, %5, [%1.s]
2170 st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2171 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2172 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2173 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2174 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2177 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
2178 ;; doesn't matter in this case.
;; Operand 5 is the governing predicate and operand 4 the data.  The four
;; alternatives print, in order: vector base only; vector base plus
;; immediate #%0; scalar base %0 plus unscaled vector offset; and scalar
;; base plus vector offset with "lsl %p3".
2179 (define_insn "mask_scatter_store<mode><v_int_container>"
2180 [(set (mem:BLK (scratch))
2182 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2183 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
2184 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2185 (match_operand:DI 2 "const_int_operand")
2186 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
2187 (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
2188 UNSPEC_ST1_SCATTER))]
2191 st1<Vesize>\t%4.d, %5, [%1.d]
2192 st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2193 st1<Vesize>\t%4.d, %5, [%0, %1.d]
2194 st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2197 ;; Likewise, but with the offset being extended from 32 bits.
;; The vector offset (operand 1) is a VNx2SI register here and is printed
;; with a <su>xtw modifier; the second alternative also prints the scale
;; as %p3.  The rewrite replaces operand 6 with CONSTM1_RTX (<VPRED>mode)
;; when it is not already constant.
2198 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2199 [(set (mem:BLK (scratch))
2201 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2202 (match_operand:DI 0 "register_operand" "rk, rk")
2206 (match_operand:VNx2SI 1 "register_operand" "w, w"))]
2208 (match_operand:DI 2 "const_int_operand")
2209 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2210 (match_operand:SVE_2 4 "register_operand" "w, w")]
2211 UNSPEC_ST1_SCATTER))]
2214 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2215 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
2216 "&& !CONSTANT_P (operands[6])"
2218 operands[6] = CONSTM1_RTX (<VPRED>mode);
2222 ;; Likewise, but with the offset being truncated to 32 bits and then
;; The vector offset (operand 1) is a VNx2DI register printed with an
;; "sxtw" modifier; the second alternative also prints the scale as %p3.
;; The rewrite replaces operand 6 with CONSTM1_RTX (<VPRED>mode) when it
;; is not already constant.
2224 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2225 [(set (mem:BLK (scratch))
2227 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2228 (match_operand:DI 0 "register_operand" "rk, rk")
2233 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2235 (match_operand:DI 2 "const_int_operand")
2236 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2237 (match_operand:SVE_2 4 "register_operand" "w, w")]
2238 UNSPEC_ST1_SCATTER))]
2241 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2242 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2243 "&& !CONSTANT_P (operands[6])"
2245 operands[6] = CONSTM1_RTX (<VPRED>mode);
2249 ;; Likewise, but with the offset being truncated to 32 bits and then
;; Operand 6 must satisfy aarch64_sve_uxtw_immediate and is paired with
;; the vector offset (operand 1), which is printed with a "uxtw" modifier.
;; The scalar base (operand 0) uses the aarch64_reg_or_zero predicate.
;; This is a plain define_insn with no rewrite step.
2251 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2252 [(set (mem:BLK (scratch))
2254 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2255 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2257 (match_operand:VNx2DI 1 "register_operand" "w, w")
2258 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2259 (match_operand:DI 2 "const_int_operand")
2260 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2261 (match_operand:SVE_2 4 "register_operand" "w, w")]
2262 UNSPEC_ST1_SCATTER))]
2265 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2266 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2269 ;; -------------------------------------------------------------------------
2270 ;; ---- Truncating scatter stores
2271 ;; -------------------------------------------------------------------------
2272 ;; Includes scatter forms of:
2276 ;; -------------------------------------------------------------------------
2278 ;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is
2279 ;; true for unsigned extension and false for signed extension.
;; The data (operand 4, VNx4_WIDE) appears under a truncate to
;; VNx4_NARROW, and the mnemonic suffix is the narrow mode's Vesize.  The
;; six alternatives print, in order: vector base only; vector base plus
;; immediate #%0; scalar base plus vector offset with sxtw, then uxtw; and
;; the same two forms again with the scale printed as %p3.
2280 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2281 [(set (mem:BLK (scratch))
2283 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2284 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2285 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2286 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2287 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2288 (truncate:VNx4_NARROW
2289 (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
2290 UNSPEC_ST1_SCATTER))]
2293 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2294 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2295 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2296 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2297 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2298 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2301 ;; Predicated truncating scatter stores for 64-bit elements. The value of
2302 ;; operand 2 doesn't matter in this case.
;; The data (operand 4, VNx2_WIDE) appears under a truncate to
;; VNx2_NARROW, and the mnemonic suffix is the narrow mode's Vesize.  The
;; four alternatives print, in order: vector base only; vector base plus
;; immediate #%0; scalar base plus unscaled vector offset; and scalar base
;; plus vector offset with "lsl %p3".
2303 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2304 [(set (mem:BLK (scratch))
2306 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2307 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2308 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2309 (match_operand:DI 2 "const_int_operand")
2310 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2311 (truncate:VNx2_NARROW
2312 (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
2313 UNSPEC_ST1_SCATTER))]
2316 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2317 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2318 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2319 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2322 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; The vector offset (operand 1) is printed with an "sxtw" modifier; the
;; second alternative also prints the scale as %p3.  The rewrite replaces
;; operand 6 with a copy of operand 5 (the governing predicate) whenever
;; the two are not rtx_equal_p.
2323 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2324 [(set (mem:BLK (scratch))
2326 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2327 (match_operand:DI 0 "register_operand" "rk, rk")
2332 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2334 (match_operand:DI 2 "const_int_operand")
2335 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2336 (truncate:VNx2_NARROW
2337 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2338 UNSPEC_ST1_SCATTER))]
2341 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2342 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2343 "&& !rtx_equal_p (operands[5], operands[6])"
2345 operands[6] = copy_rtx (operands[5]);
2349 ;; Likewise, but with the offset being zero-extended from 32 bits.
;; Operand 6 must satisfy aarch64_sve_uxtw_immediate and is paired with
;; the vector offset (operand 1), which is printed with a "uxtw" modifier.
;; The scalar base (operand 0) uses the aarch64_reg_or_zero predicate.
;; This is a plain define_insn with no rewrite step.
2350 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2351 [(set (mem:BLK (scratch))
2353 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2354 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2356 (match_operand:VNx2DI 1 "register_operand" "w, w")
2357 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2358 (match_operand:DI 2 "const_int_operand")
2359 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2360 (truncate:VNx2_NARROW
2361 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2362 UNSPEC_ST1_SCATTER))]
2365 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2366 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2369 ;; =========================================================================
2370 ;; == Vector creation
2371 ;; =========================================================================
2373 ;; -------------------------------------------------------------------------
2374 ;; ---- [INT,FP] Duplicate element
2375 ;; -------------------------------------------------------------------------
2387 ;; -------------------------------------------------------------------------
;; Broadcast scalar operand 1 into every element of SVE vector operand 0.
;; The pattern carries a VNx16BI scratch clobber.  When operand 1 is a MEM,
;; the expander instead emits sve_ld1r<mode>, passing a predicate obtained
;; from aarch64_ptrue_reg (<VPRED>mode) and a zero vector as the last
;; argument.
2389 (define_expand "vec_duplicate<mode>"
2391 [(set (match_operand:SVE_ALL 0 "register_operand")
2392 (vec_duplicate:SVE_ALL
2393 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2394 (clobber (scratch:VNx16BI))])]
2397 if (MEM_P (operands[1]))
2399 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2400 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2401 CONST0_RTX (<MODE>mode)));
2407 ;; Accept memory operands for the benefit of combine, and also in case
2408 ;; the scalar input gets spilled to memory during RA. We want to split
2409 ;; the load at the first opportunity in order to allow the PTRUE to be
2410 ;; optimized with surrounding code.
;; The three alternatives take operand 1 from a general register ("r"), a
;; vector register ("w") or memory ("Uty"); only the memory alternative
;; needs the predicate scratch (operand 2, "Upl").  The split applies when
;; operand 1 is a MEM: it allocates a VNx16BI pseudo if operand 2 is still
;; a SCRATCH, loads CONSTM1_RTX into it, takes its <VPRED>mode lowpart as
;; the governing predicate and emits sve_ld1r<mode>.
2411 (define_insn_and_split "*vec_duplicate<mode>_reg"
2412 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
2413 (vec_duplicate:SVE_ALL
2414 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
2415 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2418 mov\t%0.<Vetype>, %<vwcore>1
2419 mov\t%0.<Vetype>, %<Vetype>1
2421 "&& MEM_P (operands[1])"
2424 if (GET_CODE (operands[2]) == SCRATCH)
2425 operands[2] = gen_reg_rtx (VNx16BImode);
2426 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2427 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2428 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2429 CONST0_RTX (<MODE>mode)));
2432 [(set_attr "length" "4,4,8")]
2435 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
;; Only enabled when !BYTES_BIG_ENDIAN.  The output code re-expresses
;; operand 1 as a <MODE>mode REG with the same register number before
;; printing "dup %0.q, %1.q[0]".
2436 (define_insn "@aarch64_vec_duplicate_vq<mode>_le"
2437 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2438 (vec_duplicate:SVE_FULL
2439 (match_operand:<V128> 1 "register_operand" "w")))]
2440 "TARGET_SVE && !BYTES_BIG_ENDIAN"
2442 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2443 return "dup\t%0.q, %1.q[0]";
2447 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2448 ;; The SVE register layout puts memory lane N into (architectural)
2449 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2450 ;; lsb into the register lsb. We therefore have to describe this in rtl
2451 ;; terms as a reverse of the V128 vector followed by a duplicate.
;; Operand 2 is a descending_int_parallel; the insn condition additionally
;; requires its first element to equal GET_MODE_NUNITS (<V128>mode) - 1.
;; The output code re-expresses operand 1 as a <MODE>mode REG with the same
;; register number before printing "dup %0.q, %1.q[0]".
2452 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2453 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2454 (vec_duplicate:SVE_FULL
2456 (match_operand:<V128> 1 "register_operand" "w")
2457 (match_operand 2 "descending_int_parallel"))))]
2460 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2461 GET_MODE_NUNITS (<V128>mode) - 1)"
2463 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2464 return "dup\t%0.q, %1.q[0]";
2468 ;; This is used for vec_duplicate<mode>s from memory, but can also
2469 ;; be used by combine to optimize selects of a vec_duplicate<mode>
;; Operand 1 is the governing predicate, operand 2 the scalar memory source
;; that is duplicated, and operand 3 a zero vector.  The load is printed
;; with zeroing predication (%1/z).
2471 (define_insn "sve_ld1r<mode>"
2472 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2474 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2475 (vec_duplicate:SVE_ALL
2476 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2477 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2480 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2483 ;; Load 128 bits from memory under predicate control and duplicate to
;; Operand 2 is the governing predicate and operand 1 the <V128> memory
;; source.  The output code replaces operand 1 with a <VEL>mode MEM at the
;; same address before printing the LD1RQ instruction.
2485 (define_insn "@aarch64_sve_ld1rq<mode>"
2486 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2488 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2489 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2493 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2494 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
;; LD1RO form of the replicating load: operand 1 is an OImode memory
;; source and operand 2 the governing predicate.  Requires TARGET_F64MM in
;; addition to TARGET_SVE.  The output code replaces operand 1 with a
;; <VEL>mode MEM at the same address before printing.
2498 (define_insn "@aarch64_sve_ld1ro<mode>"
2499 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2501 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2502 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2505 "TARGET_SVE && TARGET_F64MM"
2507 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2508 return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2512 ;; -------------------------------------------------------------------------
2513 ;; ---- [INT,FP] Initialize from individual elements
2514 ;; -------------------------------------------------------------------------
2517 ;; -------------------------------------------------------------------------
;; Operand 0 is the destination vector; operand 1 has no predicate or mode
;; restriction.  All of the work is delegated to
;; aarch64_sve_expand_vector_init.
2519 (define_expand "vec_init<mode><Vel>"
2520 [(match_operand:SVE_FULL 0 "register_operand")
2521 (match_operand 1 "")]
2524 aarch64_sve_expand_vector_init (operands[0], operands[1]);
2529 ;; Shift an SVE vector left and insert a scalar into element 0.
;; The first two alternatives tie operand 1 to operand 0 and emit a single
;; INSR, taking the scalar from a general register or zero ("rZ") or from a
;; vector register ("w").  The last two allow a separate source register
;; and first copy it with MOVPRFX; they are marked with the "movprfx"
;; attribute.
2530 (define_insn "vec_shl_insert_<mode>"
2531 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
2533 [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
2534 (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
2538 insr\t%0.<Vetype>, %<vwcore>2
2539 insr\t%0.<Vetype>, %<Vetype>2
2540 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2541 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
2542 [(set_attr "movprfx" "*,*,yes,yes")]
2545 ;; -------------------------------------------------------------------------
2546 ;; ---- [INT] Linear series
2547 ;; -------------------------------------------------------------------------
2550 ;; -------------------------------------------------------------------------
;; INDEX with operand 1 as the first source and operand 2 as the second.
;; The three alternatives are: immediate ("Usi") operand 1 with register
;; operand 2; register operand 1 with immediate operand 2; and both in
;; registers.
2552 (define_insn "vec_series<mode>"
2553 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
2555 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
2556 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
2559 index\t%0.<Vctype>, #%1, %<vccore>2
2560 index\t%0.<Vctype>, %<vccore>1, #%2
2561 index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
2564 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2565 ;; of an INDEX instruction.
;; aarch64_check_zero_based_sve_index_immediate is used twice: in the insn
;; condition, to accept only suitable vector immediates for operand 2, and
;; in the output code, where its result replaces operand 2 so that it can
;; be printed as the "#%2" immediate of INDEX.
2566 (define_insn "*vec_series<mode>_plus"
2567 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2569 (vec_duplicate:SVE_I
2570 (match_operand:<VEL> 1 "register_operand" "r"))
2571 (match_operand:SVE_I 2 "immediate_operand")))]
2572 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2574 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2575 return "index\t%0.<Vctype>, %<vccore>1, #%2";
2579 ;; -------------------------------------------------------------------------
2580 ;; ---- [PRED] Duplicate element
2581 ;; -------------------------------------------------------------------------
2582 ;; The patterns in this section are synthetic.
2583 ;; -------------------------------------------------------------------------
2585 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2586 ;; input into the top bit and using a WHILELO. An alternative would be to
2587 ;; duplicate the input and do a compare with zero.
;; Expansion steps: take the DImode lowpart of QImode operand 1, shift it
;; left by 63 into a fresh DImode pseudo, then emit while_ultdi<mode> with
;; const0_rtx and that pseudo as its two inputs.
2588 (define_expand "vec_duplicate<mode>"
2589 [(set (match_operand:PRED_ALL 0 "register_operand")
2590 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2593 rtx tmp = gen_reg_rtx (DImode);
2594 rtx op1 = gen_lowpart (DImode, operands[1]);
2595 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2596 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2601 ;; =========================================================================
2602 ;; == Vector decomposition
2603 ;; =========================================================================
2605 ;; -------------------------------------------------------------------------
2606 ;; ---- [INT,FP] Extract index
2607 ;; -------------------------------------------------------------------------
2609 ;; - DUP (Advanced SIMD)
2612 ;; - ST1 (Advanced SIMD)
2613 ;; - UMOV (Advanced SIMD)
2614 ;; -------------------------------------------------------------------------
;; Extract element operand 2 of SVE vector operand 1 into scalar operand 0.
;; Two cases are handled by explicit code:
;;   - an index known to equal GET_MODE_NUNITS (<MODE>mode) - 1 uses
;;     extract_last with a predicate from aarch64_pfalse_reg;
;;   - an index that is not a CONST_INT builds a vec_series with step -1,
;;     compares it with zero to get a predicate, and uses extract_last
;;     with that predicate.
2616 (define_expand "vec_extract<mode><Vel>"
2617 [(set (match_operand:<VEL> 0 "register_operand")
2619 (match_operand:SVE_FULL 1 "register_operand")
2620 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2624 if (poly_int_rtx_p (operands[2], &val)
2625 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2627 /* The last element can be extracted with a LASTB and a false
2629 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2630 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2633 if (!CONST_INT_P (operands[2]))
2635 /* Create an index with operand[2] as the base and -1 as the step.
2636 It will then be zero for the element we care about. */
2637 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2638 index = force_reg (<VEL_INT>mode, index);
2639 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2640 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2642 /* Get a predicate that is true for only that element. */
2643 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2644 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2645 rtx sel = gen_reg_rtx (<VPRED>mode);
2646 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2648 /* Select the element using LASTB. */
2649 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2655 ;; Extract element zero. This is a special case because we want to force
2656 ;; the registers to be the same for the second alternative, and then
2657 ;; split the instruction into nothing after RA.
;; The destination alternatives are a general register ("r"), the source
;; register itself ("w" tied with "0") and memory ("Utv").  The output
;; code first re-expresses operand 1 as a <V128>mode REG.  The split is
;; enabled only after reload, when operand 0 is a REG with the same
;; register number as operand 1, and emits just a NOTE_INSN_DELETED.
2658 (define_insn_and_split "*vec_extract<mode><Vel>_0"
2659 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2661 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
2662 (parallel [(const_int 0)])))]
2665 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2666 switch (which_alternative)
2669 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2673 return "st1\\t{%1.<Vetype>}[0], %0";
2678 "&& reload_completed
2679 && REG_P (operands[0])
2680 && REGNO (operands[0]) == REGNO (operands[1])"
2683 emit_note (NOTE_INSN_DELETED);
2686 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
2689 ;; Extract an element from the Advanced SIMD portion of the register.
2690 ;; We don't just reuse the aarch64-simd.md pattern because we don't
2691 ;; want any change in lane number on big-endian targets.
;; Matches constant indices whose byte offset (index times
;; GET_MODE_SIZE (<VEL>mode)) lies in the range 1 to 15.  The output code
;; re-expresses operand 1 as a <V128>mode REG and then emits UMOV, DUP or
;; ST1, depending on the alternative.
2692 (define_insn "*vec_extract<mode><Vel>_v128"
2693 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2695 (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
2696 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2698 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
2700 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2701 switch (which_alternative)
2704 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2706 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2708 return "st1\\t{%1.<Vetype>}[%2], %0";
2713 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
2716 ;; Extract an element in the range of DUP. This pattern allows the
2717 ;; source and destination to be different.
;; Matches constant indices whose byte offset (index times
;; GET_MODE_SIZE (<VEL>mode)) lies in the range 16 to 63.  The output code
;; re-expresses operand 0 as a <MODE>mode REG so that it can be printed
;; with a vector suffix in the DUP instruction.
2718 (define_insn "*vec_extract<mode><Vel>_dup"
2719 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2721 (match_operand:SVE_FULL 1 "register_operand" "w")
2722 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2724 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
2726 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2727 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
2731 ;; Extract an element outside the range of DUP. This pattern requires the
2732 ;; source and destination to be the same.
;; Matches constant indices whose byte offset is at least 64.  The output
;; code re-expresses operand 0 as a <MODE>mode REG and replaces operand 2
;; with the byte offset.  Alternative 0 has operand 1 tied to operand 0
;; and emits a single EXT; alternative 1 copies the source with MOVPRFX
;; first and is marked with the "movprfx" attribute.
2733 (define_insn "*vec_extract<mode><Vel>_ext"
2734 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
2736 (match_operand:SVE_FULL 1 "register_operand" "0, w")
2737 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2738 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
2740 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2741 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
2742 return (which_alternative == 0
2743 ? "ext\t%0.b, %0.b, %0.b, #%2"
2744 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
2746 [(set_attr "movprfx" "*,yes")]
2749 ;; -------------------------------------------------------------------------
2750 ;; ---- [INT,FP] Extract active element
2751 ;; -------------------------------------------------------------------------
2755 ;; -------------------------------------------------------------------------
2757 ;; Extract the last active element of operand 1 into operand 0.
2758 ;; If no elements are active, extract the last inactive element instead.
;; Operand 1 is the governing predicate and operand 2 the source vector.
;; The two alternatives write the result to a general register (printed
;; as %<vwcore>0) or to a vector register (printed as %<Vetype>0).
2759 (define_insn "@extract_<last_op>_<mode>"
2760 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
2762 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2763 (match_operand:SVE_FULL 2 "register_operand" "w, w")]
2767 last<ab>\t%<vwcore>0, %1, %2.<Vetype>
2768 last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
2771 ;; -------------------------------------------------------------------------
2772 ;; ---- [PRED] Extract index
2773 ;; -------------------------------------------------------------------------
2774 ;; The patterns in this section are synthetic.
2775 ;; -------------------------------------------------------------------------
2777 ;; Handle extractions from a predicate by converting to an integer vector
2778 ;; and extracting from there.
;; Operand 0 is the scalar result, operand 1 the predicate and operand 2
;; the element index.  The predicate is first turned into a <MODE> vector
;; holding 1 where the predicate is set and 0 elsewhere; the requested
;; element is then extracted from that temporary.
;; The vcond_mask generator takes its operands in the order
;; (destination, value-if-true, value-if-false, mask), so the predicate
;; must be passed last.
2779 (define_expand "vec_extract<vpred><Vel>"
2780 [(match_operand:<VEL> 0 "register_operand")
2781 (match_operand:<VPRED> 1 "register_operand")
2782 (match_operand:SI 2 "nonmemory_operand")
2783 ;; Dummy operand to which we can attach the iterator.
2784 (reg:SVE_FULL_I V0_REGNUM)]
2787 rtx tmp = gen_reg_rtx (<MODE>mode);
2788 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, CONST1_RTX (<MODE>mode),
2789 CONST0_RTX (<MODE>mode),
2790 operands[1]));
2791 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
2796 ;; =========================================================================
2797 ;; == Unary arithmetic
2798 ;; =========================================================================
2800 ;; -------------------------------------------------------------------------
2801 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
2802 ;; -------------------------------------------------------------------------
2807 ;; - CNT (= popcount)
2810 ;; -------------------------------------------------------------------------
2812 ;; Unpredicated integer unary arithmetic.
;; Operands 0 and 1 come from the caller; the preparation code sets
;; operand 2 to aarch64_ptrue_reg (<VPRED>mode).
2813 (define_expand "<optab><mode>2"
2814 [(set (match_operand:SVE_FULL_I 0 "register_operand")
2817 (SVE_INT_UNARY:SVE_FULL_I
2818 (match_operand:SVE_FULL_I 1 "register_operand"))]
2822 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2826 ;; Integer unary arithmetic predicated with a PTRUE.
;; Operand 1 is the predicate ("Upl") and operand 2 the vector input; the
;; instruction is printed in its merging (%1/m) form.
2827 (define_insn "@aarch64_pred_<optab><mode>"
2828 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2830 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2831 (SVE_INT_UNARY:SVE_FULL_I
2832 (match_operand:SVE_FULL_I 2 "register_operand" "w"))]
2835 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2838 ;; Predicated integer unary arithmetic with merging.
;; Operand 1 is the predicate, operand 2 the input and operand 3 the value
;; to merge with, which may be a register or zero
;; (aarch64_simd_reg_or_zero).
2839 (define_expand "@cond_<optab><mode>"
2840 [(set (match_operand:SVE_FULL_I 0 "register_operand")
2842 [(match_operand:<VPRED> 1 "register_operand")
2843 (SVE_INT_UNARY:SVE_FULL_I
2844 (match_operand:SVE_FULL_I 2 "register_operand"))
2845 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
2850 ;; Predicated integer unary arithmetic, merging with the first input.
;; Alternative 0 ties operand 2 to the destination.  Alternative 1 allows a
;; separate input register and copies it into the destination with MOVPRFX
;; before the operation.
2851 (define_insn "*cond_<optab><mode>_2"
2852 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
2854 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2855 (SVE_INT_UNARY:SVE_FULL_I
2856 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
2861 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
2862 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2863 [(set_attr "movprfx" "*,yes")]
2866 ;; Predicated integer unary arithmetic, merging with an independent value.
2868 ;; The earlyclobber isn't needed for the first alternative, but omitting
2869 ;; it would only help the case in which operands 2 and 3 are the same,
2870 ;; which is handled above rather than here. Marking all the alternatives
2871 ;; as earlyclobber helps to make the instruction more regular to the
2872 ;; register allocator.
;; The alternatives cover, in order: operand 3 tied to the destination
;; ("0"), operand 3 being zero ("Dz", using a zeroing MOVPRFX) and operand 3
;; in a separate register (MOVPRFX from operand 3).  The insn condition
;; rejects the case in which operands 2 and 3 are equal.
2873 (define_insn "*cond_<optab><mode>_any"
2874 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
2876 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2877 (SVE_INT_UNARY:SVE_FULL_I
2878 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w"))
2879 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2881 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
2883 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2884 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2885 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2886 [(set_attr "movprfx" "*,yes,yes")]
2889 ;; -------------------------------------------------------------------------
2890 ;; ---- [INT] General unary arithmetic corresponding to unspecs
2891 ;; -------------------------------------------------------------------------
2897 ;; -------------------------------------------------------------------------
2899 ;; Predicated integer unary operations.
;; Besides TARGET_SVE, the insn condition requires
;; <elem_bits> >= <min_elem_bits>.  The instruction is printed in its
;; merging (%1/m) form.
2900 (define_insn "@aarch64_pred_<optab><mode>"
2901 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2903 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2905 [(match_operand:SVE_FULL_I 2 "register_operand" "w")]
2908 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2909 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2912 ;; Predicated integer unary operations with merging.
;; Operand 3 is the value to merge with.  The alternatives cover, in order:
;; operand 3 tied to the destination, operand 3 being zero (zeroing MOVPRFX)
;; and operand 3 in a separate register (MOVPRFX from operand 3).
;; The insn condition also requires <elem_bits> >= <min_elem_bits>.
2913 (define_insn "@cond_<optab><mode>"
2914 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
2916 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2918 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
2920 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2922 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2924 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2925 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2926 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2927 [(set_attr "movprfx" "*,yes,yes")]
2930 ;; -------------------------------------------------------------------------
2931 ;; ---- [INT] Sign and zero extension
2932 ;; -------------------------------------------------------------------------
2940 ;; -------------------------------------------------------------------------
2942 ;; Unpredicated sign and zero extension from a narrower mode.
;; The condition on <narrower_mask> and <self_mask> filters the
;; (SVE_HSDI, SVE_PARTIAL_I) mode pairs for which the pattern is enabled
;; (presumably those in which the source elements really are narrower).
;; The preparation code sets operand 2 to
;; aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode).
2943 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2944 [(set (match_operand:SVE_HSDI 0 "register_operand")
2947 (ANY_EXTEND:SVE_HSDI
2948 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
2950 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2952 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
2956 ;; Predicated sign and zero extension from a narrower mode.
;; The mnemonic is <su>xt followed by the <Vesize> of the narrower mode;
;; both vector operands are printed with the wider mode's <Vetype>.
2957 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2958 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2960 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
2961 (ANY_EXTEND:SVE_HSDI
2962 (match_operand:SVE_PARTIAL_I 2 "register_operand" "w"))]
2964 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2965 "<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
2968 ;; Predicated truncate-and-sign-extend operations.
;; Operand 2 and the result share the full mode; the operation is
;; represented as a sign_extend of a truncate to the partial mode.
2969 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2970 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2971 (unspec:SVE_FULL_HSDI
2972 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
2973 (sign_extend:SVE_FULL_HSDI
2974 (truncate:SVE_PARTIAL_I
2975 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
2978 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2979 "sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
2982 ;; Predicated truncate-and-sign-extend operations with merging.
;; Operand 3 is the value to merge with.  The alternatives cover, in order:
;; operand 3 tied to the destination, operand 3 being zero (zeroing MOVPRFX)
;; and operand 3 in a separate register (MOVPRFX from operand 3).
2983 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2984 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
2985 (unspec:SVE_FULL_HSDI
2986 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
2987 (sign_extend:SVE_FULL_HSDI
2988 (truncate:SVE_PARTIAL_I
2989 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
2990 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2993 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2995 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
2996 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
2997 movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
2998 [(set_attr "movprfx" "*,yes,yes")]
3001 ;; Predicated truncate-and-zero-extend operations, merging with the
;; first input.
3004 ;; The canonical form of this operation is an AND of a constant rather
3005 ;; than (zero_extend (truncate ...)).
;; Operand 3 is that constant (aarch64_sve_uxt_immediate); the width suffix
;; of the UXT mnemonic is printed from it via %e3.  Alternative 0 ties
;; operand 2 to the destination; alternative 1 copies operand 2 into the
;; destination with MOVPRFX first.
3006 (define_insn "*cond_uxt<mode>_2"
3007 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3009 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3011 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3012 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3017 uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3018 movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3019 [(set_attr "movprfx" "*,yes")]
3022 ;; Predicated truncate-and-zero-extend operations, merging with an
3023 ;; independent value.
3025 ;; The earlyclobber isn't needed for the first alternative, but omitting
3026 ;; it would only help the case in which operands 2 and 4 are the same,
3027 ;; which is handled above rather than here. Marking all the alternatives
3028 ;; as early-clobber helps to make the instruction more regular to the
3029 ;; register allocator.
;; Operand 3 is the AND constant and operand 4 the value to merge with.
;; The alternatives cover, in order: operand 4 tied to the destination,
;; operand 4 being zero (zeroing MOVPRFX) and operand 4 in a separate
;; register (MOVPRFX from operand 4).
3030 (define_insn "*cond_uxt<mode>_any"
3031 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3033 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3035 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3036 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3037 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3039 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3041 uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3042 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3043 movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3044 [(set_attr "movprfx" "*,yes,yes")]
3047 ;; -------------------------------------------------------------------------
3048 ;; ---- [INT] Truncation
3049 ;; -------------------------------------------------------------------------
3050 ;; The patterns in this section are synthetic.
3051 ;; -------------------------------------------------------------------------
3053 ;; Truncate to a partial SVE vector from either a full vector or a
3054 ;; wider partial vector. This is a no-op, because we can just ignore
3055 ;; the unused upper bits of the source.
;; After reload the insn is split into a plain set of operand 0 from
;; operand 1, with operand 1 first re-typed to the partial mode by
;; aarch64_replace_reg_mode.
3056 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3057 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3058 (truncate:SVE_PARTIAL_I
3059 (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3060 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3062 "&& reload_completed"
3063 [(set (match_dup 0) (match_dup 1))]
3065 operands[1] = aarch64_replace_reg_mode (operands[1],
3066 <SVE_PARTIAL_I:MODE>mode);
3070 ;; -------------------------------------------------------------------------
3071 ;; ---- [INT] Logical inverse
3072 ;; -------------------------------------------------------------------------
3075 ;; -------------------------------------------------------------------------
3077 ;; Predicated logical inverse.
;; Operand 1 is the predicate, operand 2 a ptrue flag
;; (aarch64_sve_ptrue_flag) and operand 3 the vector input.  The
;; preparation code supplies operands 4 and 5 as the zero and one constants
;; of <MODE>.
3078 (define_expand "@aarch64_pred_cnot<mode>"
3079 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3082 [(match_operand:<VPRED> 1 "register_operand")
3083 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
3085 (match_operand:SVE_FULL_I 3 "register_operand")
3093 operands[4] = CONST0_RTX (<MODE>mode);
3094 operands[5] = CONST1_RTX (<MODE>mode);
;; Matching form of the predicated logical inverse.  Operand 2 is the
;; vector input, operand 3 must be the zero constant and operand 4 the one
;; constant; operand 5 is the ptrue flag.  CNOT is printed in its merging
;; (%1/m) form.
3098 (define_insn "*cnot<mode>"
3099 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
3102 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3103 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
3105 (match_operand:SVE_FULL_I 2 "register_operand" "w")
3106 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3108 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3112 "cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3115 ;; Predicated logical inverse with merging.
;; Operand 1 is the predicate, operand 2 the vector input and operand 3 the
;; value to merge with.  The preparation code sets operand 4 to
;; CONSTM1_RTX (<VPRED>mode) and operands 5 and 6 to the zero and one
;; constants of <MODE>.
3116 (define_expand "@cond_cnot<mode>"
3117 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3119 [(match_operand:<VPRED> 1 "register_operand")
3123 (const_int SVE_KNOWN_PTRUE)
3125 (match_operand:SVE_FULL_I 2 "register_operand")
3131 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3135 operands[4] = CONSTM1_RTX (<VPRED>mode);
3136 operands[5] = CONST0_RTX (<MODE>mode);
3137 operands[6] = CONST1_RTX (<MODE>mode);
3141 ;; Predicated logical inverse, merging with the first input.
;; Alternative 0 ties operand 2 to the destination; alternative 1 copies
;; operand 2 into the destination with MOVPRFX first.  The rewrite step
;; fires when operand 5 is not already a constant and replaces it with
;; CONSTM1_RTX (<VPRED>mode).
3142 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3143 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3145 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3146 ;; Logical inverse of operand 2 (as above).
3150 (const_int SVE_KNOWN_PTRUE)
3152 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3153 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3155 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3162 cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3163 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3164 "&& !CONSTANT_P (operands[5])"
3166 operands[5] = CONSTM1_RTX (<VPRED>mode);
3168 [(set_attr "movprfx" "*,yes")]
3171 ;; Predicated logical inverse, merging with an independent value.
3173 ;; The earlyclobber isn't needed for the first alternative, but omitting
3174 ;; it would only help the case in which operands 2 and 6 are the same,
3175 ;; which is handled above rather than here. Marking all the alternatives
3176 ;; as earlyclobber helps to make the instruction more regular to the
3177 ;; register allocator.
;; Operand 6 is the value to merge with.  The alternatives cover, in order:
;; operand 6 tied to the destination, operand 6 being zero (zeroing MOVPRFX)
;; and operand 6 in a separate register (MOVPRFX from operand 6).
;; The rewrite step fires when operand 5 is not already a constant and
;; replaces it with CONSTM1_RTX (<VPRED>mode).
3178 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3179 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3181 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3182 ;; Logical inverse of operand 2 (as above).
3186 (const_int SVE_KNOWN_PTRUE)
3188 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3189 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3191 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3194 (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3196 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3198 cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3199 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3200 movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3201 "&& !CONSTANT_P (operands[5])"
3203 operands[5] = CONSTM1_RTX (<VPRED>mode);
3205 [(set_attr "movprfx" "*,yes,yes")]
3208 ;; -------------------------------------------------------------------------
3209 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3210 ;; -------------------------------------------------------------------------
3213 ;; -------------------------------------------------------------------------
3215 ;; Unpredicated unary operations that take an integer and return a float.
;; The input (operand 1) has mode <V_INT_EQUIV>; both operands are printed
;; with the same <Vetype> suffix.
3216 (define_insn "@aarch64_sve_<optab><mode>"
3217 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3219 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3222 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3225 ;; -------------------------------------------------------------------------
3226 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3227 ;; -------------------------------------------------------------------------
3242 ;; -------------------------------------------------------------------------
3244 ;; Unpredicated floating-point unary operations.
;; The input and the result have the same SVE_FULL_F mode; no predicate
;; operand appears in the printed instruction.
3245 (define_insn "@aarch64_sve_<optab><mode>"
3246 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3248 [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3251 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3254 ;; Unpredicated floating-point unary operations.
;; This is the optab expander: it generates the SVE_COND_FP_UNARY unspec
;; with SVE_RELAXED_GP strictness, and the preparation code sets operand 2
;; to aarch64_ptrue_reg (<VPRED>mode).
3255 (define_expand "<optab><mode>2"
3256 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3259 (const_int SVE_RELAXED_GP)
3260 (match_operand:SVE_FULL_F 1 "register_operand")]
3261 SVE_COND_FP_UNARY))]
3264 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3268 ;; Predicated floating-point unary operations.
;; Operand 1 is the predicate, operand 2 the vector input and operand 3 the
;; GP strictness flag (aarch64_sve_gp_strictness).  The instruction is
;; printed in its merging (%1/m) form.
3269 (define_insn "@aarch64_pred_<optab><mode>"
3270 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3272 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3273 (match_operand:SI 3 "aarch64_sve_gp_strictness")
3274 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
3275 SVE_COND_FP_UNARY))]
3277 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3280 ;; Predicated floating-point unary arithmetic with merging.
;; Operand 1 is the predicate, operand 2 the vector input and operand 3 the
;; value to merge with (register or zero).  The inner operation is
;; generated with SVE_STRICT_GP strictness.
3281 (define_expand "@cond_<optab><mode>"
3282 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3284 [(match_operand:<VPRED> 1 "register_operand")
3287 (const_int SVE_STRICT_GP)
3288 (match_operand:SVE_FULL_F 2 "register_operand")]
3290 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3295 ;; Predicated floating-point unary arithmetic, merging with the first input.
;; The insn condition requires
;; aarch64_sve_pred_dominates_p (&operands[3], operands[1]).  When operand 3
;; is not already equal to operand 1, the rewrite step replaces it with a
;; copy of operand 1.  Alternative 1 copies operand 2 into the destination
;; with MOVPRFX first.
3296 (define_insn_and_rewrite "*cond_<optab><mode>_2"
3297 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3299 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3302 (match_operand:SI 4 "aarch64_sve_gp_strictness")
3303 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3307 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])"
3309 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3310 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3311 "&& !rtx_equal_p (operands[1], operands[3])"
3313 operands[3] = copy_rtx (operands[1]);
3315 [(set_attr "movprfx" "*,yes")]
3318 ;; Predicated floating-point unary arithmetic, merging with an independent
;; value.
3321 ;; The earlyclobber isn't needed for the first alternative, but omitting
3322 ;; it would only help the case in which operands 2 and 3 are the same,
3323 ;; which is handled above rather than here. Marking all the alternatives
3324 ;; as earlyclobber helps to make the instruction more regular to the
3325 ;; register allocator.
;; Operand 3 is the value to merge with and operand 5 the GP strictness
;; flag.  The insn condition requires operands 2 and 3 to differ and
;; aarch64_sve_pred_dominates_p (&operands[4], operands[1]) to hold.  When
;; operand 4 is not already equal to operand 1, the rewrite step replaces it
;; with a copy of operand 1.
3326 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3327 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3329 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3332 (match_operand:SI 5 "aarch64_sve_gp_strictness")
3333 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3335 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3338 && !rtx_equal_p (operands[2], operands[3])
3339 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
3341 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3342 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3343 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3344 "&& !rtx_equal_p (operands[1], operands[4])"
3346 operands[4] = copy_rtx (operands[1]);
3348 [(set_attr "movprfx" "*,yes,yes")]
3351 ;; -------------------------------------------------------------------------
3352 ;; ---- [PRED] Inverse
3353 ;; -------------------------------------------------------------------------
3356 ;; -------------------------------------------------------------------------
3358 ;; Unpredicated predicate inverse.
;; Operand 1 is the predicate to invert; the preparation code sets
;; operand 2 to aarch64_ptrue_reg (<MODE>mode).
3359 (define_expand "one_cmpl<mode>2"
3360 [(set (match_operand:PRED_ALL 0 "register_operand")
3362 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3366 operands[2] = aarch64_ptrue_reg (<MODE>mode);
3370 ;; Predicated predicate inverse.
;; Operand 2 is the predicate being inverted and operand 1 the governing
;; predicate; NOT is printed in its zeroing (%1/z) form on .b elements.
3371 (define_insn "*one_cmpl<mode>3"
3372 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3374 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3375 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3377 "not\t%0.b, %1/z, %2.b"
3380 ;; =========================================================================
3381 ;; == Binary arithmetic
3382 ;; =========================================================================
3384 ;; -------------------------------------------------------------------------
3385 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3386 ;; -------------------------------------------------------------------------
3388 ;; - ADD (merging form only)
3389 ;; - AND (merging form only)
3390 ;; - ASR (merging form only)
3391 ;; - EOR (merging form only)
3392 ;; - LSL (merging form only)
3393 ;; - LSR (merging form only)
3395 ;; - ORR (merging form only)
3398 ;; - SQADD (SVE2 merging form only)
3399 ;; - SQSUB (SVE2 merging form only)
3400 ;; - SUB (merging form only)
3403 ;; - UQADD (SVE2 merging form only)
3404 ;; - UQSUB (SVE2 merging form only)
3405 ;; -------------------------------------------------------------------------
3407 ;; Unpredicated integer binary operations that have an immediate form.
;; Operand 2 may be anything accepted by
;; aarch64_sve_<sve_imm_con>_operand.  The preparation code sets operand 3
;; to aarch64_ptrue_reg (<VPRED>mode).
3408 (define_expand "<optab><mode>3"
3409 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3412 (SVE_INT_BINARY_IMM:SVE_FULL_I
3413 (match_operand:SVE_FULL_I 1 "register_operand")
3414 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3418 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3422 ;; Integer binary operations that have an immediate form, predicated
3423 ;; with a PTRUE. We don't actually need the predicate for the first
3424 ;; and third alternatives, but using Upa or X isn't likely to gain much
3425 ;; and would make the instruction seem less uniform to the register
;; allocator.
;; The first and third alternatives take an immediate operand 3
;; (<sve_imm_con>); the second and fourth take a register.  After reload,
;; insns whose operand 3 is not a register are split into the unpredicated
;; SVE_INT_BINARY_IMM form of operands 2 and 3.
3427 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3428 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
3430 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
3431 (SVE_INT_BINARY_IMM:SVE_FULL_I
3432 (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w")
3433 (match_operand:SVE_FULL_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
3438 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3440 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3441 ; Split the unpredicated form after reload, so that we don't have
3442 ; the unnecessary PTRUE.
3443 "&& reload_completed
3444 && !register_operand (operands[3], <MODE>mode)"
3446 (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))]
3448 [(set_attr "movprfx" "*,*,yes,yes")]
3451 ;; Unpredicated binary operations with a constant (post-RA only).
3452 ;; These are generated by splitting a predicated instruction whose
3453 ;; predicate is unused.
;; The insn is only enabled once reload_completed is set.  Alternative 0
;; ties operand 1 to the destination; alternative 1 copies operand 1 into
;; the destination with MOVPRFX first.  The immediate is printed with the
;; <sve_imm_prefix> operand modifier.
3454 (define_insn "*post_ra_<optab><mode>3"
3455 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3456 (SVE_INT_BINARY_IMM:SVE_FULL_I
3457 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")
3458 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3459 "TARGET_SVE && reload_completed"
3461 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
3462 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
3463 [(set_attr "movprfx" "*,yes")]
3466 ;; Predicated integer operations with merging.
;; Operand 1 is the predicate, operands 2 and 3 the inputs (operand 3 is
;; checked by <sve_pred_int_rhs2_operand>) and operand 4 the value to merge
;; with, which may be a register or zero.
3467 (define_expand "@cond_<optab><mode>"
3468 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3470 [(match_operand:<VPRED> 1 "register_operand")
3471 (SVE_INT_BINARY:SVE_FULL_I
3472 (match_operand:SVE_FULL_I 2 "register_operand")
3473 (match_operand:SVE_FULL_I 3 "<sve_pred_int_rhs2_operand>"))
3474 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3479 ;; Predicated integer operations, merging with the first input.
;; Alternative 0 ties operand 2 to the destination; alternative 1 copies
;; operand 2 into the destination with MOVPRFX first.
3480 (define_insn "*cond_<optab><mode>_2"
3481 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3483 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3484 (SVE_INT_BINARY:SVE_FULL_I
3485 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3486 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3491 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3492 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3493 [(set_attr "movprfx" "*,yes")]
3496 ;; Predicated integer operations, merging with the second input.
;; The destination takes the place of operand 3, so the instruction is
;; printed with the <sve_int_op_rev> mnemonic and operand 2 as its last
;; source.  Alternative 1 copies operand 3 into the destination with
;; MOVPRFX first.
3497 (define_insn "*cond_<optab><mode>_3"
3498 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3500 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3501 (SVE_INT_BINARY:SVE_FULL_I
3502 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
3503 (match_operand:SVE_FULL_I 3 "register_operand" "0, w"))
3508 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3509 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
3510 [(set_attr "movprfx" "*,yes")]
3513 ;; Predicated integer operations, merging with an independent value.
;; Operand 4 is the value to merge with; the insn condition requires it to
;; differ from both operand 2 and operand 3.  The first three alternatives
;; merge with zero (zeroing MOVPRFX) and the fourth ties operand 4 to the
;; destination (merging MOVPRFX of operand 2).  The last alternative takes
;; operand 4 in a separate register.
;; After reload, when operand 4 is a register other than the destination,
;; the rewrite step emits a vcond_mask that selects between operand 2 and
;; operand 4 under operand 1 into the destination, then makes operands 2
;; and 4 refer to the destination.
3514 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3515 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3517 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3518 (SVE_INT_BINARY:SVE_FULL_I
3519 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3520 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))
3521 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3524 && !rtx_equal_p (operands[2], operands[4])
3525 && !rtx_equal_p (operands[3], operands[4])"
3527 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3528 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3529 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3530 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3532 "&& reload_completed
3533 && register_operand (operands[4], <MODE>mode)
3534 && !rtx_equal_p (operands[0], operands[4])"
3536 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3537 operands[4], operands[1]));
3538 operands[4] = operands[2] = operands[0];
3540 [(set_attr "movprfx" "yes")]
3543 ;; -------------------------------------------------------------------------
3544 ;; ---- [INT] Addition
3545 ;; -------------------------------------------------------------------------
3557 ;; -------------------------------------------------------------------------
;; Unpredicated integer addition of operand 1 and operand 2, where
;; operand 2 is a register or an immediate accepted by
;; aarch64_sve_add_operand.  By alternative, the output is: ADD of the
;; immediate (vsa), SUB of the %N2 form of the immediate (vsn), the string
;; returned by aarch64_output_sve_vector_inc_dec (vsi), the MOVPRFX-prefixed
;; ADD and SUB forms for an untied operand 1, and a three-register ADD.
3559 (define_insn "add<mode>3"
3560 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
3562 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
3563 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
3566 add\t%0.<Vetype>, %0.<Vetype>, #%D2
3567 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3568 * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
3569 movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
3570 movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3571 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3572 [(set_attr "movprfx" "*,*,*,yes,yes,*")]
3575 ;; Merging forms are handled through SVE_INT_BINARY.
3577 ;; -------------------------------------------------------------------------
3578 ;; ---- [INT] Subtraction
3579 ;; -------------------------------------------------------------------------
3583 ;; -------------------------------------------------------------------------
;; Unpredicated integer subtraction.  Operand 1 is a register or an
;; immediate accepted by aarch64_sve_arith_operand; operand 2 is always a
;; register.  The register form prints SUB.  The immediate forms print SUBR
;; on the destination, with operand 2 either tied to the destination or
;; copied into it with MOVPRFX first.
3585 (define_insn "sub<mode>3"
3586 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
3588 (match_operand:SVE_FULL_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
3589 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w")))]
3592 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
3593 subr\t%0.<Vetype>, %0.<Vetype>, #%D1
3594 movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
3595 [(set_attr "movprfx" "*,*,yes")]
3598 ;; Merging forms are handled through SVE_INT_BINARY.
3600 ;; -------------------------------------------------------------------------
3601 ;; ---- [INT] Take address
3602 ;; -------------------------------------------------------------------------
3605 ;; -------------------------------------------------------------------------
3607 ;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD,
3608 ;; but the svadrb intrinsics should preserve the user's choice.
;; Operand 1 is printed as the base and operand 2 as the offset of the
;; bracketed address.
3609 (define_insn "@aarch64_adr<mode>"
3610 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3611 (unspec:SVE_FULL_SDI
3612 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
3613 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
3616 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
3619 ;; Same, but with the offset being sign-extended from the low 32 bits.
;; Only defined for VNx2DI.  The rewrite step fires when operand 3 is not
;; already a constant and replaces it with CONSTM1_RTX (VNx2BImode).
3620 (define_insn_and_rewrite "*aarch64_adr_sxtw"
3621 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3623 [(match_operand:VNx2DI 1 "register_operand" "w")
3628 (match_operand:VNx2DI 2 "register_operand" "w")))]
3632 "adr\t%0.d, [%1.d, %2.d, sxtw]"
3633 "&& !CONSTANT_P (operands[3])"
3635 operands[3] = CONSTM1_RTX (VNx2BImode);
3639 ;; Same, but with the offset being zero-extended from the low 32 bits.
;; Only defined for VNx2DI.  Operand 3 must satisfy
;; aarch64_sve_uxtw_immediate and is not printed; the extension appears as
;; the uxtw modifier on operand 2.
3640 (define_insn "*aarch64_adr_uxtw_unspec"
3641 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3643 [(match_operand:VNx2DI 1 "register_operand" "w")
3645 (match_operand:VNx2DI 2 "register_operand" "w")
3646 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
3649 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3652 ;; Same, matching as a PLUS rather than unspec.
;; The operand numbering is unchanged: operand 1 is the base, operand 2 the
;; offset and operand 3 the aarch64_sve_uxtw_immediate constant, so the
;; printed instruction is identical to the unspec form.
3653 (define_insn "*aarch64_adr_uxtw_and"
3654 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3657 (match_operand:VNx2DI 2 "register_operand" "w")
3658 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
3659 (match_operand:VNx2DI 1 "register_operand" "w")))]
3661 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3664 ;; ADR with a nonzero shift.
;; Operand 1 is the base, operand 2 the index and operand 3 the shift
;; amount, restricted to 1..3 by const_1_to_3_operand.  The preparation
;; code sets operand 4 to CONSTM1_RTX (<VPRED>mode).
3665 (define_expand "@aarch64_adr<mode>_shift"
3666 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3668 (unspec:SVE_FULL_SDI
3670 (ashift:SVE_FULL_SDI
3671 (match_operand:SVE_FULL_SDI 2 "register_operand")
3672 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3674 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
3677 operands[4] = CONSTM1_RTX (<VPRED>mode);
;; Matching form of ADR with a nonzero shift: operand 1 is the base,
;; operand 2 the index and operand 3 the shift amount (1..3), printed as an
;; lsl modifier.  The rewrite step fires when operand 4 is not already a
;; constant and replaces it with CONSTM1_RTX (<VPRED>mode).
3681 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
3682 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3684 (unspec:SVE_FULL_SDI
3686 (ashift:SVE_FULL_SDI
3687 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
3688 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3690 (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))]
3692 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]"
3693 "&& !CONSTANT_P (operands[4])"
3695 operands[4] = CONSTM1_RTX (<VPRED>mode);
3699 ;; Same, but with the index being sign-extended from the low 32 bits.
;; Only defined for VNx2DI.  The rewrite step fires when either operand 4
;; or operand 5 is not already a constant and sets both to
;; CONSTM1_RTX (VNx2BImode).
3700 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
3701 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3710 (match_operand:VNx2DI 2 "register_operand" "w")))]
3712 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3714 (match_operand:VNx2DI 1 "register_operand" "w")))]
3716 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
3717 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3719 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
3723 ;; Same, but with the index being zero-extended from the low 32 bits.
;; Only defined for VNx2DI.  Operand 4 must satisfy
;; aarch64_sve_uxtw_immediate and operand 3 is the shift amount (1..3).
;; The rewrite step fires when operand 5 is not already a constant and
;; replaces it with CONSTM1_RTX (VNx2BImode).
3724 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
3725 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3731 (match_operand:VNx2DI 2 "register_operand" "w")
3732 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
3733 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3735 (match_operand:VNx2DI 1 "register_operand" "w")))]
3737 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
3738 "&& !CONSTANT_P (operands[5])"
3740 operands[5] = CONSTM1_RTX (VNx2BImode);
3744 ;; -------------------------------------------------------------------------
3745 ;; ---- [INT] Absolute difference
3746 ;; -------------------------------------------------------------------------
3750 ;; -------------------------------------------------------------------------
3752 ;; Unpredicated integer absolute difference.
;; The expander obtains a predicate from aarch64_ptrue_reg (<VPRED>mode)
;; and emits the predicated <su>abd pattern with it.
3753 (define_expand "<su>abd<mode>_3"
3754 [(use (match_operand:SVE_FULL_I 0 "register_operand"))
3756 (match_operand:SVE_FULL_I 1 "register_operand")
3757 (match_operand:SVE_FULL_I 2 "register_operand"))]
3760 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3761 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
3767 ;; Predicated integer absolute difference.
;; Alternative 1 ties operand 2 to the destination (the "%" modifier allows
;; operands 2 and 3 to be exchanged to achieve this) and emits a single
;; <su>abd.  Alternative 2 has an earlyclobber destination and first copies
;; operand 2 into it with MOVPRFX, as recorded by the "movprfx" attribute.
3768 (define_insn "@aarch64_pred_<su>abd<mode>"
3769 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3771 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3774 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3775 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3776 (<max_opp>:SVE_FULL_I
3782 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3783 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3784 [(set_attr "movprfx" "*,yes")]
;; Expander for the conditional absolute difference.  Operand 4 may be a
;; register or zero (aarch64_simd_reg_or_zero).  If operand 4 is equal to
;; operand 3, operands 2 and 3 are swapped so that the matching input
;; becomes operand 2.
3787 (define_expand "@aarch64_cond_<su>abd<mode>"
3788 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3790 [(match_operand:<VPRED> 1 "register_operand")
3795 (match_operand:SVE_FULL_I 2 "register_operand")
3796 (match_operand:SVE_FULL_I 3 "register_operand"))]
3800 (<max_opp>:SVE_FULL_I
3804 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3808 if (rtx_equal_p (operands[3], operands[4]))
3809 std::swap (operands[2], operands[3]);
3812 ;; Predicated integer absolute difference, merging with the first input.
;; Alternative 1 ties operand 2 to the destination and emits <su>abd
;; directly; alternative 2 copies operand 2 into an earlyclobber destination
;; with MOVPRFX first.  The rewrite step sets operands 4 and 5 to
;; CONSTM1_RTX (<VPRED>mode) whenever either is not already a constant.
3813 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
3814 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3816 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3821 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3822 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
3826 (<max_opp>:SVE_FULL_I
3834 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3835 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3836 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3838 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
3840 [(set_attr "movprfx" "*,yes")]
3843 ;; Predicated integer absolute difference, merging with an independent value.
;; The insn condition requires operand 4 to differ from both operand 2 and
;; operand 3.  Alternatives, by constraint:
;;   1: operand 4 is Dz, operand 2 tied to the destination; zeroing MOVPRFX
;;      of the destination onto itself, then <su>abd with operand 3.
;;   2: operand 4 is Dz, operand 3 tied to the destination; same prefix,
;;      then <su>abd with operand 2.
;;   3: operand 4 is Dz, no tie; zeroing MOVPRFX from operand 2.
;;   4: operand 4 tied to the destination; merging MOVPRFX from operand 2.
;;   5: operand 4 in an unrelated register.
;; The rewrite code first canonicalizes operands 5 and 6 to
;; CONSTM1_RTX (<VPRED>mode) if either is non-constant.  Otherwise, after
;; reload, when operand 4 is a register other than the destination, it
;; emits a vcond_mask of operands 2 and 4 under predicate operand 1 into
;; the destination and then makes operands 2 and 4 refer to the destination.
3844 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
3845 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3847 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3852 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3853 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))]
3857 (<max_opp>:SVE_FULL_I
3861 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3864 && !rtx_equal_p (operands[2], operands[4])
3865 && !rtx_equal_p (operands[3], operands[4])"
3867 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3868 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3869 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3870 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3874 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
3875 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
3876 else if (reload_completed
3877 && register_operand (operands[4], <MODE>mode)
3878 && !rtx_equal_p (operands[0], operands[4]))
3880 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3881 operands[4], operands[1]));
3882 operands[4] = operands[2] = operands[0];
3887 [(set_attr "movprfx" "yes")]
3890 ;; -------------------------------------------------------------------------
3891 ;; ---- [INT] Saturating addition and subtraction
3892 ;; -------------------------------------------------------------------------
3897 ;; -------------------------------------------------------------------------
3899 ;; Unpredicated saturating signed addition and subtraction.
;; Five alternatives over the SBINQOPS codes:
;;   1: vsQ immediate, operand 1 tied; <binqops_op> with #%D2.
;;   2: vsS immediate, operand 1 tied; <binqops_op_rev> with #%N2
;;      (presumably the negated immediate -- confirm against the %N
;;      operand modifier).
;;   3, 4: as 1 and 2 but with an earlyclobber destination, preceded by a
;;      MOVPRFX copy of operand 1.
;;   5: three-register form.
3900 (define_insn "@aarch64_sve_<optab><mode>"
3901 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
3902 (SBINQOPS:SVE_FULL_I
3903 (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
3904 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
3907 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3908 <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3909 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3910 movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3911 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3912 [(set_attr "movprfx" "*,*,yes,yes,*")]
3915 ;; Unpredicated saturating unsigned addition and subtraction.
;; Three alternatives over the UBINQOPS codes:
;;   1: vsa immediate with operand 1 tied to the destination.
;;   2: vsa immediate with an earlyclobber destination, preceded by a
;;      MOVPRFX copy of operand 1.
;;   3: three-register form.
3916 (define_insn "@aarch64_sve_<optab><mode>"
3917 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
3918 (UBINQOPS:SVE_FULL_I
3919 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
3920 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
3923 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3924 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3925 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3926 [(set_attr "movprfx" "*,yes,*")]
3929 ;; -------------------------------------------------------------------------
3930 ;; ---- [INT] Highpart multiplication
3931 ;; -------------------------------------------------------------------------
3935 ;; -------------------------------------------------------------------------
3937 ;; Unpredicated highpart multiplication.
;; The preparation code fills in operand 3 with a predicate register
;; obtained from aarch64_ptrue_reg (<VPRED>mode).
3938 (define_expand "<su>mul<mode>3_highpart"
3939 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3943 [(match_operand:SVE_FULL_I 1 "register_operand")
3944 (match_operand:SVE_FULL_I 2 "register_operand")]
3949 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3953 ;; Predicated highpart multiplication.
;; Alternative 1 ties operand 2 to the destination ("%" allows operands 2
;; and 3 to be exchanged) and emits <su>mulh.  Alternative 2 copies
;; operand 2 into an earlyclobber destination with MOVPRFX first.
3954 (define_insn "@aarch64_pred_<optab><mode>"
3955 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3957 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3959 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3960 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
3965 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3966 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3967 [(set_attr "movprfx" "*,yes")]
3970 ;; Predicated highpart multiplications with merging.
;; Operand 4 may be a register or zero.  The preparation code asserts that
;; operand 4 is either equal to operand 2 or a constant; other merge
;; values are not supported by this expander.
3971 (define_expand "@cond_<optab><mode>"
3972 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3974 [(match_operand:<VPRED> 1 "register_operand")
3976 [(match_operand:SVE_FULL_I 2 "register_operand")
3977 (match_operand:SVE_FULL_I 3 "register_operand")]
3979 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3983 /* Only target code is aware of these operations, so we don't need
3984 to handle the fully-general case. */
3985 gcc_assert (rtx_equal_p (operands[2], operands[4])
3986 || CONSTANT_P (operands[4]));
3989 ;; Predicated highpart multiplications, merging with the first input.
;; Alternative 1 ties operand 2 to the destination and emits <sve_int_op>
;; directly.  Alternative 2 copies operand 2 into an earlyclobber
;; destination with MOVPRFX first.
3990 (define_insn "*cond_<optab><mode>_2"
3991 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3993 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3995 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3996 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4002 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4003 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4004 [(set_attr "movprfx" "*,yes")])
4006 ;; Predicated highpart multiplications, merging with zero.
;; Operand 4 must be zero (aarch64_simd_imm_zero).  Both alternatives use an
;; earlyclobber destination and a zeroing MOVPRFX (%1/z): alternative 1
;; prefixes from the destination itself (operand 2 is tied to it, with "%"
;; allowing operands 2 and 3 to be exchanged); alternative 2 prefixes from
;; operand 2.
4007 (define_insn "*cond_<optab><mode>_z"
4008 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
4010 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4012 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4013 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4015 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4019 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4020 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4021 [(set_attr "movprfx" "yes")])
4023 ;; -------------------------------------------------------------------------
4024 ;; ---- [INT] Division
4025 ;; -------------------------------------------------------------------------
4031 ;; -------------------------------------------------------------------------
4033 ;; Unpredicated integer division.
;; Defined for the SVE_FULL_SDI modes only.  The preparation code fills in
;; operand 3 with a predicate register from aarch64_ptrue_reg (<VPRED>mode).
4034 (define_expand "<optab><mode>3"
4035 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4036 (unspec:SVE_FULL_SDI
4038 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4039 (match_operand:SVE_FULL_SDI 1 "register_operand")
4040 (match_operand:SVE_FULL_SDI 2 "register_operand"))]
4044 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4048 ;; Integer division predicated with a PTRUE.
;; Three alternatives:
;;   1: operand 2 tied to the destination; emits <sve_int_op> with
;;      operand 3 as the register input.
;;   2: operand 3 tied to the destination; emits the "r"-suffixed mnemonic
;;      with operand 2 as the register input.
;;   3: earlyclobber destination; MOVPRFX copies operand 2 first.
4049 (define_insn "@aarch64_pred_<optab><mode>"
4050 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
4051 (unspec:SVE_FULL_SDI
4052 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4053 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4054 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
4055 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
4059 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4060 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4061 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4062 [(set_attr "movprfx" "*,*,yes")]
4065 ;; Predicated integer division with merging.
;; Operand 1 has the predicate mode <VPRED>, operands 2 and 3 are the
;; SVE_INT_BINARY_SD inputs, and operand 4 may be a register or zero
;; (aarch64_simd_reg_or_zero).
4066 (define_expand "@cond_<optab><mode>"
4067 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4068 (unspec:SVE_FULL_SDI
4069 [(match_operand:<VPRED> 1 "register_operand")
4070 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4071 (match_operand:SVE_FULL_SDI 2 "register_operand")
4072 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4073 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4078 ;; Predicated integer division, merging with the first input.
;; Alternative 1 ties operand 2 to the destination and emits <sve_int_op>
;; directly.  Alternative 2 copies operand 2 into an earlyclobber
;; destination with MOVPRFX first.
4079 (define_insn "*cond_<optab><mode>_2"
4080 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4081 (unspec:SVE_FULL_SDI
4082 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4083 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4084 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
4085 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
4090 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4091 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4092 [(set_attr "movprfx" "*,yes")]
4095 ;; Predicated integer division, merging with the second input.
;; Alternative 1 ties operand 3 to the destination and emits
;; <sve_int_op_rev> with operand 2 as the register input.  Alternative 2
;; copies operand 3 into an earlyclobber destination with MOVPRFX first.
4096 (define_insn "*cond_<optab><mode>_3"
4097 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4098 (unspec:SVE_FULL_SDI
4099 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4100 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4101 (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
4102 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
4107 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4108 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4109 [(set_attr "movprfx" "*,yes")]
4112 ;; Predicated integer division, merging with an independent value.
;; The insn condition requires operand 4 to differ from both operand 2 and
;; operand 3.  Alternatives, by constraint:
;;   1: operand 4 is Dz, operand 2 tied to the destination; zeroing MOVPRFX
;;      of the destination onto itself, then <sve_int_op> with operand 3.
;;   2: operand 4 is Dz, operand 3 tied to the destination; same prefix,
;;      then <sve_int_op_rev> with operand 2.
;;   3: operand 4 is Dz, no tie; zeroing MOVPRFX from operand 2.
;;   4: operand 4 tied to the destination; merging MOVPRFX from operand 2.
;;   5: operand 4 in an unrelated register.
;; The rewrite applies after reload when operand 4 is a register other than
;; the destination: it emits a vcond_mask of operands 2 and 4 under
;; predicate operand 1 into the destination and then makes operands 2 and 4
;; refer to the destination.
4113 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4114 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4115 (unspec:SVE_FULL_SDI
4116 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4117 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4118 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
4119 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
4120 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4123 && !rtx_equal_p (operands[2], operands[4])
4124 && !rtx_equal_p (operands[3], operands[4])"
4126 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4127 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4128 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4129 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4131 "&& reload_completed
4132 && register_operand (operands[4], <MODE>mode)
4133 && !rtx_equal_p (operands[0], operands[4])"
4135 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4136 operands[4], operands[1]));
4137 operands[4] = operands[2] = operands[0];
4139 [(set_attr "movprfx" "yes")]
4142 ;; -------------------------------------------------------------------------
4143 ;; ---- [INT] Binary logical operations
4144 ;; -------------------------------------------------------------------------
4149 ;; -------------------------------------------------------------------------
4151 ;; Unpredicated integer binary logical operations.
;; Three alternatives:
;;   1: vsl immediate with operand 1 tied to the destination ("%" allows
;;      operands 1 and 2 to be exchanged).
;;   2: vsl immediate, preceded by a MOVPRFX copy of operand 1.
;;   3: three-register form, always printed with the .d element suffix
;;      rather than <Vetype>.
4152 (define_insn "<optab><mode>3"
4153 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?w, w")
4155 (match_operand:SVE_FULL_I 1 "register_operand" "%0, w, w")
4156 (match_operand:SVE_FULL_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
4159 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4160 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4161 <logical>\t%0.d, %1.d, %2.d"
4162 [(set_attr "movprfx" "*,yes,*")]
4165 ;; Merging forms are handled through SVE_INT_BINARY.
4167 ;; -------------------------------------------------------------------------
4168 ;; ---- [INT] Binary logical operations (inverted second input)
4169 ;; -------------------------------------------------------------------------
4172 ;; -------------------------------------------------------------------------
4174 ;; Unpredicated BIC.
;; Operand 2 is the input that appears under the NOT; operand 1 is the
;; other input.  The preparation code sets operand 3 to
;; CONSTM1_RTX (<VPRED>mode).
4175 (define_expand "@aarch64_bic<mode>"
4176 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4180 (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand"))]
4182 (match_operand:SVE_FULL_I 1 "register_operand")))]
4185 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Single-alternative BIC, always printed with the .d element suffix.  The
;; rewrite step replaces operand 3 with CONSTM1_RTX (<VPRED>mode) when it
;; is not already a constant.
4190 (define_insn_and_rewrite "*bic<mode>3"
4191 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4196 (match_operand:SVE_FULL_I 2 "register_operand" "w"))]
4198 (match_operand:SVE_FULL_I 1 "register_operand" "w")))]
4200 "bic\t%0.d, %1.d, %2.d"
4201 "&& !CONSTANT_P (operands[3])"
4203 operands[3] = CONSTM1_RTX (<VPRED>mode);
4207 ;; Predicated BIC with merging.
;; Operand 3 is the input that appears under the NOT, operand 2 is the
;; other input, and operand 4 may be a register or zero
;; (aarch64_simd_reg_or_zero).
4208 (define_expand "@cond_bic<mode>"
4209 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4211 [(match_operand:<VPRED> 1 "register_operand")
4213 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4214 (match_operand:SVE_FULL_I 2 "register_operand"))
4215 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4220 ;; Predicated integer BIC, merging with the first input.
;; Alternative 1 ties operand 2 to the destination and emits a predicated
;; BIC with operand 3.  Alternative 2 copies operand 2 into an earlyclobber
;; destination with MOVPRFX first.
4221 (define_insn "*cond_bic<mode>_2"
4222 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4224 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4227 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
4228 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
4233 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4234 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4235 [(set_attr "movprfx" "*,yes")]
4238 ;; Predicated integer BIC, merging with an independent value.
;; The insn condition requires operand 4 to differ from operand 2.
;; Alternatives, by constraint:
;;   1: operand 4 is Dz, operand 2 tied to the destination; zeroing MOVPRFX
;;      of the destination onto itself.
;;   2: operand 4 is Dz, no tie; zeroing MOVPRFX from operand 2.
;;   3: operand 4 tied to the destination; merging MOVPRFX from operand 2.
;;   4: operand 4 in an unrelated register.
;; The rewrite applies after reload when operand 4 is a register other than
;; the destination: it emits a vcond_mask of operands 2 and 4 under
;; predicate operand 1 into the destination and then makes operands 2 and 4
;; refer to the destination.
4239 (define_insn_and_rewrite "*cond_bic<mode>_any"
4240 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w")
4242 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4245 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w"))
4246 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w"))
4247 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4249 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4251 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4252 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4253 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4255 "&& reload_completed
4256 && register_operand (operands[4], <MODE>mode)
4257 && !rtx_equal_p (operands[0], operands[4])"
4259 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4260 operands[4], operands[1]));
4261 operands[4] = operands[2] = operands[0];
4263 [(set_attr "movprfx" "yes")]
4266 ;; -------------------------------------------------------------------------
4267 ;; ---- [INT] Shifts (rounding towards -Inf)
4268 ;; -------------------------------------------------------------------------
4276 ;; -------------------------------------------------------------------------
4278 ;; Unpredicated shift by a scalar, which expands into one of the vector shifts below.
;; Operand 2 is a scalar of mode <VEL>.  The preparation code builds a
;; vector shift amount from it and then emits v<optab><mode>3:
;;   - for a CONST_INT, the amount is a constant vector duplicate, forced
;;     into a register when the aarch64_sve_<lr>shift_operand check fails;
;;   - otherwise the scalar is converted to <VEL>mode and broadcast into a
;;     fresh register with vec_duplicate<mode>.
;; NOTE(review): the aarch64_sve_<lr>shift_operand check is applied to the
;; scalar operands[2] with <MODE>mode, not to the vector `amount' that was
;; just built -- confirm that this is intended.
4280 (define_expand "<ASHIFT:optab><mode>3"
4281 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4283 (match_operand:SVE_FULL_I 1 "register_operand")
4284 (match_operand:<VEL> 2 "general_operand")))]
4288 if (CONST_INT_P (operands[2]))
4290 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4291 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4292 amount = force_reg (<MODE>mode, amount);
4296 amount = gen_reg_rtx (<MODE>mode);
4297 emit_insn (gen_vec_duplicate<mode> (amount,
4298 convert_to_mode (<VEL>mode,
4301 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4306 ;; Unpredicated shift by a vector.
;; Operand 2 must satisfy aarch64_sve_<lr>shift_operand.  The preparation
;; code fills in operand 3 with a predicate register from
;; aarch64_ptrue_reg (<VPRED>mode).
4307 (define_expand "v<optab><mode>3"
4308 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4312 (match_operand:SVE_FULL_I 1 "register_operand")
4313 (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))]
4317 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4321 ;; Shift by a vector, predicated with a PTRUE. We don't actually need
4322 ;; the predicate for the first alternative, but using Upa or X isn't
4323 ;; likely to gain much and would make the instruction seem less uniform
4324 ;; to the register allocator.
;; Alternatives, by constraint:
;;   1: operand 3 matches D<lr> (not a register); after reload such insns
;;      are split into the plain (ASHIFT operand 2, operand 3) set below,
;;      which drops the predicate.
;;   2: operand 2 tied to the destination; emits <shift> with operand 3.
;;   3: operand 3 tied to the destination; emits the "r"-suffixed mnemonic
;;      with operand 2 as the register input.
;;   4: earlyclobber destination; MOVPRFX copies operand 2 first.
4325 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4326 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
4328 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4330 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
4331 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
4336 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4337 <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4338 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4339 "&& reload_completed
4340 && !register_operand (operands[3], <MODE>mode)"
4341 [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
4343 [(set_attr "movprfx" "*,*,*,yes")]
4346 ;; Unpredicated shift operations by a constant (post-RA only).
4347 ;; These are generated by splitting a predicated instruction whose
4348 ;; predicate is unused.
;; Only matches once reload has completed.  Operand 2 must satisfy
;; aarch64_simd_<lr>shift_imm and is printed as an immediate (#%2).
4349 (define_insn "*post_ra_v<optab><mode>3"
4350 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4352 (match_operand:SVE_FULL_I 1 "register_operand" "w")
4353 (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
4354 "TARGET_SVE && reload_completed"
4355 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4358 ;; Predicated integer shift, merging with the first input.
;; Operand 3 is an immediate shift amount (aarch64_simd_<lr>shift_imm,
;; printed as #%3).  Alternative 1 ties operand 2 to the destination;
;; alternative 2 copies operand 2 into an earlyclobber destination with
;; MOVPRFX first.
4359 (define_insn "*cond_<optab><mode>_2_const"
4360 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4362 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4364 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4365 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4370 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4371 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4372 [(set_attr "movprfx" "*,yes")]
4375 ;; Predicated integer shift, merging with an independent value.
;; The insn condition requires operand 4 to differ from operand 2.
;; Alternatives, by constraint on operand 4:
;;   1: Dz; zeroing MOVPRFX from operand 2, then <shift> by #%3.
;;   2: tied to the destination; merging MOVPRFX from operand 2.
;;   3: an unrelated register.
;; The rewrite applies after reload when operand 4 is a register other than
;; the destination: it emits a vcond_mask of operands 2 and 4 under
;; predicate operand 1 into the destination and then makes operands 2 and 4
;; refer to the destination.
4376 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4377 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w")
4379 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4381 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
4382 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4383 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4385 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4387 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4388 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4390 "&& reload_completed
4391 && register_operand (operands[4], <MODE>mode)
4392 && !rtx_equal_p (operands[0], operands[4])"
4394 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4395 operands[4], operands[1]));
4396 operands[4] = operands[2] = operands[0];
4398 [(set_attr "movprfx" "yes")]
4401 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
;; Operand 1 and the result use an SVE_FULL_BHSI mode, while the shift
;; amounts in operand 2 are always VNx2DI and are printed with the .d
;; suffix.
4402 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4403 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4404 (unspec:SVE_FULL_BHSI
4405 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4406 (match_operand:VNx2DI 2 "register_operand" "w")]
4409 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4412 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
;; Operand 1 has the predicate mode <VPRED>, operand 2 is the
;; SVE_FULL_BHSI input, operand 3 holds the VNx2DI shift amounts, and
;; operand 4 may be a register or zero (aarch64_simd_reg_or_zero).
4413 (define_expand "@cond_<sve_int_op><mode>"
4414 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4415 (unspec:SVE_FULL_BHSI
4416 [(match_operand:<VPRED> 1 "register_operand")
4417 (unspec:SVE_FULL_BHSI
4418 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4419 (match_operand:VNx2DI 3 "register_operand")]
4421 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4426 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with the first input.
;; Alternative 1 ties operand 2 to the destination and emits <sve_int_op>
;; with the .d shift amounts in operand 3.  Alternative 2 copies operand 2
;; into an earlyclobber destination with MOVPRFX first.
4428 (define_insn "*cond_<sve_int_op><mode>_m"
4429 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
4430 (unspec:SVE_FULL_BHSI
4431 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4432 (unspec:SVE_FULL_BHSI
4433 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4434 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4440 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4441 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4442 [(set_attr "movprfx" "*, yes")])
4444 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
;; Operand 4 must be zero (aarch64_simd_imm_zero).  Both alternatives use an
;; earlyclobber destination and a zeroing MOVPRFX (%1/z): alternative 1
;; prefixes from the destination itself (operand 2 is tied to it);
;; alternative 2 prefixes from operand 2.
4445 (define_insn "*cond_<sve_int_op><mode>_z"
4446 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
4447 (unspec:SVE_FULL_BHSI
4448 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4449 (unspec:SVE_FULL_BHSI
4450 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4451 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4453 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4457 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4458 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4459 [(set_attr "movprfx" "yes")])
4461 ;; -------------------------------------------------------------------------
4462 ;; ---- [INT] Shifts (rounding towards 0)
4463 ;; -------------------------------------------------------------------------
4469 ;; -------------------------------------------------------------------------
4471 ;; Unpredicated signed division by a power of 2 (sdiv_pow2), with the shift amount given as an immediate.
;; Operand 2 is modeless and must satisfy aarch64_simd_rshift_imm.  The
;; preparation code fills in operand 3 with a predicate register from
;; aarch64_ptrue_reg (<VPRED>mode).
4472 (define_expand "sdiv_pow2<mode>3"
4473 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4477 [(match_operand:SVE_FULL_I 1 "register_operand")
4478 (match_operand 2 "aarch64_simd_rshift_imm")]
4484 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4488 ;; Predicated right shift with merging.
;; Operand 1 has the predicate mode <VPRED>, operand 2 is the vector input,
;; operand 3 is an immediate shift amount (aarch64_simd_<lr>shift_imm), and
;; operand 4 may be a register or zero (aarch64_simd_reg_or_zero).
4489 (define_expand "@cond_<sve_int_op><mode>"
4490 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4492 [(match_operand:<VPRED> 1 "register_operand")
4494 [(match_operand:SVE_FULL_I 2 "register_operand")
4495 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4497 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4502 ;; Predicated right shift, merging with the first input.
;; Alternative 1 ties operand 2 to the destination and emits <sve_int_op>
;; with the immediate #%3.  Alternative 2 copies operand 2 into an
;; earlyclobber destination with MOVPRFX first.
4503 (define_insn "*cond_<sve_int_op><mode>_2"
4504 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4506 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4508 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4509 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4515 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4516 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4517 [(set_attr "movprfx" "*,yes")])
4519 ;; Predicated right shift, merging with zero.
;; Operand 4 must be zero (aarch64_simd_imm_zero).  The single alternative
;; emits a zeroing MOVPRFX (%1/z) from operand 2 followed by <sve_int_op>
;; with the immediate #%3.
4520 (define_insn "*cond_<sve_int_op><mode>_z"
4521 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4523 [(match_operand:<VPRED> 1 "register_operand" "Upl")
4525 [(match_operand:SVE_FULL_I 2 "register_operand" "w")
4526 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4528 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4531 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4532 [(set_attr "movprfx" "yes")])
4534 ;; -------------------------------------------------------------------------
4535 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
4536 ;; -------------------------------------------------------------------------
4541 ;; -------------------------------------------------------------------------
4543 ;; Unpredicated floating-point binary operations that take an integer as
4544 ;; their second operand.
;; Operand 1 and the result are SVE_FULL_F vectors; operand 2 has the
;; corresponding integer mode <V_INT_EQUIV>.  The operation is selected by
;; the SVE_FP_BINARY_INT iterator.
4545 (define_insn "@aarch64_sve_<optab><mode>"
4546 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4548 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4549 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4550 SVE_FP_BINARY_INT))]
4552 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4555 ;; Predicated floating-point binary operations that take an integer
4556 ;; as their second operand.
;; Operand 4 is an SImode value accepted by aarch64_sve_gp_strictness.
;; Alternative 1 ties operand 2 to the destination and emits <sve_fp_op>
;; with the integer vector operand 3.  Alternative 2 copies operand 2 into
;; an earlyclobber destination with MOVPRFX first.
4557 (define_insn "@aarch64_pred_<optab><mode>"
4558 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4560 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4561 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4562 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4563 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4564 SVE_COND_FP_BINARY_INT))]
4567 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4568 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4569 [(set_attr "movprfx" "*,yes")]
4572 ;; Predicated floating-point binary operations with merging, taking an
4573 ;; integer as their second operand.
;; The inner operation is generated with the SVE_STRICT_GP strictness
;; constant.  Operand 3 has the integer mode <V_INT_EQUIV>, and operand 4
;; may be a register or zero (aarch64_simd_reg_or_zero).
4574 (define_expand "@cond_<optab><mode>"
4575 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4577 [(match_operand:<VPRED> 1 "register_operand")
4580 (const_int SVE_STRICT_GP)
4581 (match_operand:SVE_FULL_F 2 "register_operand")
4582 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
4583 SVE_COND_FP_BINARY_INT)
4584 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4589 ;; Predicated floating-point binary operations that take an integer as their
4590 ;; second operand, with inactive lanes coming from the first operand.
;; The insn condition requires
;; aarch64_sve_pred_dominates_p (&operands[4], operands[1]).  Alternative 1
;; ties operand 2 to the destination; alternative 2 copies operand 2 into
;; an earlyclobber destination with MOVPRFX first.  The rewrite step
;; replaces operand 4 with a copy of operand 1 whenever the two differ.
4591 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4592 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4594 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4597 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4598 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4599 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4600 SVE_COND_FP_BINARY_INT)
4603 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4605 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4606 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4607 "&& !rtx_equal_p (operands[1], operands[4])"
4609 operands[4] = copy_rtx (operands[1]);
4611 [(set_attr "movprfx" "*,yes")]
4614 ;; Predicated floating-point binary operations that take an integer as
4615 ;; their second operand, with the values of inactive lanes being distinct
4616 ;; from the other inputs.
;; The insn condition requires operand 4 to differ from operand 2 and
;; aarch64_sve_pred_dominates_p (&operands[5], operands[1]) to hold.
;; Alternatives, by constraint on operand 4:
;;   1, 2: Dz; zeroing MOVPRFX from operand 2 (which is tied to the
;;         destination in alternative 1), then <sve_fp_op> with operand 3.
;;   3: tied to the destination; merging MOVPRFX from operand 2.
;;   4: an unrelated register.
;; The rewrite code handles two cases.  After reload, when operand 4 is a
;; register other than the destination, it emits a vcond_mask of operands 2
;; and 4 under predicate operand 1 into the destination and makes operands
;; 2 and 4 refer to the destination.  Otherwise, if operands 1 and 5
;; differ, operand 5 is replaced with a copy of operand 1.
4617 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4618 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4620 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4623 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4624 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4625 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4626 SVE_COND_FP_BINARY_INT)
4627 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4630 && !rtx_equal_p (operands[2], operands[4])
4631 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4633 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4634 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4635 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4639 if (reload_completed
4640 && register_operand (operands[4], <MODE>mode)
4641 && !rtx_equal_p (operands[0], operands[4]))
4643 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4644 operands[4], operands[1]));
4645 operands[4] = operands[2] = operands[0];
4647 else if (!rtx_equal_p (operands[1], operands[5]))
4648 operands[5] = copy_rtx (operands[1]);
4652 [(set_attr "movprfx" "yes")]
4655 ;; -------------------------------------------------------------------------
4656 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
4657 ;; -------------------------------------------------------------------------
4658 ;; Includes post-RA forms of:
4662 ;; -------------------------------------------------------------------------
4664 ;; Unpredicated floating-point binary operations (post-RA only).
4665 ;; These are generated by splitting a predicated instruction whose
4666 ;; predicate is unused.
;; Only matches once reload has completed.  Emits the three-register
;; <sve_fp_op> form for the SVE_UNPRED_FP_BINARY codes.
4667 (define_insn "*post_ra_<sve_fp_op><mode>3"
4668 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4669 (SVE_UNPRED_FP_BINARY:SVE_FULL_F
4670 (match_operand:SVE_FULL_F 1 "register_operand" "w")
4671 (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
4672 "TARGET_SVE && reload_completed"
4673 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
4675 ;; -------------------------------------------------------------------------
4676 ;; ---- [FP] General binary arithmetic corresponding to unspecs
4677 ;; -------------------------------------------------------------------------
4678 ;; Includes merging forms of:
4679 ;; - FADD (constant forms handled in the "Addition" section)
4683 ;; - FMAXNM (including #0.0 and #1.0)
4685 ;; - FMINNM (including #0.0 and #1.0)
4686 ;; - FMUL (including #0.5 and #2.0)
4690 ;; - FSUB (constant forms handled in the "Addition" section)
4691 ;; - FSUBR (constant forms handled in the "Subtraction" section)
4692 ;; -------------------------------------------------------------------------
4694 ;; Unpredicated floating-point binary operations.
;; Single-alternative, three-register <sve_fp_op> on SVE_FULL_F vectors;
;; no predicate operand appears in the output template.
4695 (define_insn "@aarch64_sve_<optab><mode>"
4696 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4698 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4699 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
4702 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4705 ;; Unpredicated floating-point binary operations that need to be predicated; the expander supplies the predicate via aarch64_ptrue_reg.
;; The operation is generated with the SVE_RELAXED_GP strictness constant.
;; The operand predicates are chosen per operation through the
;; <sve_pred_fp_rhs1_operand> and <sve_pred_fp_rhs2_operand> attributes.
;; The preparation code fills in operand 3 with a predicate register from
;; aarch64_ptrue_reg (<VPRED>mode).
4707 (define_expand "<optab><mode>3"
4708 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4711 (const_int SVE_RELAXED_GP)
4712 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
4713 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
4714 SVE_COND_FP_BINARY))]
4717 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4721 ;; Predicated floating-point binary operations that have no immediate forms.
;; Operand 4 is an SImode value accepted by aarch64_sve_gp_strictness.
;; Three alternatives:
;;   1: operand 2 tied to the destination; emits <sve_fp_op> with operand 3.
;;   2: operand 3 tied to the destination; emits <sve_fp_op_rev> with
;;      operand 2 as the register input.
;;   3: earlyclobber destination; MOVPRFX copies operand 2 first.
4722 (define_insn "@aarch64_pred_<optab><mode>"
4723 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
4725 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4726 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4727 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
4728 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
4729 SVE_COND_FP_BINARY_REG))]
4732 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4733 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4734 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4735 [(set_attr "movprfx" "*,*,yes")]
4738 ;; Predicated floating-point operations with merging.
;; The inner operation is generated with the SVE_STRICT_GP strictness
;; constant.  The predicates for operands 2 and 3 are chosen per operation
;; through <sve_pred_fp_rhs1_operand> and <sve_pred_fp_rhs2_operand>;
;; operand 4 may be a register or zero (aarch64_simd_reg_or_zero).
4739 (define_expand "@cond_<optab><mode>"
4740 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4742 [(match_operand:<VPRED> 1 "register_operand")
4745 (const_int SVE_STRICT_GP)
4746 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
4747 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
4749 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4754 ;; Predicated floating-point operations, merging with the first input.
;; Alternatives:
;;   1. operand 2 tied to operand 0: a single <sve_fp_op> with operand 3.
;;   2. otherwise: MOVPRFX from operand 2 first ("movprfx" attribute "yes").
;; The insn only matches when aarch64_sve_pred_dominates_p accepts
;; operands[4] against operands[1].  The rewrite step then replaces
;; operands[4] with a copy of operands[1] whenever the two are not already
;; rtx_equal_p.
4755 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4756 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4758 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4761 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4762 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4763 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
4767 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4769 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4770 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4771 "&& !rtx_equal_p (operands[1], operands[4])"
4773 operands[4] = copy_rtx (operands[1]);
4775 [(set_attr "movprfx" "*,yes")]
4778 ;; Same for operations that take a 1-bit constant.
;; Operand 3 is an immediate accepted by <sve_pred_fp_rhs2_immediate> and is
;; printed as "#%3".  Alternatives:
;;   1. operand 2 tied to operand 0: a single <sve_fp_op>.
;;   2. otherwise: MOVPRFX from operand 2 first ("movprfx" attribute "yes").
;; Matching requires aarch64_sve_pred_dominates_p (&operands[4],
;; operands[1]); the rewrite step replaces operands[4] with a copy of
;; operands[1] when the two are not already rtx_equal_p.
4779 (define_insn_and_rewrite "*cond_<optab><mode>_2_const"
4780 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
4782 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4785 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4786 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4787 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4788 SVE_COND_FP_BINARY_I1)
4791 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4793 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4794 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4795 "&& !rtx_equal_p (operands[1], operands[4])"
4797 operands[4] = copy_rtx (operands[1]);
4799 [(set_attr "movprfx" "*,yes")]
4802 ;; Predicated floating-point operations, merging with the second input.
;; Mirror image of the "_2" form: here operand 3 is the input that may be
;; tied to the result, so both templates use <sve_fp_op_rev> with operand 2
;; as the other source.  Alternatives:
;;   1. operand 3 tied to operand 0: a single <sve_fp_op_rev>.
;;   2. otherwise: MOVPRFX from operand 3 first ("movprfx" attribute "yes").
;; Matching requires aarch64_sve_pred_dominates_p (&operands[4],
;; operands[1]); the rewrite step replaces operands[4] with a copy of
;; operands[1] when the two are not already rtx_equal_p.
4803 (define_insn_and_rewrite "*cond_<optab><mode>_3"
4804 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4806 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4809 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4810 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
4811 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
4815 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4817 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4818 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4819 "&& !rtx_equal_p (operands[1], operands[4])"
4821 operands[4] = copy_rtx (operands[1]);
4823 [(set_attr "movprfx" "*,yes")]
4826 ;; Predicated floating-point operations, merging with an independent value.
;; Operand 4 is the independent merge value (register or zero).  The insn
;; condition rejects an operand 4 that is rtx_equal_p to operand 2 or 3 and
;; requires aarch64_sve_pred_dominates_p (&operands[5], operands[1]).
;; Alternatives (operand 0 is earlyclobber in all of them):
;;   1. operand 4 "Dz", operand 2 tied: zeroing MOVPRFX of %0 onto itself,
;;      then <sve_fp_op> with operand 3.
;;   2. operand 4 "Dz", operand 3 tied: zeroing MOVPRFX of %0 onto itself,
;;      then <sve_fp_op_rev> with operand 2.
;;   3. operand 4 "Dz", nothing tied: zeroing MOVPRFX from operand 2.
;;   4. operand 4 tied to operand 0: merging MOVPRFX from operand 2.
;;   5. operand 4 in an unrelated register: no template for it is visible in
;;      this excerpt.
;; Rewrite step: after reload, if operand 4 is a register other than
;; operand 0, emit gen_vcond_mask_<mode><vpred> into operands[0] from
;; operands[2], operands[4] and operands[1], then make operands[4] and
;; operands[2] both refer to operands[0].  Otherwise, replace operands[5]
;; with a copy of operands[1] when the two are not already rtx_equal_p.
4827 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4828 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4830 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4833 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4834 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
4835 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
4837 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4840 && !rtx_equal_p (operands[2], operands[4])
4841 && !rtx_equal_p (operands[3], operands[4])
4842 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4844 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4845 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4846 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4847 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4851 if (reload_completed
4852 && register_operand (operands[4], <MODE>mode)
4853 && !rtx_equal_p (operands[0], operands[4]))
4855 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4856 operands[4], operands[1]));
4857 operands[4] = operands[2] = operands[0];
4859 else if (!rtx_equal_p (operands[1], operands[5]))
4860 operands[5] = copy_rtx (operands[1]);
4864 [(set_attr "movprfx" "yes")]
4867 ;; Same for operations that take a 1-bit constant.
;; Immediate counterpart of the "_any" form: operand 3 is an immediate
;; (<sve_pred_fp_rhs2_immediate>, printed as "#%3") and operand 4 is the
;; independent merge value.  The insn condition rejects an operand 4 that is
;; rtx_equal_p to operand 2 and requires aarch64_sve_pred_dominates_p
;; (&operands[5], operands[1]).  Alternatives:
;;   1. operand 4 "Dz": zeroing MOVPRFX from operand 2, then <sve_fp_op>.
;;   2. operand 4 tied to operand 0: merging MOVPRFX from operand 2.
;;   3. operand 4 in an unrelated register: no template for it is visible in
;;      this excerpt.
;; Rewrite step: after reload, if operand 4 is a register other than
;; operand 0, emit gen_vcond_mask_<mode><vpred> into operands[0] and make
;; operands[4] and operands[2] refer to operands[0].  Otherwise, replace
;; operands[5] with a copy of operands[1] when they are not rtx_equal_p.
4868 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4869 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
4871 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4874 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4875 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
4876 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4877 SVE_COND_FP_BINARY_I1)
4878 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4881 && !rtx_equal_p (operands[2], operands[4])
4882 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4884 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4885 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4889 if (reload_completed
4890 && register_operand (operands[4], <MODE>mode)
4891 && !rtx_equal_p (operands[0], operands[4]))
4893 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4894 operands[4], operands[1]));
4895 operands[4] = operands[2] = operands[0];
4897 else if (!rtx_equal_p (operands[1], operands[5]))
4898 operands[5] = copy_rtx (operands[1]);
4902 [(set_attr "movprfx" "yes")]
4905 ;; -------------------------------------------------------------------------
4906 ;; ---- [FP] Addition
4907 ;; -------------------------------------------------------------------------
4911 ;; -------------------------------------------------------------------------
4913 ;; Predicated floating-point addition.
;; Seven alternatives; the GP-strictness operand (4) has its own constraint
;; in each one.
;;   1, 2: operand 2 tied, immediate operand 3: "vsA" prints FADD #%3 and
;;         "vsN" prints FSUB #%N3 (presumably the negated constant --
;;         confirm against the %N operand modifier).
;;   3:    all-register with strictness "Z"; its output template is not
;;         visible in this excerpt.
;;   4:    operand 2 tied, register operand 3, strictness "Ui1": FADD.
;;   5-7:  untied versions of 1, 2 and 4 that first MOVPRFX operand 2.
;; After reload, a register operand 3 combined with SVE_RELAXED_GP is split
;; into a plain (plus ...) of operands 2 and 3 with no predicate operand.
4914 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4915 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
4917 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
4918 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
4919 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
4920 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
4924 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4925 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4927 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4928 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4929 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4930 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4931 ; Split the unpredicated form after reload, so that we don't have
4932 ; the unnecessary PTRUE.
4933 "&& reload_completed
4934 && register_operand (operands[3], <MODE>mode)
4935 && INTVAL (operands[4]) == SVE_RELAXED_GP"
4936 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
4938 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
4941 ;; Predicated floating-point addition of a constant, merging with the first input.
;; Operand 3 is always an immediate.  Alternatives:
;;   1. operand 2 tied, "vsA" immediate: FADD #%3.
;;   2. operand 2 tied, "vsN" immediate: FSUB #%N3.
;;   3, 4. untied versions of 1 and 2 that first MOVPRFX operand 2.
;; Matching requires aarch64_sve_pred_dominates_p (&operands[4],
;; operands[1]); the rewrite step replaces operands[4] with a copy of
;; operands[1] when the two are not already rtx_equal_p.
4943 (define_insn_and_rewrite "*cond_add<mode>_2_const"
4944 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
4946 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4949 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4950 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
4951 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
4955 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4957 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4958 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4959 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4960 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
4961 "&& !rtx_equal_p (operands[1], operands[4])"
4963 operands[4] = copy_rtx (operands[1]);
4965 [(set_attr "movprfx" "*,*,yes,yes")]
4968 ;; Predicated floating-point addition of a constant, merging with an
4969 ;; independent value.
;; Six alternatives, paired by immediate kind ("vsA" -> FADD #%3,
;; "vsN" -> FSUB #%N3):
;;   1, 2: operand 4 "Dz": zeroing MOVPRFX from operand 2.
;;   3, 4: operand 4 tied to operand 0: merging MOVPRFX from operand 2.
;;   5, 6: operand 4 in an unrelated register: no templates for these are
;;         visible in this excerpt.
;; The insn condition rejects an operand 4 that is rtx_equal_p to operand 2
;; and requires aarch64_sve_pred_dominates_p (&operands[5], operands[1]).
;; Rewrite step: after reload, if operand 4 is a register other than
;; operand 0, emit gen_vcond_mask_<mode><vpred> into operands[0] and make
;; operands[4] and operands[2] refer to operands[0].  Otherwise, replace
;; operands[5] with a copy of operands[1] when they are not rtx_equal_p.
4970 (define_insn_and_rewrite "*cond_add<mode>_any_const"
4971 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
4973 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
4976 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4977 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
4978 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
4980 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
4983 && !rtx_equal_p (operands[2], operands[4])
4984 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4986 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4987 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4988 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4989 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4994 if (reload_completed
4995 && register_operand (operands[4], <MODE>mode)
4996 && !rtx_equal_p (operands[0], operands[4]))
4998 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4999 operands[4], operands[1]));
5000 operands[4] = operands[2] = operands[0];
5002 else if (!rtx_equal_p (operands[1], operands[5]))
5003 operands[5] = copy_rtx (operands[1]);
5007 [(set_attr "movprfx" "yes")]
5010 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5012 ;; -------------------------------------------------------------------------
5013 ;; ---- [FP] Complex addition
5014 ;; -------------------------------------------------------------------------
5017 ;; -------------------------------------------------------------------------
5019 ;; Predicated FCADD.
;; Operand 1 is the governing predicate, operand 4 the GP-strictness flag,
;; operands 2 and 3 the register inputs; the rotation is printed from the
;; <rot> attribute.  Alternatives:
;;   1. operand 2 tied to operand 0: a single FCADD.
;;   2. otherwise: MOVPRFX from operand 2 first ("movprfx" attribute "yes").
;; Unlike the generic binary pattern there is no alternative that ties
;; operand 3 to the result.
5020 (define_insn "@aarch64_pred_<optab><mode>"
5021 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5023 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5024 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5025 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5026 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5030 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5031 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5032 [(set_attr "movprfx" "*,yes")]
5035 ;; Predicated FCADD with merging.
;; Expander for the merging FCADD forms.  Operand 1 is the predicate,
;; operands 2 and 3 are register inputs and operand 4 is a register or
;; zero.  The pattern hard-codes SVE_STRICT_GP as the strictness flag.
;; NOTE(review): operand 4 is presumably the value used for inactive lanes;
;; the selecting rtx is not visible in this excerpt -- confirm.
5036 (define_expand "@cond_<optab><mode>"
5037 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5039 [(match_operand:<VPRED> 1 "register_operand")
5042 (const_int SVE_STRICT_GP)
5043 (match_operand:SVE_FULL_F 2 "register_operand")
5044 (match_operand:SVE_FULL_F 3 "register_operand")]
5046 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5051 ;; Predicated FCADD, merging with the first input.
;; Alternatives:
;;   1. operand 2 tied to operand 0: a single FCADD with operand 3.
;;   2. otherwise: MOVPRFX from operand 2 first ("movprfx" attribute "yes").
;; Matching requires aarch64_sve_pred_dominates_p (&operands[4],
;; operands[1]); the rewrite step replaces operands[4] with a copy of
;; operands[1] when the two are not already rtx_equal_p.
5052 (define_insn_and_rewrite "*cond_<optab><mode>_2"
5053 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5055 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5058 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5059 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5060 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5064 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5066 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5067 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5068 "&& !rtx_equal_p (operands[1], operands[4])"
5070 operands[4] = copy_rtx (operands[1]);
5072 [(set_attr "movprfx" "*,yes")]
5075 ;; Predicated FCADD, merging with an independent value.
;; Operand 4 is the independent merge value (register or zero).  The insn
;; condition rejects an operand 4 that is rtx_equal_p to operand 2 and
;; requires aarch64_sve_pred_dominates_p (&operands[5], operands[1]).
;; Alternatives (operand 0 is earlyclobber in all of them):
;;   1. operand 4 "Dz", nothing tied: zeroing MOVPRFX from operand 2.
;;   2. operand 4 "Dz", operand 2 tied: zeroing MOVPRFX of %0 onto itself.
;;   3. operand 4 tied to operand 0: merging MOVPRFX from operand 2.
;;   4. operand 4 in an unrelated register: no template for it is visible in
;;      this excerpt.
;; Rewrite step: after reload, if operand 4 is a register other than
;; operand 0, emit gen_vcond_mask_<mode><vpred> into operands[0] and make
;; operands[4] and operands[2] refer to operands[0].  Otherwise, replace
;; operands[5] with a copy of operands[1] when they are not rtx_equal_p.
5076 (define_insn_and_rewrite "*cond_<optab><mode>_any"
5077 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5079 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5082 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5083 (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5084 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5086 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5089 && !rtx_equal_p (operands[2], operands[4])
5090 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5092 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5093 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5094 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5098 if (reload_completed
5099 && register_operand (operands[4], <MODE>mode)
5100 && !rtx_equal_p (operands[0], operands[4]))
5102 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5103 operands[4], operands[1]));
5104 operands[4] = operands[2] = operands[0];
5106 else if (!rtx_equal_p (operands[1], operands[5]))
5107 operands[5] = copy_rtx (operands[1]);
5111 [(set_attr "movprfx" "yes")]
5114 ;; -------------------------------------------------------------------------
5115 ;; ---- [FP] Subtraction
5116 ;; -------------------------------------------------------------------------
5120 ;; -------------------------------------------------------------------------
5122 ;; Predicated floating-point subtraction.
;; Six alternatives; operand 2 is the minuend and may be an immediate
;; ("vsA"), operand 3 is always a register.
;;   1. immediate operand 2, operand 3 tied: FSUBR #%2.
;;   2. all-register with strictness "Z"; its output template is not visible
;;      in this excerpt.
;;   3. operand 2 tied, strictness "Ui1": FSUB with operand 3.
;;   4. operand 3 tied, strictness "Ui1": FSUBR with operand 2.
;;   5. untied immediate form: MOVPRFX from operand 3, then FSUBR #%2.
;;   6. untied register form: MOVPRFX from operand 2, then FSUB.
;; After reload, a register operand 2 combined with SVE_RELAXED_GP is split
;; into a plain (minus ...) of operands 2 and 3 with no predicate operand.
5123 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5124 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
5126 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5127 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
5128 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
5129 (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
5133 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5135 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5136 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5137 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5138 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5139 ; Split the unpredicated form after reload, so that we don't have
5140 ; the unnecessary PTRUE.
5141 "&& reload_completed
5142 && register_operand (operands[2], <MODE>mode)
5143 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5144 [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5146 [(set_attr "movprfx" "*,*,*,*,yes,yes")]
5149 ;; Predicated floating-point subtraction from a constant, merging with the second input.
;; Operand 2 is the immediate minuend (aarch64_sve_float_arith_immediate)
;; and operand 3 the register input, so both templates use FSUBR #%2.
;; Alternatives:
;;   1. operand 3 tied to operand 0: a single FSUBR.
;;   2. otherwise: MOVPRFX from operand 3 first ("movprfx" attribute "yes").
;; Matching requires aarch64_sve_pred_dominates_p (&operands[4],
;; operands[1]); the rewrite step replaces operands[4] with a copy of
;; operands[1] when the two are not already rtx_equal_p.
5151 (define_insn_and_rewrite "*cond_sub<mode>_3_const"
5152 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5154 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5157 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5158 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5159 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5163 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5165 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5166 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5167 "&& !rtx_equal_p (operands[1], operands[4])"
5169 operands[4] = copy_rtx (operands[1]);
5171 [(set_attr "movprfx" "*,yes")]
5174 ;; Predicated floating-point subtraction from a constant, merging with an
5175 ;; independent value.
;; Operand 2 is the immediate minuend, operand 3 the register input and
;; operand 4 the independent merge value.  The insn condition rejects an
;; operand 4 that is rtx_equal_p to operand 3 and requires
;; aarch64_sve_pred_dominates_p (&operands[5], operands[1]).  Alternatives:
;;   1. operand 4 "Dz": zeroing MOVPRFX from operand 3, then FSUBR #%2.
;;   2. operand 4 tied to operand 0: merging MOVPRFX from operand 3.
;;   3. operand 4 in an unrelated register: no template for it is visible in
;;      this excerpt.
;; Rewrite step: after reload, if operand 4 is a register other than
;; operand 0, emit gen_vcond_mask_<mode><vpred> into operands[0] from
;; operands[3], operands[4] and operands[1], then make operands[4] and
;; operands[3] refer to operands[0].  Otherwise, replace operands[5] with a
;; copy of operands[1] when they are not rtx_equal_p.
5176 (define_insn_and_rewrite "*cond_sub<mode>_any_const"
5177 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5179 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5182 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5183 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5184 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5186 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5189 && !rtx_equal_p (operands[3], operands[4])
5190 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5192 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5193 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5197 if (reload_completed
5198 && register_operand (operands[4], <MODE>mode)
5199 && !rtx_equal_p (operands[0], operands[4]))
5201 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5202 operands[4], operands[1]));
5203 operands[4] = operands[3] = operands[0];
5205 else if (!rtx_equal_p (operands[1], operands[5]))
5206 operands[5] = copy_rtx (operands[1]);
5210 [(set_attr "movprfx" "yes")]
5213 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5215 ;; -------------------------------------------------------------------------
5216 ;; ---- [FP] Absolute difference
5217 ;; -------------------------------------------------------------------------
5220 ;; -------------------------------------------------------------------------
5222 ;; Predicated floating-point absolute difference.
;; Expander taking a predicate (operand 1), a GP-strictness flag (operand 4)
;; and two register inputs (operands 2 and 3); the result is operand 0.
;; NOTE(review): the rtx operators that wrap these operands are not visible
;; in this excerpt, so how the absolute difference is represented cannot be
;; confirmed from here.
5223 (define_expand "@aarch64_pred_abd<mode>"
5224 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5226 [(match_operand:<VPRED> 1 "register_operand")
5227 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5231 (match_operand:SVE_FULL_F 2 "register_operand")
5232 (match_operand:SVE_FULL_F 3 "register_operand")]
5238 ;; Predicated floating-point absolute difference.
;; The pattern carries two GP-strictness operands (4 and 6).  Operand 2 is
;; marked commutative ("%") with operand 3.  Alternatives:
;;   1. operand 2 tied to operand 0: a single FABD with operand 3.
;;   2. otherwise: MOVPRFX from operand 2 first ("movprfx" attribute "yes").
;; Matching requires aarch64_sve_pred_dominates_p (&operands[5],
;; operands[1]); the rewrite step replaces operands[5] with a copy of
;; operands[1] when the two are not already rtx_equal_p.
5239 (define_insn_and_rewrite "*aarch64_pred_abd<mode>"
5240 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5242 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5243 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5246 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5247 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5248 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5251 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5253 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5254 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5255 "&& !rtx_equal_p (operands[1], operands[5])"
5257 operands[5] = copy_rtx (operands[1]);
5259 [(set_attr "movprfx" "*,yes")]
;; Expander for the merging absolute-difference forms.  Operand 1 is the
;; predicate, operands 2 and 3 are register inputs and operand 4 is a
;; register or zero; SVE_STRICT_GP appears twice in the generated rtx.
;; The preparation code swaps operands 2 and 3 when operand 3 is
;; rtx_equal_p to operand 4, so that a merge value equal to one of the
;; inputs always ends up in operand 2.
5262 (define_expand "@aarch64_cond_abd<mode>"
5263 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5265 [(match_operand:<VPRED> 1 "register_operand")
5268 (const_int SVE_STRICT_GP)
5271 (const_int SVE_STRICT_GP)
5272 (match_operand:SVE_FULL_F 2 "register_operand")
5273 (match_operand:SVE_FULL_F 3 "register_operand")]
5276 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5280 if (rtx_equal_p (operands[3], operands[4]))
5281 std::swap (operands[2], operands[3]);
5284 ;; Predicated floating-point absolute difference, merging with the first input.
;; Alternatives:
;;   1. operand 2 tied to operand 0: a single FABD with operand 3.
;;   2. otherwise: MOVPRFX from operand 2 first ("movprfx" attribute "yes").
;; Two predicate slots are checked: the insn condition requires
;; aarch64_sve_pred_dominates_p for both operands[4] and operands[6]
;; against operands[1].  If either differs from operands[1], the rewrite
;; step replaces both with copies of operands[1].
5286 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2"
5287 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5289 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5292 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5295 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5296 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5297 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5303 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5304 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5306 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5307 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5308 "&& (!rtx_equal_p (operands[1], operands[4])
5309 || !rtx_equal_p (operands[1], operands[6]))"
5311 operands[4] = copy_rtx (operands[1]);
5312 operands[6] = copy_rtx (operands[1]);
5314 [(set_attr "movprfx" "*,yes")]
5317 ;; Predicated floating-point absolute difference, merging with the second input.
;; Mirror image of the "_2" form: operand 3 is the input that may be tied
;; to the result, and FABD is printed with operand 2 as the other source
;; (no reversed mnemonic is used).  Alternatives:
;;   1. operand 3 tied to operand 0: a single FABD with operand 2.
;;   2. otherwise: MOVPRFX from operand 3 first ("movprfx" attribute "yes").
;; The insn condition requires aarch64_sve_pred_dominates_p for both
;; operands[4] and operands[6] against operands[1].  If either differs from
;; operands[1], the rewrite step replaces both with copies of operands[1].
5319 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3"
5320 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5322 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5325 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5328 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5329 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5330 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5336 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5337 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5339 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5340 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5341 "&& (!rtx_equal_p (operands[1], operands[4])
5342 || !rtx_equal_p (operands[1], operands[6]))"
5344 operands[4] = copy_rtx (operands[1]);
5345 operands[6] = copy_rtx (operands[1]);
5347 [(set_attr "movprfx" "*,yes")]
5350 ;; Predicated floating-point absolute difference, merging with an
5351 ;; independent value.
;; Operand 4 is the independent merge value (register or zero).  The insn
;; condition rejects an operand 4 that is rtx_equal_p to operand 2 or 3 and
;; requires aarch64_sve_pred_dominates_p for both operands[5] and
;; operands[7] against operands[1].
;; Alternatives (operand 0 is earlyclobber in all of them):
;;   1. operand 4 "Dz", operand 2 tied: zeroing MOVPRFX of %0 onto itself,
;;      then FABD with operand 3.
;;   2. operand 4 "Dz", operand 3 tied: zeroing MOVPRFX of %0 onto itself,
;;      then FABD with operand 2.
;;   3. operand 4 "Dz", nothing tied: zeroing MOVPRFX from operand 2.
;;   4. operand 4 tied to operand 0: merging MOVPRFX from operand 2.
;;   5. operand 4 in an unrelated register: no template for it is visible in
;;      this excerpt.
;; Rewrite step: after reload, if operand 4 is a register other than
;; operand 0, emit gen_vcond_mask_<mode><vpred> into operands[0] from
;; operands[3], operands[4] and operands[1], then make operands[4] and
;; operands[3] refer to operands[0].  Otherwise, if operands[5] or
;; operands[7] differs from operands[1], replace both with copies of it.
5352 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any"
5353 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5355 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5358 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5361 (match_operand:SI 8 "aarch64_sve_gp_strictness")
5362 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5363 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5366 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5369 && !rtx_equal_p (operands[2], operands[4])
5370 && !rtx_equal_p (operands[3], operands[4])
5371 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
5372 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
5374 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5375 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5376 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5377 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5381 if (reload_completed
5382 && register_operand (operands[4], <MODE>mode)
5383 && !rtx_equal_p (operands[0], operands[4]))
5385 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5386 operands[4], operands[1]));
5387 operands[4] = operands[3] = operands[0];
5389 else if (!rtx_equal_p (operands[1], operands[5])
5390 || !rtx_equal_p (operands[1], operands[7]))
5392 operands[5] = copy_rtx (operands[1]);
5393 operands[7] = copy_rtx (operands[1]);
5398 [(set_attr "movprfx" "yes")]
5401 ;; -------------------------------------------------------------------------
5402 ;; ---- [FP] Multiplication
5403 ;; -------------------------------------------------------------------------
5406 ;; -------------------------------------------------------------------------
5408 ;; Predicated floating-point multiplication.
;; Five alternatives; operand 2 is marked commutative ("%") with operand 3.
;;   1. operand 2 tied, "vsM" immediate operand 3: FMUL #%3.
;;   2. all-register with strictness "Z"; its output template is not visible
;;      in this excerpt.
;;   3. operand 2 tied, register operand 3, strictness "Ui1": FMUL.
;;   4. untied immediate form: MOVPRFX from operand 2, then FMUL #%3.
;;   5. untied register form: MOVPRFX from operand 2, then FMUL.
;; After reload, a register operand 3 combined with SVE_RELAXED_GP is split
;; into a plain (mult ...) of operands 2 and 3 with no predicate operand.
5409 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5410 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
5412 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5413 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
5414 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
5415 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
5419 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5421 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5422 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5423 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5424 ; Split the unpredicated form after reload, so that we don't have
5425 ; the unnecessary PTRUE.
5426 "&& reload_completed
5427 && register_operand (operands[3], <MODE>mode)
5428 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5429 [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5431 [(set_attr "movprfx" "*,*,*,yes,yes")]
5434 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5435 ;; SVE_COND_FP_BINARY_I1.
5437 ;; Unpredicated multiplication by selected lanes.
;; Operand 2 is the vector supplying the lane (register class given by
;; <sve_lane_con>) and operand 3 the constant lane index; the pair is
;; wrapped in UNSPEC_SVE_LANE_SELECT.  Operand 1 is the other multiplicand.
;; The template prints the indexed form "%2.<Vetype>[%3]" and takes no
;; predicate operand.
5438 (define_insn "@aarch64_mul_lane_<mode>"
5439 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5442 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
5443 (match_operand:SI 3 "const_int_operand")]
5444 UNSPEC_SVE_LANE_SELECT)
5445 (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
5447 "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
5450 ;; -------------------------------------------------------------------------
5451 ;; ---- [FP] Binary logical operations
5452 ;; -------------------------------------------------------------------------
5457 ;; -------------------------------------------------------------------------
5459 ;; Binary logical operations on floating-point modes. We avoid subregs
5460 ;; by providing this, but we need to use UNSPECs since rtx logical ops
5461 ;; aren't defined for floating-point modes.
;; Single-alternative, all-register form.  The template prints every
;; operand with a fixed ".d" suffix instead of <Vetype>; for a bitwise
;; operation the element size chosen does not change the result.
5462 (define_insn "*<optab><mode>3"
5463 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5465 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5466 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5469 "<logicalf_op>\t%0.d, %1.d, %2.d"
5472 ;; -------------------------------------------------------------------------
5473 ;; ---- [FP] Sign copying
5474 ;; -------------------------------------------------------------------------
5475 ;; The patterns in this section are synthetic.
5476 ;; -------------------------------------------------------------------------
;; Expands copysign through integer vector operations.  Operands 1 and 2
;; are viewed in the integer-equivalent mode via lowpart_subreg, two AND
;; insns with constant vectors from aarch64_simd_gen_const_vector_dup are
;; emitted, the "sign" and "mant" temporaries are ORed into int_res, and
;; int_res is moved to operand 0 viewed back in <MODE>.  "bits" is the
;; element width minus one, i.e. the index of each element's top bit.
;; NOTE(review): the AND destinations, sources and mask constants are not
;; visible in this excerpt; presumably one AND isolates the sign bit of
;; operand 2 and the other clears the sign bit of operand 1 -- confirm.
5478 (define_expand "copysign<mode>3"
5479 [(match_operand:SVE_FULL_F 0 "register_operand")
5480 (match_operand:SVE_FULL_F 1 "register_operand")
5481 (match_operand:SVE_FULL_F 2 "register_operand")]
5484 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5485 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
5486 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5487 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5489 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5490 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5492 emit_insn (gen_and<v_int_equiv>3
5494 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5497 emit_insn (gen_and<v_int_equiv>3
5499 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5502 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
5503 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
;; Expands xorsign through integer vector operations.  Operands 1 and 2 are
;; viewed in the integer-equivalent mode via lowpart_subreg, one AND insn
;; with a constant vector from aarch64_simd_gen_const_vector_dup is
;; emitted, arg1 is XORed with the "sign" temporary into int_res, and
;; int_res is moved to operand 0 viewed back in <MODE>.  "bits" is the
;; element width minus one, i.e. the index of each element's top bit.
;; NOTE(review): the AND's destination, source and mask constant are not
;; visible in this excerpt; presumably it isolates the sign bit of
;; operand 2 into "sign" -- confirm.
5508 (define_expand "xorsign<mode>3"
5509 [(match_operand:SVE_FULL_F 0 "register_operand")
5510 (match_operand:SVE_FULL_F 1 "register_operand")
5511 (match_operand:SVE_FULL_F 2 "register_operand")]
5514 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5515 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5516 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5518 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5519 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5521 emit_insn (gen_and<v_int_equiv>3
5523 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5526 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
5527 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5532 ;; -------------------------------------------------------------------------
5533 ;; ---- [FP] Maximum and minimum
5534 ;; -------------------------------------------------------------------------
5540 ;; -------------------------------------------------------------------------
5542 ;; Unpredicated fmax/fmin (the libm functions). The optabs for the
5543 ;; smin/smax rtx codes are handled in the generic section above.
;; Three-operand expander: operand 1 must be a register, while operand 2
;; may be anything aarch64_sve_float_maxmin_operand accepts.  The generated
;; rtx carries the constant SVE_RELAXED_GP, and the preparation code fills
;; in operands[3] from aarch64_ptrue_reg (<VPRED>mode).
;; NOTE(review): operands[3] is presumably an all-true governing predicate;
;; aarch64_ptrue_reg is not defined in this section, so confirm there.
5544 (define_expand "<maxmin_uns><mode>3"
5545 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5548 (const_int SVE_RELAXED_GP)
5549 (match_operand:SVE_FULL_F 1 "register_operand")
5550 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
5551 SVE_COND_FP_MAXMIN_PUBLIC))]
5554 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5558 ;; Predicated floating-point maximum/minimum.
;; Operand 1 is the governing predicate, operand 4 the GP-strictness flag.
;; Operand 2 is marked commutative ("%") with operand 3.  Alternatives:
;;   1. operand 2 tied, "vsB" immediate operand 3: <sve_fp_op> #%3.
;;   2. operand 2 tied, register operand 3: <sve_fp_op> with operand 3.
;;   3. untied immediate form: MOVPRFX from operand 2 first.
;;   4. untied register form: MOVPRFX from operand 2 first.
;; Alternatives 3 and 4 are flagged "yes" in the "movprfx" attribute.
5559 (define_insn "@aarch64_pred_<optab><mode>"
5560 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
5562 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5563 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5564 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
5565 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
5566 SVE_COND_FP_MAXMIN))]
5569 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5570 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5571 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5572 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5573 [(set_attr "movprfx" "*,*,yes,yes")]
5576 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5577 ;; SVE_COND_FP_BINARY_I1.
5579 ;; -------------------------------------------------------------------------
5580 ;; ---- [PRED] Binary logical operations
5581 ;; -------------------------------------------------------------------------
5589 ;; -------------------------------------------------------------------------
5591 ;; Predicate AND. We can reuse one of the inputs as the GP.
5592 ;; Doubling the second operand is the preferred implementation
5593 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
;; Plain (and ...) of two predicate registers with no separate governing
;; predicate in the rtx.  The template passes operand 1 as the zeroing
;; predicate (%1/z) and operand 2 as both source operands.
5594 (define_insn "and<mode>3"
5595 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5596 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
5597 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
5599 "and\t%0.b, %1/z, %2.b, %2.b"
5602 ;; Unpredicated predicate EOR and ORR.
;; Three-operand expander over the LOGICAL_OR code iterator: operand 0 is
;; the result and operands 1 and 2 the predicate inputs.  The preparation
;; code fills in operands[3] from aarch64_ptrue_reg (<MODE>mode).
;; NOTE(review): operands[3] is presumably an all-true governing predicate;
;; where it sits in the generated rtx is not visible in this excerpt.
5603 (define_expand "<optab><mode>3"
5604 [(set (match_operand:PRED_ALL 0 "register_operand")
5606 (LOGICAL_OR:PRED_ALL
5607 (match_operand:PRED_ALL 1 "register_operand")
5608 (match_operand:PRED_ALL 2 "register_operand"))
5612 operands[3] = aarch64_ptrue_reg (<MODE>mode);
5616 ;; Predicated predicate AND, EOR and ORR.
;; Operands 2 and 3 are the inputs to the logical operation and operand 1
;; is the governing predicate, printed as a zeroing predicate (%1/z).  All
;; operands are predicate registers ("Upa") and are printed with ".b".
5617 (define_insn "@aarch64_pred_<optab><mode>_z"
5618 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5621 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5622 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5623 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5625 "<logical>\t%0.b, %1/z, %2.b, %3.b"
5628 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
5629 ;; the GP. Store the result in operand 0 and set the flags in the same
5630 ;; way as for PTEST.
;; Two sets: the first writes (reg:CC_NZC CC_REGNUM) and the second writes
;; operand 0, reusing operands 2 and 3 through match_dup.  Operand 1 is
;; always a VNx16BI predicate regardless of <mode>, and operand 5 is an
;; aarch64_sve_ptrue_flag.  The template prints the flag-setting
;; "<logical>s" form of the instruction.
5631 (define_insn "*<optab><mode>3_cc"
5632 [(set (reg:CC_NZC CC_REGNUM)
5634 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5636 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5639 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5640 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5643 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5644 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
5647 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5650 ;; Same with just the flags result.
;; Flags-only variant: the pattern sets (reg:CC_NZC CC_REGNUM) and merely
;; clobbers operand 0, which is a VNx16BI match_scratch.  The emitted
;; instruction is the same "<logical>s" form, with the scratch register as
;; its destination.
5651 (define_insn "*<optab><mode>3_ptest"
5652 [(set (reg:CC_NZC CC_REGNUM)
5654 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5656 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5659 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5660 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5663 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5665 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5668 ;; -------------------------------------------------------------------------
5669 ;; ---- [PRED] Binary logical operations (inverted second input)
5670 ;; -------------------------------------------------------------------------
5674 ;; -------------------------------------------------------------------------
5676 ;; Predicated predicate BIC and ORN.
;; The (not ...) wraps operand 3 and appears first in the rtx, but the
;; template prints operand 2 before operand 3, so the inverted input is the
;; last source operand of the instruction.  Operand 1 is the governing
;; predicate, printed as a zeroing predicate (%1/z).
5677 (define_insn "aarch64_pred_<nlogical><mode>_z"
5678 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5681 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5682 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5683 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5685 "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
5688 ;; Same, but set the flags as a side-effect.
;; Two sets: the first writes (reg:CC_NZC CC_REGNUM) and the second writes
;; operand 0, with the inverted input referenced again as
;; (not (match_dup 3)).  Operand 1 is always a VNx16BI predicate and
;; operand 5 is an aarch64_sve_ptrue_flag.  The template prints the
;; flag-setting "<nlogical>s" form.
5689 (define_insn "*<nlogical><mode>3_cc"
5690 [(set (reg:CC_NZC CC_REGNUM)
5692 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5694 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5698 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5699 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5702 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5703 (and:PRED_ALL (NLOGICAL:PRED_ALL
5704 (not:PRED_ALL (match_dup 3))
5708 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5711 ;; Same with just the flags result.
;; Only CC_REGNUM is set; operand 0 is a scratch clobber that gives the
;; "<nlogical>s" instruction a predicate register to write.
5712 (define_insn "*<nlogical><mode>3_ptest"
5713 [(set (reg:CC_NZC CC_REGNUM)
5715 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5717 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5721 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5722 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5725 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5727 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5730 ;; -------------------------------------------------------------------------
5731 ;; ---- [PRED] Binary logical operations (inverted result)
5732 ;; -------------------------------------------------------------------------
5736 ;; -------------------------------------------------------------------------
5738 ;; Predicated predicate NAND and NOR.
;; Both inputs (operands 2 and 3) appear wrapped in (not ...); operand 1 is
;; printed as the zeroing predicate (%1/z).
5739 (define_insn "aarch64_pred_<logical_nn><mode>_z"
5740 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5743 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5744 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5745 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5747 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
5750 ;; Same, but set the flags as a side-effect.
;; Both CC_REGNUM and the predicate in operand 0 are set, so the template
;; uses the flag-setting "<logical_nn>s" mnemonic.
5751 (define_insn "*<logical_nn><mode>3_cc"
5752 [(set (reg:CC_NZC CC_REGNUM)
5754 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5756 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5760 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5762 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5765 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5766 (and:PRED_ALL (NLOGICAL:PRED_ALL
5767 (not:PRED_ALL (match_dup 2))
5768 (not:PRED_ALL (match_dup 3)))
5771 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5774 ;; Same with just the flags result.
;; Only CC_REGNUM is set; operand 0 is a scratch clobber that gives the
;; "<logical_nn>s" instruction a predicate register to write.
5775 (define_insn "*<logical_nn><mode>3_ptest"
5776 [(set (reg:CC_NZC CC_REGNUM)
5778 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5780 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5784 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5786 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5789 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5791 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5794 ;; =========================================================================
5795 ;; == Ternary arithmetic
5796 ;; =========================================================================
5798 ;; -------------------------------------------------------------------------
5799 ;; ---- [INT] MLA and MAD
5800 ;; -------------------------------------------------------------------------
5804 ;; -------------------------------------------------------------------------
5806 ;; Unpredicated integer addition of product.
;; Operands 1 and 2 are the multiplication inputs (operand 2 may be any
;; non-memory operand) and operand 3 is the addend.  The preparation code
;; calls aarch64_prepare_sve_int_fma with PLUS and creates operand 4 with
;; aarch64_ptrue_reg (<VPRED>mode).
5807 (define_expand "fma<mode>4"
5808 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5813 (match_operand:SVE_FULL_I 1 "register_operand")
5814 (match_operand:SVE_FULL_I 2 "nonmemory_operand"))]
5816 (match_operand:SVE_FULL_I 3 "register_operand")))]
5819 if (aarch64_prepare_sve_int_fma (operands, PLUS))
5821 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
5825 ;; Predicated integer addition of product.
;; Three alternatives:
;; - destination tied to multiplication operand 2: MAD
;; - destination tied to addend operand 4: MLA
;; - destination independent (earlyclobber): MOVPRFX from operand 4, then MLA
5826 (define_insn "@aarch64_pred_fma<mode>"
5827 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
5830 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5832 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
5833 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
5835 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))]
5838 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5839 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5840 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
5841 [(set_attr "movprfx" "*,*,yes")]
5844 ;; Predicated integer addition of product with merging.
;; Operand 1 is the predicate, operands 2 and 3 the multiplication inputs,
;; operand 4 the addend and operand 5 the fallback value (a register or zero).
;; aarch64_prepare_sve_cond_int_fma is called with PLUS before the operand
;; swap below.
5845 (define_expand "cond_fma<mode>"
5846 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5848 [(match_operand:<VPRED> 1 "register_operand")
5851 (match_operand:SVE_FULL_I 2 "register_operand")
5852 (match_operand:SVE_FULL_I 3 "general_operand"))
5853 (match_operand:SVE_FULL_I 4 "register_operand"))
5854 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
5858 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
5860 /* Swap the multiplication operands if the fallback value is the
5861 second of the two. */
5862 if (rtx_equal_p (operands[3], operands[5]))
5863 std::swap (operands[2], operands[3]);
5867 ;; Predicated integer addition of product, merging with the first input.
;; Alternative 1 ties the destination to operand 2 and emits MAD directly;
;; alternative 2 first copies operand 2 into the destination with MOVPRFX.
5868 (define_insn "*cond_fma<mode>_2"
5869 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5871 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5874 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
5875 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
5876 (match_operand:SVE_FULL_I 4 "register_operand" "w, w"))
5881 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5882 movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
5883 [(set_attr "movprfx" "*,yes")]
5886 ;; Predicated integer addition of product, merging with the third input.
;; Alternative 1 ties the destination to addend operand 4 and emits MLA
;; directly; alternative 2 first copies operand 4 into the destination with
;; MOVPRFX.
5887 (define_insn "*cond_fma<mode>_4"
5888 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5890 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5893 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
5894 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
5895 (match_operand:SVE_FULL_I 4 "register_operand" "0, w"))
5900 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5901 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
5902 [(set_attr "movprfx" "*,yes")]
5905 ;; Predicated integer addition of product, merging with an independent value.
;; The insn condition requires operand 5 to differ from operands 2, 3 and 4.
;; The first four alternatives merge with zero (Dz) through a zeroing
;; MOVPRFX, choosing MLA or MAD according to which input is tied to the
;; destination.  The fifth ties operand 5 to the destination and uses a
;; merging MOVPRFX from operand 4.
;; After reload, if operand 5 is a register other than operand 0, the rewrite
;; emits a vcond_mask select of operand 4 (true) or operand 5 (false) under
;; predicate operand 1 into operand 0, then uses operand 0 as both operand 4
;; and operand 5.
5906 (define_insn_and_rewrite "*cond_fma<mode>_any"
5907 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
5909 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5912 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
5913 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))
5914 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w"))
5915 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
5918 && !rtx_equal_p (operands[2], operands[5])
5919 && !rtx_equal_p (operands[3], operands[5])
5920 && !rtx_equal_p (operands[4], operands[5])"
5922 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5923 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5924 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5925 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
5926 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5928 "&& reload_completed
5929 && register_operand (operands[5], <MODE>mode)
5930 && !rtx_equal_p (operands[0], operands[5])"
5932 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
5933 operands[5], operands[1]));
5934 operands[5] = operands[4] = operands[0];
5936 [(set_attr "movprfx" "yes")]
5939 ;; -------------------------------------------------------------------------
5940 ;; ---- [INT] MLS and MSB
5941 ;; -------------------------------------------------------------------------
5945 ;; -------------------------------------------------------------------------
5947 ;; Unpredicated integer subtraction of product.
;; Operand 3 is the value subtracted from; operands 1 and 2 are the
;; multiplication inputs.  The preparation code calls
;; aarch64_prepare_sve_int_fma with MINUS and creates operand 4 with
;; aarch64_ptrue_reg (<VPRED>mode).
5948 (define_expand "fnma<mode>4"
5949 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5951 (match_operand:SVE_FULL_I 3 "register_operand")
5955 (match_operand:SVE_FULL_I 1 "register_operand")
5956 (match_operand:SVE_FULL_I 2 "general_operand"))]
5960 if (aarch64_prepare_sve_int_fma (operands, MINUS))
5962 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
5966 ;; Predicated integer subtraction of product.
;; Three alternatives:
;; - destination tied to multiplication operand 2: MSB
;; - destination tied to operand 4: MLS
;; - destination independent (earlyclobber): MOVPRFX from operand 4, then MLS
5967 (define_insn "@aarch64_pred_fnma<mode>"
5968 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
5970 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")
5972 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5974 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
5975 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
5979 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5980 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5981 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
5982 [(set_attr "movprfx" "*,*,yes")]
5985 ;; Predicated integer subtraction of product with merging.
;; Operand 1 is the predicate, operand 4 the value subtracted from, operands
;; 2 and 3 the multiplication inputs and operand 5 the fallback value (a
;; register or zero).  aarch64_prepare_sve_cond_int_fma is called with MINUS
;; before the operand swap below.
5986 (define_expand "cond_fnma<mode>"
5987 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5989 [(match_operand:<VPRED> 1 "register_operand")
5991 (match_operand:SVE_FULL_I 4 "register_operand")
5993 (match_operand:SVE_FULL_I 2 "register_operand")
5994 (match_operand:SVE_FULL_I 3 "general_operand")))
5995 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
5999 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
6001 /* Swap the multiplication operands if the fallback value is the
6002 second of the two. */
6003 if (rtx_equal_p (operands[3], operands[5]))
6004 std::swap (operands[2], operands[3]);
6008 ;; Predicated integer subtraction of product, merging with the first input.
;; Alternative 1 ties the destination to operand 2 and emits MSB directly;
;; alternative 2 first copies operand 2 into the destination with MOVPRFX.
6009 (define_insn "*cond_fnma<mode>_2"
6010 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6012 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6014 (match_operand:SVE_FULL_I 4 "register_operand" "w, w")
6016 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
6017 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6022 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6023 movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6024 [(set_attr "movprfx" "*,yes")]
6027 ;; Predicated integer subtraction of product, merging with the third input.
;; Alternative 1 ties the destination to operand 4 and emits MLS directly;
;; alternative 2 first copies operand 4 into the destination with MOVPRFX.
6028 (define_insn "*cond_fnma<mode>_4"
6029 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6031 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6033 (match_operand:SVE_FULL_I 4 "register_operand" "0, w")
6035 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6036 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6041 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6042 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6043 [(set_attr "movprfx" "*,yes")]
6046 ;; Predicated integer subtraction of product, merging with an
6047 ;; independent value.
;; The insn condition requires operand 5 to differ from operands 2, 3 and 4.
;; The first four alternatives merge with zero (Dz) through a zeroing
;; MOVPRFX, choosing MLS or MSB according to which input is tied to the
;; destination.  The fifth ties operand 5 to the destination and uses a
;; merging MOVPRFX from operand 4.
;; After reload, if operand 5 is a register other than operand 0, the rewrite
;; emits a vcond_mask select of operand 4 (true) or operand 5 (false) under
;; predicate operand 1 into operand 0, then uses operand 0 as both operand 4
;; and operand 5.
6048 (define_insn_and_rewrite "*cond_fnma<mode>_any"
6049 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6051 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6053 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")
6055 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6056 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")))
6057 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6060 && !rtx_equal_p (operands[2], operands[5])
6061 && !rtx_equal_p (operands[3], operands[5])
6062 && !rtx_equal_p (operands[4], operands[5])"
6064 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6065 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6066 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6067 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6068 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6070 "&& reload_completed
6071 && register_operand (operands[5], <MODE>mode)
6072 && !rtx_equal_p (operands[0], operands[5])"
6074 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6075 operands[5], operands[1]));
6076 operands[5] = operands[4] = operands[0];
6078 [(set_attr "movprfx" "yes")]
6081 ;; -------------------------------------------------------------------------
6082 ;; ---- [INT] Dot product
6083 ;; -------------------------------------------------------------------------
6087 ;; -------------------------------------------------------------------------
6089 ;; Four-element integer dot-product with accumulation.
;; Operands 1 and 2 are the <VSI2QI> inputs and operand 3 is the accumulator.
;; Alternative 1 ties the accumulator to the destination; alternative 2
;; copies it into the destination with MOVPRFX first.
;; The templates spell the tab as "\t", like the other templates in this file
;; (the md reader produces the same string for "\\t" and "\t").
6090 (define_insn "<sur>dot_prod<vsi2qi>"
6091 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6093 (unspec:SVE_FULL_SDI
6094 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6095 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6097 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
6100 <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
6101 movprfx\t%0, %3\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
6102 [(set_attr "movprfx" "*,yes")]
6105 ;; Four-element integer dot-product by selected lanes with accumulation.
;; Operand 2 is restricted by <sve_lane_con> and indexed by the constant in
;; operand 3 (printed as [%3]); operand 4 is the accumulator.  Alternative 1
;; ties the accumulator to the destination; alternative 2 copies it into the
;; destination with MOVPRFX first.
;; The templates spell the tab as "\t", like the other templates in this file
;; (the md reader produces the same string for "\\t" and "\t").
6106 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6107 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6109 (unspec:SVE_FULL_SDI
6110 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6112 [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6113 (match_operand:SI 3 "const_int_operand")]
6114 UNSPEC_SVE_LANE_SELECT)]
6116 (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
6119 <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
6120 movprfx\t%0, %4\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
6121 [(set_attr "movprfx" "*,yes")]
6124 ;; -------------------------------------------------------------------------
6125 ;; ---- [INT] Sum of absolute differences
6126 ;; -------------------------------------------------------------------------
6127 ;; The patterns in this section are synthetic.
6128 ;; -------------------------------------------------------------------------
6130 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
6131 ;; operands 1 and 2. The sequence also has to perform a widening reduction of
6132 ;; the difference into a vector and accumulate that into operand 3 before
6133 ;; copying that into the result operand 0.
6134 ;; Perform that with a sequence of:
6136 ;; [SU]ABD diff.b, p0/m, op1.b, op2.b
6137 ;; MOVPRFX op0, op3 // If necessary
6138 ;; UDOT op0.s, diff.b, ones.b
;; Only the ABD step depends on <sur>; the dot product is always emitted
;; through gen_udot_prod, with a register holding CONST1_RTX as its second
;; input.
6139 (define_expand "<sur>sad<vsi2qi>"
6140 [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
6141 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
6142 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
6143 (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
6146 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
6147 rtx diff = gen_reg_rtx (<VSI2QI>mode);
6148 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
6149 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
6154 ;; -------------------------------------------------------------------------
6155 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
6156 ;; -------------------------------------------------------------------------
6157 ;; Includes merging patterns for:
6166 ;; -------------------------------------------------------------------------
6168 ;; Unpredicated floating-point ternary operations.
;; The expansion uses the SVE_RELAXED_GP strictness and creates operand 4
;; with aarch64_ptrue_reg (<VPRED>mode).
6169 (define_expand "<optab><mode>4"
6170 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6173 (const_int SVE_RELAXED_GP)
6174 (match_operand:SVE_FULL_F 1 "register_operand")
6175 (match_operand:SVE_FULL_F 2 "register_operand")
6176 (match_operand:SVE_FULL_F 3 "register_operand")]
6177 SVE_COND_FP_TERNARY))]
6180 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6184 ;; Predicated floating-point ternary operations.
;; Operand 5 is the GP strictness flag.  Three alternatives:
;; - destination tied to operand 4: <sve_fmla_op>
;; - destination tied to multiplication operand 2: <sve_fmad_op>
;; - destination independent (earlyclobber): MOVPRFX from operand 4, then
;;   <sve_fmla_op>
6185 (define_insn "@aarch64_pred_<optab><mode>"
6186 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
6188 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6189 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6190 (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
6191 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
6192 (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
6193 SVE_COND_FP_TERNARY))]
6196 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6197 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6198 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6199 [(set_attr "movprfx" "*,*,yes")]
6202 ;; Predicated floating-point ternary operations with merging.
;; The operation is expanded with the SVE_STRICT_GP strictness.  Operand 5 is
;; the fallback value (a register or zero).
6203 (define_expand "@cond_<optab><mode>"
6204 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6206 [(match_operand:<VPRED> 1 "register_operand")
6209 (const_int SVE_STRICT_GP)
6210 (match_operand:SVE_FULL_F 2 "register_operand")
6211 (match_operand:SVE_FULL_F 3 "register_operand")
6212 (match_operand:SVE_FULL_F 4 "register_operand")]
6213 SVE_COND_FP_TERNARY)
6214 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6218 /* Swap the multiplication operands if the fallback value is the
6219 second of the two. */
6220 if (rtx_equal_p (operands[3], operands[5]))
6221 std::swap (operands[2], operands[3]);
6224 ;; Predicated floating-point ternary operations, merging with the
;; first input.
;; The insn condition accepts any operand 5 for which
;; aarch64_sve_pred_dominates_p holds relative to operand 1; the rewrite then
;; replaces operand 5 with a copy of operand 1 when the two differ.
;; Alternative 1 ties the destination to operand 2; alternative 2 copies
;; operand 2 into the destination with MOVPRFX first.
6226 (define_insn_and_rewrite "*cond_<optab><mode>_2"
6227 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6229 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6232 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6233 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
6234 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6235 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
6236 SVE_COND_FP_TERNARY)
6239 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6241 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6242 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6243 "&& !rtx_equal_p (operands[1], operands[5])"
6245 operands[5] = copy_rtx (operands[1]);
6247 [(set_attr "movprfx" "*,yes")]
6250 ;; Predicated floating-point ternary operations, merging with the
;; third input.
;; The insn condition accepts any operand 5 for which
;; aarch64_sve_pred_dominates_p holds relative to operand 1; the rewrite then
;; replaces operand 5 with a copy of operand 1 when the two differ.
;; Alternative 1 ties the destination to operand 4; alternative 2 copies
;; operand 4 into the destination with MOVPRFX first.
6252 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6253 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6255 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6258 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6259 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6260 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6261 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6262 SVE_COND_FP_TERNARY)
6265 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6267 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6268 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6269 "&& !rtx_equal_p (operands[1], operands[5])"
6271 operands[5] = copy_rtx (operands[1]);
6273 [(set_attr "movprfx" "*,yes")]
6276 ;; Predicated floating-point ternary operations, merging with an
6277 ;; independent value.
;; The insn condition requires operand 5 to differ from operands 2, 3 and 4
;; and requires aarch64_sve_pred_dominates_p to hold for operand 6 relative
;; to operand 1.  The first four alternatives merge with zero (Dz) through a
;; zeroing MOVPRFX; the fifth ties operand 5 to the destination and uses a
;; merging MOVPRFX from operand 4.
;; The rewrite code has two visible cases:
;; - after reload, with operand 5 a register other than operand 0: emit a
;;   vcond_mask select of operand 4 (true) or operand 5 (false) under
;;   predicate operand 1 into operand 0, then use operand 0 as both operand 4
;;   and operand 5;
;; - otherwise, if operand 6 differs from operand 1: replace operand 6 with a
;;   copy of operand 1.
6278 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6279 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6281 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6284 (match_operand:SI 7 "aarch64_sve_gp_strictness")
6285 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
6286 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
6287 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
6288 SVE_COND_FP_TERNARY)
6289 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6292 && !rtx_equal_p (operands[2], operands[5])
6293 && !rtx_equal_p (operands[3], operands[5])
6294 && !rtx_equal_p (operands[4], operands[5])
6295 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6297 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6298 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6299 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6300 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6301 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6305 if (reload_completed
6306 && register_operand (operands[5], <MODE>mode)
6307 && !rtx_equal_p (operands[0], operands[5]))
6309 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6310 operands[5], operands[1]));
6311 operands[5] = operands[4] = operands[0];
6313 else if (!rtx_equal_p (operands[1], operands[6]))
6314 operands[6] = copy_rtx (operands[1]);
6318 [(set_attr "movprfx" "yes")]
6321 ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
6322 ;; (fma ...) since target-independent code won't understand the indexing.
;; Operand 2 is restricted by <sve_lane_con> and indexed by the constant in
;; operand 3 (printed as [%3]).  Alternative 1 ties operand 4 to the
;; destination; alternative 2 copies operand 4 there with MOVPRFX first.
6323 (define_insn "@aarch64_<optab>_lane_<mode>"
6324 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6326 [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
6328 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6329 (match_operand:SI 3 "const_int_operand")]
6330 UNSPEC_SVE_LANE_SELECT)
6331 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6332 SVE_FP_TERNARY_LANE))]
6335 <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
6336 movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6337 [(set_attr "movprfx" "*,yes")]
6340 ;; -------------------------------------------------------------------------
6341 ;; ---- [FP] Complex multiply-add
6342 ;; -------------------------------------------------------------------------
6343 ;; Includes merging patterns for:
6345 ;; -------------------------------------------------------------------------
6347 ;; Predicated FCMLA.
;; Operand 5 is the GP strictness flag and the rotation is printed from the
;; <rot> attribute.  Alternative 1 ties operand 4 to the destination;
;; alternative 2 copies operand 4 there with MOVPRFX first.
6348 (define_insn "@aarch64_pred_<optab><mode>"
6349 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6351 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6352 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6353 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6354 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6355 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6359 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6360 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6361 [(set_attr "movprfx" "*,yes")]
6364 ;; Predicated FCMLA with merging.
;; The operation is expanded with the SVE_STRICT_GP strictness.  Operand 5 is
;; the value merged with (a register or zero).
6365 (define_expand "@cond_<optab><mode>"
6366 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6368 [(match_operand:<VPRED> 1 "register_operand")
6371 (const_int SVE_STRICT_GP)
6372 (match_operand:SVE_FULL_F 2 "register_operand")
6373 (match_operand:SVE_FULL_F 3 "register_operand")
6374 (match_operand:SVE_FULL_F 4 "register_operand")]
6376 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6381 ;; Predicated FCMLA, merging with the third input.
;; The insn condition accepts any operand 5 for which
;; aarch64_sve_pred_dominates_p holds relative to operand 1; the rewrite then
;; replaces operand 5 with a copy of operand 1 when the two differ.
;; Alternative 1 ties the destination to operand 4; alternative 2 copies
;; operand 4 into the destination with MOVPRFX first.
6382 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6383 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6385 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6388 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6389 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6390 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6391 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6395 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6397 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6398 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6399 "&& !rtx_equal_p (operands[1], operands[5])"
6401 operands[5] = copy_rtx (operands[1]);
6403 [(set_attr "movprfx" "*,yes")]
6406 ;; Predicated FCMLA, merging with an independent value.
;; The insn condition requires operand 5 to differ from operand 4 and
;; requires aarch64_sve_pred_dominates_p to hold for operand 6 relative to
;; operand 1.  The first two alternatives merge with zero (Dz) through a
;; zeroing MOVPRFX; the third ties operand 5 to the destination and uses a
;; merging MOVPRFX from operand 4.
;; The rewrite code has two visible cases:
;; - after reload, with operand 5 a register other than operand 0: emit a
;;   vcond_mask select of operand 4 (true) or operand 5 (false) under
;;   predicate operand 1 into operand 0, then use operand 0 as both operand 4
;;   and operand 5;
;; - otherwise, if operand 6 differs from operand 1: replace operand 6 with a
;;   copy of operand 1.
6407 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6408 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
6410 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
6413 (match_operand:SI 7 "aarch64_sve_gp_strictness")
6414 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
6415 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
6416 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
6418 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
6421 && !rtx_equal_p (operands[4], operands[5])
6422 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6424 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6425 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6426 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6430 if (reload_completed
6431 && register_operand (operands[5], <MODE>mode)
6432 && !rtx_equal_p (operands[0], operands[5]))
6434 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6435 operands[5], operands[1]));
6436 operands[5] = operands[4] = operands[0];
6438 else if (!rtx_equal_p (operands[1], operands[6]))
6439 operands[6] = copy_rtx (operands[1]);
6443 [(set_attr "movprfx" "yes")]
6446 ;; Unpredicated FCMLA with indexing.
;; Defined for the SVE_FULL_HSF modes only.  Operand 2 is restricted by
;; <sve_lane_pair_con> and indexed by the constant in operand 3 (printed as
;; [%3]).  Alternative 1 ties operand 4 to the destination; alternative 2
;; copies operand 4 there with MOVPRFX first.
6447 (define_insn "@aarch64_<optab>_lane_<mode>"
6448 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
6449 (unspec:SVE_FULL_HSF
6450 [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
6451 (unspec:SVE_FULL_HSF
6452 [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
6453 (match_operand:SI 3 "const_int_operand")]
6454 UNSPEC_SVE_LANE_SELECT)
6455 (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
6459 fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
6460 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
6461 [(set_attr "movprfx" "*,yes")]
6464 ;; -------------------------------------------------------------------------
6465 ;; ---- [FP] Trigonometric multiply-add
6466 ;; -------------------------------------------------------------------------
6469 ;; -------------------------------------------------------------------------
;; FTMAD.  The instruction prints the destination as its first source too,
;; so alternative 1 ties operand 1 to the destination and alternative 2
;; copies operand 1 there with MOVPRFX first.  Operand 3 is a constant
;; integer printed as the immediate.
6471 (define_insn "@aarch64_sve_tmad<mode>"
6472 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6474 [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
6475 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6476 (match_operand:DI 3 "const_int_operand")]
6480 ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
6481 movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
6482 [(set_attr "movprfx" "*,yes")]
6485 ;; =========================================================================
6486 ;; == Comparisons and selects
6487 ;; =========================================================================
6489 ;; -------------------------------------------------------------------------
6490 ;; ---- [INT,FP] Select based on predicates
6491 ;; -------------------------------------------------------------------------
6492 ;; Includes merging patterns for:
6496 ;; -------------------------------------------------------------------------
6498 ;; vcond_mask operand order: true, false, mask
6499 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
6500 ;; SEL operand order: mask, true, false
;; When the true value (operand 1) is a register, the false value (operand 2)
;; is forced into a register as well.
6501 (define_expand "@vcond_mask_<mode><vpred>"
6502 [(set (match_operand:SVE_FULL 0 "register_operand")
6504 [(match_operand:<VPRED> 3 "register_operand")
6505 (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
6506 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
6510 if (register_operand (operands[1], <MODE>mode))
6511 operands[2] = force_reg (<MODE>mode, operands[2]);
;; Selects between:
;; - two registers
6517 ;; - a duplicated immediate and a register
6518 ;; - a duplicated immediate and zero
;; The insn condition rejects a register operand 1 paired with a non-register
;; operand 2.  Alternatives whose false value is an independent register, and
;; the FMOV-with-zero alternative, are emitted as MOVPRFX sequences.
6519 (define_insn "*vcond_mask_<mode><vpred>"
6520 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
6522 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
6523 (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
6524 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
6527 && (!register_operand (operands[1], <MODE>mode)
6528 || register_operand (operands[2], <MODE>mode))"
6530 sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
6531 mov\t%0.<Vetype>, %3/m, #%I1
6532 mov\t%0.<Vetype>, %3/z, #%I1
6533 fmov\t%0.<Vetype>, %3/m, #%1
6534 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
6535 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
6536 movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
6537 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
6540 ;; Optimize selects between a duplicated scalar variable and another vector,
6541 ;; the latter of which can be a zero constant or a variable. Treat duplicates
6542 ;; of GPRs as being more expensive than duplicates of FPRs, since they
6543 ;; involve a cross-file move.
;; The alternatives come in pairs, one taking the scalar from an "r" register
;; (printed as %<vwcore>1) and one from a "w" register (printed as
;; %<Vetype>1), for each kind of operand 2: tied to the destination, zero
;; (zeroing MOVPRFX), or an independent register (MOVPRFX from operand 2).
6544 (define_insn "@aarch64_sel_dup<mode>"
6545 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
6547 [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6548 (vec_duplicate:SVE_FULL
6549 (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
6550 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
6554 mov\t%0.<Vetype>, %3/m, %<vwcore>1
6555 mov\t%0.<Vetype>, %3/m, %<Vetype>1
6556 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6557 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
6558 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6559 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
6560 [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
6563 ;; -------------------------------------------------------------------------
6564 ;; ---- [INT,FP] Compare and select
6565 ;; -------------------------------------------------------------------------
6566 ;; The patterns in this section are synthetic.
6567 ;; -------------------------------------------------------------------------
6569 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
6570 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Operand 3 is the comparison of operands 4 and 5 (in the <V_INT_EQUIV>
;; mode); operands 1 and 2 are the two values selected between.
6571 (define_expand "vcond<mode><v_int_equiv>"
6572 [(set (match_operand:SVE_FULL 0 "register_operand")
6573 (if_then_else:SVE_FULL
6574 (match_operator 3 "comparison_operator"
6575 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6576 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6577 (match_operand:SVE_FULL 1 "nonmemory_operand")
6578 (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6581 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6586 ;; Integer vcondu. Don't enforce an immediate range here, since it
6587 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Operand 3 is the comparison of operands 4 and 5 (in the <V_INT_EQUIV>
;; mode); operands 1 and 2 are the two values selected between.
6588 (define_expand "vcondu<mode><v_int_equiv>"
6589 [(set (match_operand:SVE_FULL 0 "register_operand")
6590 (if_then_else:SVE_FULL
6591 (match_operator 3 "comparison_operator"
6592 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6593 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6594 (match_operand:SVE_FULL 1 "nonmemory_operand")
6595 (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6598 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6603 ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
6604 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
;; Defined for the SVE_FULL_HSD data modes.  Operand 3 is the comparison of
;; operands 4 and 5 (in the <V_FP_EQUIV> mode, operand 5 being a register or
;; zero); operands 1 and 2 are the two values selected between.
6605 (define_expand "vcond<mode><v_fp_equiv>"
6606 [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
6607 (if_then_else:SVE_FULL_HSD
6608 (match_operator 3 "comparison_operator"
6609 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
6610 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
6611 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
6612 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
6615 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
6620 ;; -------------------------------------------------------------------------
6621 ;; ---- [INT] Comparisons
6622 ;; -------------------------------------------------------------------------
6634 ;; -------------------------------------------------------------------------
6636 ;; Signed integer comparisons. Don't enforce an immediate range here, since
6637 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
;; Operand 0 is the predicate result.  Operand 1 is the comparison
;; operator applied to integer vectors 2 and 3.  The pattern clobbers the
;; condition-code register.  Only the rtx code of operand 1 is passed to
;; aarch64_expand_sve_vec_cmp_int, together with operands 0, 2 and 3.
6639 (define_expand "vec_cmp<mode><vpred>"
6641 [(set (match_operand:<VPRED> 0 "register_operand")
6642 (match_operator:<VPRED> 1 "comparison_operator"
6643 [(match_operand:SVE_FULL_I 2 "register_operand")
6644 (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6645 (clobber (reg:CC_NZC CC_REGNUM))])]
6648 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6649 operands[2], operands[3]);
6654 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
6655 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
;; Operand 0 is the predicate result.  Operand 1 is the comparison
;; operator applied to integer vectors 2 and 3.  The pattern clobbers the
;; condition-code register.  Only the rtx code of operand 1 is passed to
;; aarch64_expand_sve_vec_cmp_int, together with operands 0, 2 and 3.
6657 (define_expand "vec_cmpu<mode><vpred>"
6659 [(set (match_operand:<VPRED> 0 "register_operand")
6660 (match_operator:<VPRED> 1 "comparison_operator"
6661 [(match_operand:SVE_FULL_I 2 "register_operand")
6662 (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6663 (clobber (reg:CC_NZC CC_REGNUM))])]
6666 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6667 operands[2], operands[3]);
6672 ;; Predicated integer comparisons.
;; Operand 1 is the governing predicate (printed as %1/z) and operand 2 is
;; its ptrue flag.  Operands 3 and 4 are the values compared.  The first
;; alternative prints operand 4 as an immediate (#%4), the second as a
;; vector register.  The condition-code register is clobbered.
6673 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
6674 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6676 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6677 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6678 (SVE_INT_CMP:<VPRED>
6679 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
6680 (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6682 (clobber (reg:CC_NZC CC_REGNUM))]
6685 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
6686 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
6689 ;; Predicated integer comparisons in which both the flag and predicate
6690 ;; results are interesting.
;; One CMP instruction sets both CC_REGNUM and predicate operand 0.
;; Operand 1 is the VNx16BI predicate printed as %1/z; operands 2 and 3
;; are the values compared (immediate or register form).  The insn
;; condition requires aarch64_sve_same_pred_for_ptest_p on operands 4
;; and 6.  When operands 4 and 6 are not rtx_equal_p, the rewrite step
;; makes operand 6 a copy of operand 4 and operand 7 the same as operand 5.
6691 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
6692 [(set (reg:CC_NZC CC_REGNUM)
6694 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6696 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6699 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6700 (SVE_INT_CMP:<VPRED>
6701 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6702 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6705 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6709 (SVE_INT_CMP:<VPRED>
6714 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6716 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6717 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6718 "&& !rtx_equal_p (operands[4], operands[6])"
6720 operands[6] = copy_rtx (operands[4]);
6721 operands[7] = operands[5];
6725 ;; Predicated integer comparisons in which only the flags result is interesting.
;; Only CC_REGNUM is set; the predicate written by the CMP instruction
;; goes to scratch operand 0.  Operand 1 is the VNx16BI predicate printed
;; as %1/z; operands 2 and 3 are the values compared (immediate or
;; register form).  The insn condition requires
;; aarch64_sve_same_pred_for_ptest_p on operands 4 and 6.  When operands
;; 4 and 6 are not rtx_equal_p, the rewrite step makes operand 6 a copy
;; of operand 4 and operand 7 the same as operand 5.
6727 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
6728 [(set (reg:CC_NZC CC_REGNUM)
6730 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6732 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6735 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6736 (SVE_INT_CMP:<VPRED>
6737 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6738 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6741 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
6743 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6745 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6746 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6747 "&& !rtx_equal_p (operands[4], operands[6])"
6749 operands[6] = copy_rtx (operands[4]);
6750 operands[7] = operands[5];
6754 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
6755 ;; comparison with an AND. Split the instruction into its preferred form
6756 ;; at the earliest opportunity, in order to get rid of the redundant PTRUE operand.
;; Matched form: a comparison of operands 2 and 3 flagged
;; SVE_KNOWN_PTRUE, combined with predicate operand 1.  Split form: the
;; comparison is flagged SVE_MAYBE_NOT_PTRUE instead.  Both the matched
;; and the split form clobber CC_REGNUM.
6758 (define_insn_and_split "*cmp<cmp_op><mode>_and"
6759 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6763 (const_int SVE_KNOWN_PTRUE)
6764 (SVE_INT_CMP:<VPRED>
6765 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6766 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6768 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
6769 (clobber (reg:CC_NZC CC_REGNUM))]
6777 (const_int SVE_MAYBE_NOT_PTRUE)
6778 (SVE_INT_CMP:<VPRED>
6782 (clobber (reg:CC_NZC CC_REGNUM))])]
6785 ;; Predicated integer wide comparisons.
;; Operand 3 (a B/H/S-element vector) is compared against operand 4, a
;; VNx2DI vector printed with the .d suffix.  Operand 1 is the VNx16BI
;; governing predicate (printed as %1/z) and operand 2 is its ptrue flag.
;; The condition-code register is clobbered.
6786 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
6787 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
6789 [(match_operand:VNx16BI 1 "register_operand" "Upl")
6790 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6792 [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
6793 (match_operand:VNx2DI 4 "register_operand" "w")]
6794 SVE_COND_INT_CMP_WIDE)]
6796 (clobber (reg:CC_NZC CC_REGNUM))]
6798 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
6801 ;; Predicated integer wide comparisons in which both the flag and
6802 ;; predicate results are interesting.
;; One wide CMP sets both CC_REGNUM and predicate operand 0.  Operands 1
;; and 6 are VNx16BI predicates with ptrue flags in operands 5 and 7;
;; only operand 1 appears in the output template.  The insn condition
;; requires aarch64_sve_same_pred_for_ptest_p on operands 4 and 6.
6803 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
6804 [(set (reg:CC_NZC CC_REGNUM)
6806 [(match_operand:VNx16BI 1 "register_operand" "Upl")
6808 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6810 [(match_operand:VNx16BI 6 "register_operand" "Upl")
6811 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6813 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
6814 (match_operand:VNx2DI 3 "register_operand" "w")]
6815 SVE_COND_INT_CMP_WIDE)]
6818 (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
6825 SVE_COND_INT_CMP_WIDE)]
6828 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6829 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
6832 ;; Predicated integer wide comparisons in which only the flags result is interesting.
;; Only CC_REGNUM is set; the predicate written by the wide CMP goes to
;; scratch operand 0.  Operands 1 and 6 are VNx16BI predicates with ptrue
;; flags in operands 5 and 7; only operand 1 appears in the output
;; template.  The insn condition requires
;; aarch64_sve_same_pred_for_ptest_p on operands 4 and 6.
6834 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
6835 [(set (reg:CC_NZC CC_REGNUM)
6837 [(match_operand:VNx16BI 1 "register_operand" "Upl")
6839 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6841 [(match_operand:VNx16BI 6 "register_operand" "Upl")
6842 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6844 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
6845 (match_operand:VNx2DI 3 "register_operand" "w")]
6846 SVE_COND_INT_CMP_WIDE)]
6849 (clobber (match_scratch:<VPRED> 0 "=Upa"))]
6851 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6852 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
6855 ;; -------------------------------------------------------------------------
6856 ;; ---- [INT] While tests
6857 ;; -------------------------------------------------------------------------
6869 ;; -------------------------------------------------------------------------
6871 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
6872 ;; true for all J in [0, I].
;; Operands 1 and 2 are scalar GPI values, each either a register or zero
;; ("rZ").  Operand 0 is the predicate result.  The pattern clobbers the
;; condition-code register.
6873 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
6874 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6875 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
6876 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
6878 (clobber (reg:CC_NZC CC_REGNUM))]
6880 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
6883 ;; The WHILE instructions set the flags in the same way as a PTEST with
6884 ;; a PTRUE GP. Handle the case in which both results are useful. The GP
6885 ;; operands to the PTEST aren't needed, so we allow them to be anything.
;; Sets CC_REGNUM and predicate operand 0 with a single WHILE instruction
;; on scalar operands 1 and 2.  The rewrite step fires when operand 3 or
;; operand 4 is not a constant and replaces them with all-ones constants
;; of VNx16BImode and the predicate mode respectively.
6886 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
6887 [(set (reg:CC_NZC CC_REGNUM)
6891 (const_int SVE_KNOWN_PTRUE)
6893 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
6894 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
6897 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6898 (unspec:PRED_ALL [(match_dup 1)
6902 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
6903 ;; Force the compiler to drop the unused predicate operand, so that we
6904 ;; don't have an unnecessary PTRUE.
6905 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
6907 operands[3] = CONSTM1_RTX (VNx16BImode);
6908 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
6912 ;; Same, but handle the case in which only the flags result is useful.
;; Only CC_REGNUM is set; the predicate produced by the WHILE instruction
;; goes to scratch operand 0.  The rewrite step fires when operand 3 or
;; operand 4 is not a constant and replaces them with all-ones constants
;; of VNx16BImode and the predicate mode respectively.
6913 (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
6914 [(set (reg:CC_NZC CC_REGNUM)
6918 (const_int SVE_KNOWN_PTRUE)
6920 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
6921 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
6924 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
6926 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
6927 ;; Force the compiler to drop the unused predicate operand, so that we
6928 ;; don't have an unnecessary PTRUE.
6929 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
6931 operands[3] = CONSTM1_RTX (VNx16BImode);
6932 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
6936 ;; -------------------------------------------------------------------------
6937 ;; ---- [FP] Direct comparisons
6938 ;; -------------------------------------------------------------------------
6947 ;; -------------------------------------------------------------------------
6949 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
6950 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO with zero.
;; Operand 0 is the predicate result.  Operand 1 is the comparison
;; operator applied to FP vectors 2 and 3 (operand 3 may be zero).  The
;; rtx code of operand 1 is passed to aarch64_expand_sve_vec_cmp_float.
;; NOTE(review): the meaning of the trailing "false" argument is not
;; visible here -- confirm against the helper's definition.
6952 (define_expand "vec_cmp<mode><vpred>"
6953 [(set (match_operand:<VPRED> 0 "register_operand")
6954 (match_operator:<VPRED> 1 "comparison_operator"
6955 [(match_operand:SVE_FULL_F 2 "register_operand")
6956 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
6959 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
6960 operands[2], operands[3], false);
6965 ;; Predicated floating-point comparisons.
;; Operand 1 is the governing predicate (printed as %1/z) and operand 2
;; is its ptrue flag.  Operand 3 is compared with operand 4.  The first
;; alternative ("Dz") prints the literal #0.0 in place of operand 4; the
;; second uses a vector register.
6966 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
6967 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6969 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6970 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6971 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6972 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
6973 SVE_COND_FP_CMP_I0))]
6976 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
6977 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
6980 ;; Same for unordered comparisons.
;; Operand 1 is the governing predicate (printed as %1/z) and operand 2
;; is its ptrue flag.  Operands 3 and 4 must both be registers; this
;; pattern has no zero-immediate alternative.
6981 (define_insn "@aarch64_pred_fcmuo<mode>"
6982 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
6984 [(match_operand:<VPRED> 1 "register_operand" "Upl")
6985 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6986 (match_operand:SVE_FULL_F 3 "register_operand" "w")
6987 (match_operand:SVE_FULL_F 4 "register_operand" "w")]
6988 UNSPEC_COND_FCMUO))]
6990 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
6993 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
6994 ;; with another predicate P. This does not have the same trapping behavior
6995 ;; as predicating the comparison itself on P, but it's a legitimate fold,
6996 ;; since we can drop any potentially-trapping operations whose results are not needed.
6999 ;; Split the instruction into its preferred form (below) at the earliest
7000 ;; opportunity, in order to get rid of the redundant operand 1.
;; Matched form: a comparison of operands 2 and 3 flagged
;; SVE_KNOWN_PTRUE whose predicate (operand 1) has no predicate string or
;; constraint, combined with predicate operand 4.  Split form: the
;; comparison is flagged SVE_MAYBE_NOT_PTRUE instead.
7001 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
7002 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7005 [(match_operand:<VPRED> 1)
7006 (const_int SVE_KNOWN_PTRUE)
7007 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7008 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
7010 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
7017 (const_int SVE_MAYBE_NOT_PTRUE)
7020 SVE_COND_FP_CMP_I0))]
7023 ;; Same for unordered comparisons.
;; Matched form: an FCMUO of register operands 2 and 3 flagged
;; SVE_KNOWN_PTRUE whose predicate (operand 1) has no predicate string or
;; constraint, combined with predicate operand 4.  Split form: the
;; comparison is flagged SVE_MAYBE_NOT_PTRUE instead.
7024 (define_insn_and_split "*fcmuo<mode>_and_combine"
7025 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7028 [(match_operand:<VPRED> 1)
7029 (const_int SVE_KNOWN_PTRUE)
7030 (match_operand:SVE_FULL_F 2 "register_operand" "w")
7031 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7033 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
7040 (const_int SVE_MAYBE_NOT_PTRUE)
7043 UNSPEC_COND_FCMUO))]
7046 ;; -------------------------------------------------------------------------
7047 ;; ---- [FP] Absolute comparisons
7048 ;; -------------------------------------------------------------------------
7054 ;; -------------------------------------------------------------------------
7056 ;; Predicated floating-point absolute comparisons.
;; Expander interface: operand 0 is the predicate result, operand 1 the
;; governing predicate, operand 2 its ptrue flag, and operands 3 and 4
;; the two FP vector inputs.
7057 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
7058 [(set (match_operand:<VPRED> 0 "register_operand")
7060 [(match_operand:<VPRED> 1 "register_operand")
7061 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7065 (match_operand:SVE_FULL_F 3 "register_operand")]
7070 (match_operand:SVE_FULL_F 4 "register_operand")]
7072 SVE_COND_FP_ABS_CMP))]
;; Matching insn: operand 1 is the governing predicate (printed as %1/z)
;; and operands 2 and 3 are the FP vector inputs.  The insn condition
;; requires aarch64_sve_pred_dominates_p to hold for operands 5 and 7
;; against operand 1.  When either differs from operand 1, the rewrite
;; step replaces both with copies of operand 1.
7076 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>"
7077 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7079 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7080 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
7083 (match_operand:SI 6 "aarch64_sve_gp_strictness")
7084 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7088 (match_operand:SI 8 "aarch64_sve_gp_strictness")
7089 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7091 SVE_COND_FP_ABS_CMP))]
7093 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
7094 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
7095 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
7096 "&& (!rtx_equal_p (operands[1], operands[5])
7097 || !rtx_equal_p (operands[1], operands[7]))"
7099 operands[5] = copy_rtx (operands[1]);
7100 operands[7] = copy_rtx (operands[1]);
7104 ;; -------------------------------------------------------------------------
7105 ;; ---- [PRED] Select
7106 ;; -------------------------------------------------------------------------
7109 ;; -------------------------------------------------------------------------
;; Predicate select.  All four operands are predicate registers.  In the
;; emitted SEL, operand 3 is the governing predicate and operands 1 and 2
;; are the two sources, always printed with the .b suffix.
7111 (define_insn "@vcond_mask_<mode><mode>"
7112 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7115 (match_operand:PRED_ALL 3 "register_operand" "Upa")
7116 (match_operand:PRED_ALL 1 "register_operand" "Upa"))
7119 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
7121 "sel\t%0.b, %3, %1.b, %2.b"
7124 ;; -------------------------------------------------------------------------
7125 ;; ---- [PRED] Test bits
7126 ;; -------------------------------------------------------------------------
7129 ;; -------------------------------------------------------------------------
7131 ;; Branch based on predicate equality or inequality.
;; Operand 0 is the equality/inequality operator applied to predicates 1
;; and 2 (operand 2 may be zero); operand 3 is the branch target label.
;; The expansion forces an all-true predicate into a register, emits a
;; PTEST of "pred" governed by it, and then rewrites operands 1 and 2 to
;; the CC register and const0_rtx so that the branch tests the flags.
;; The visible code checks operand 2 against the zero constant and also
;; builds "pred" with a predicated XOR that takes operand 1.
;; NOTE(review): presumably the XOR is the non-zero case -- the branch
;; structure is not visible here, confirm.
7132 (define_expand "cbranch<mode>4"
7135 (match_operator 0 "aarch64_equality_operator"
7136 [(match_operand:PRED_ALL 1 "register_operand")
7137 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
7138 (label_ref (match_operand 3 ""))
7142 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
7143 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
7144 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
7146 if (operands[2] == CONST0_RTX (<MODE>mode))
7150 pred = gen_reg_rtx (<MODE>mode);
7151 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
7154 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
7155 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
7156 operands[2] = const0_rtx;
7160 ;; See "Description of UNSPEC_PTEST" above for details.
;; Sets CC_REGNUM from an unspec whose visible inputs are VNx16BI
;; predicate operand 0, ptrue-flag operand 2 and predicate operand 3,
;; the latter in any PRED_ALL mode.
7161 (define_insn "aarch64_ptest<mode>"
7162 [(set (reg:CC_NZC CC_REGNUM)
7163 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
7165 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7166 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
7172 ;; =========================================================================
7174 ;; =========================================================================
7176 ;; -------------------------------------------------------------------------
7177 ;; ---- [INT,FP] Conditional reductions
7178 ;; -------------------------------------------------------------------------
7182 ;; -------------------------------------------------------------------------
7184 ;; Set operand 0 to the last active element in operand 3, or to tied
7185 ;; operand 1 if no elements are active.
;; Scalar form.  Operand 1 is tied to the destination ("0") in both
;; alternatives; operand 2 is the governing predicate and operand 3 the
;; vector.  The first alternative (constraint "r") prints the destination
;; with %<vwcore>0, the second (constraint "w") with %<Vetype>0.
7186 (define_insn "@fold_extract_<last_op>_<mode>"
7187 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
7189 [(match_operand:<VEL> 1 "register_operand" "0, 0")
7190 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7191 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7195 clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
7196 clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
;; Vector form of the CLAST fold.  The first alternative ties operand 1
;; to the destination.  The second is earlyclobber and first copies
;; operand 1 into the destination with MOVPRFX.
;; NOTE(review): no "movprfx" attribute is visible on this pattern,
;; unlike the splice and ext patterns in this file that also emit
;; MOVPRFX -- confirm whether one is intended.
7199 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
7200 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7202 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7203 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7204 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7208 clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
7209 movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
7212 ;; -------------------------------------------------------------------------
7213 ;; ---- [INT] Tree reductions
7214 ;; -------------------------------------------------------------------------
7225 ;; -------------------------------------------------------------------------
7227 ;; Unpredicated integer add reduction.
;; Operand 0 is the scalar result and operand 1 the integer vector.  The
;; expansion runs the predicated unsigned add reduction under an all-true
;; predicate.  The reduction writes a DImode value, so when the element
;; mode is not DImode it goes through a fresh DImode temporary and the
;; low part of that temporary is then moved into operand 0.
7228 (define_expand "reduc_plus_scal_<mode>"
7229 [(match_operand:<VEL> 0 "register_operand")
7230 (match_operand:SVE_FULL_I 1 "register_operand")]
7233 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
7234 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
7235 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
7236 if (tmp != operands[0])
7237 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
7242 ;; Predicated integer add reduction. The result is always 64 bits wide.
;; Operand 0 is a DImode result printed as a D register (%d0).  Operand 1
;; is the governing predicate and operand 2 the integer vector.  The insn
;; is enabled only when <max_elem_bits> is at least <elem_bits>.
7243 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7244 [(set (match_operand:DI 0 "register_operand" "=w")
7245 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
7246 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7248 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
7249 "<su>addv\t%d0, %1, %2.<Vetype>"
7252 ;; Unpredicated integer reductions.
;; Operand 0 is the scalar result and operand 1 the integer vector.
;; Operand 2 is not supplied by the caller (match_dup); the preparation
;; code fills it with an all-true predicate register.
7253 (define_expand "reduc_<optab>_scal_<mode>"
7254 [(set (match_operand:<VEL> 0 "register_operand")
7255 (unspec:<VEL> [(match_dup 2)
7256 (match_operand:SVE_FULL_I 1 "register_operand")]
7257 SVE_INT_REDUCTION))]
7260 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7264 ;; Predicated integer reductions.
;; Operand 0 is the element-mode result in a "w" register, printed with
;; the element-size prefix (%<Vetype>0).  Operand 1 is the governing
;; predicate and operand 2 the integer vector.
7265 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7266 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7267 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7268 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7269 SVE_INT_REDUCTION))]
7271 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
7274 ;; -------------------------------------------------------------------------
7275 ;; ---- [FP] Tree reductions
7276 ;; -------------------------------------------------------------------------
7283 ;; -------------------------------------------------------------------------
7285 ;; Unpredicated floating-point tree reductions.
;; Operand 0 is the scalar result and operand 1 the FP vector.  Operand 2
;; is not supplied by the caller (match_dup); the preparation code fills
;; it with an all-true predicate register.
7286 (define_expand "reduc_<optab>_scal_<mode>"
7287 [(set (match_operand:<VEL> 0 "register_operand")
7288 (unspec:<VEL> [(match_dup 2)
7289 (match_operand:SVE_FULL_F 1 "register_operand")]
7293 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7297 ;; Predicated floating-point tree reductions.
;; Operand 0 is the element-mode result in a "w" register, printed with
;; the element-size prefix (%<Vetype>0).  Operand 1 is the governing
;; predicate and operand 2 the FP vector.
7298 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7299 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7300 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7301 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7304 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
7307 ;; -------------------------------------------------------------------------
7308 ;; ---- [FP] Left-to-right reductions
7309 ;; -------------------------------------------------------------------------
7312 ;; -------------------------------------------------------------------------
7314 ;; Unpredicated in-order FP reductions.
;; Operand 0 is the scalar result, operand 1 a scalar input of the same
;; element mode, and operand 2 the FP vector.  Operand 3 is not supplied
;; by the caller (match_dup); the preparation code fills it with an
;; all-true predicate register.
7315 (define_expand "fold_left_plus_<mode>"
7316 [(set (match_operand:<VEL> 0 "register_operand")
7317 (unspec:<VEL> [(match_dup 3)
7318 (match_operand:<VEL> 1 "register_operand")
7319 (match_operand:SVE_FULL_F 2 "register_operand")]
7323 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
7327 ;; Predicated in-order FP reductions.
;; Operand 3 is the governing predicate.  Scalar operand 1 is tied to the
;; destination ("0"), so the emitted FADDA prints register 0 as both
;; destination and scalar source.  Operand 2 is the FP vector.
7328 (define_insn "mask_fold_left_plus_<mode>"
7329 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7330 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
7331 (match_operand:<VEL> 1 "register_operand" "0")
7332 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7335 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
7338 ;; =========================================================================
7340 ;; =========================================================================
7342 ;; -------------------------------------------------------------------------
7343 ;; ---- [INT,FP] General permutes
7344 ;; -------------------------------------------------------------------------
7347 ;; -------------------------------------------------------------------------
;; General two-input permute.  Operands 1 and 2 are the data vectors and
;; operand 3 the selector in the integer-equivalent mode.  The expander is
;; enabled only when the number of units in the mode is a compile-time
;; constant.  The work is done by aarch64_expand_sve_vec_perm.
7349 (define_expand "vec_perm<mode>"
7350 [(match_operand:SVE_FULL 0 "register_operand")
7351 (match_operand:SVE_FULL 1 "register_operand")
7352 (match_operand:SVE_FULL 2 "register_operand")
7353 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
7354 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
7356 aarch64_expand_sve_vec_perm (operands[0], operands[1],
7357 operands[2], operands[3]);
;; Single-register TBL.  Operand 1 is the data vector and operand 2 the
;; index vector in the integer-equivalent mode.
7362 (define_insn "@aarch64_sve_tbl<mode>"
7363 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7365 [(match_operand:SVE_FULL 1 "register_operand" "w")
7366 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
7369 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7372 ;; -------------------------------------------------------------------------
7373 ;; ---- [INT,FP] Special-purpose unary permutes
7374 ;; -------------------------------------------------------------------------
7379 ;; -------------------------------------------------------------------------
7381 ;; Compact active elements and pad with zeros.
;; Operand 1 is the governing predicate and operand 2 the source vector.
;; Only the SVE_FULL_SD modes are accepted.
7382 (define_insn "@aarch64_sve_compact<mode>"
7383 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
7385 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7386 (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
7387 UNSPEC_SVE_COMPACT))]
7389 "compact\t%0.<Vetype>, %1, %2.<Vetype>"
7392 ;; Duplicate one element of a vector.
;; Operand 1 is the source vector and operand 2 a constant lane index.
;; The insn matches only when the lane index times the element size (a
;; byte offset) lies in the range [0, 63].
7393 (define_insn "@aarch64_sve_dup_lane<mode>"
7394 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7395 (vec_duplicate:SVE_FULL
7397 (match_operand:SVE_FULL 1 "register_operand" "w")
7398 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
7400 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7401 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
7404 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
7406 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
7407 ;; number op2 + N of op1. (We don't need to distinguish between memory
7408 ;; and architectural register lane numbering for op1 or op0, since the
7409 ;; two numbering schemes are the same for SVE.)
7411 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
7412 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
7413 ;; of op0. We therefore get the correct result for both endiannesses.
7415 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
7416 ;; is in the opposite order to architectural register lane numbering.
7417 ;; Thus if we were to do this operation via a V128 temporary register,
7418 ;; the vec_select and vec_duplicate would both involve a reverse operation
7419 ;; for big-endian targets. In this fused pattern the two reverses cancel each other out.
;; Operand 2 is an ascending_int_parallel.  Its first element times the
;; element size gives a byte offset, which must be a multiple of 16 and
;; lie in [0, 63].  The output code replaces operand 2 with that byte
;; offset divided by 16 (the quadword index) before printing the DUP.
7421 (define_insn "@aarch64_sve_dupq_lane<mode>"
7422 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7423 (vec_duplicate:SVE_FULL
7425 (match_operand:SVE_FULL 1 "register_operand" "w")
7426 (match_operand 2 "ascending_int_parallel"))))]
7428 && (INTVAL (XVECEXP (operands[2], 0, 0))
7429 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
7430 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
7431 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7433 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
7434 * GET_MODE_SIZE (<VEL>mode));
7435 operands[2] = gen_int_mode (byte / 16, DImode);
7436 return "dup\t%0.q, %1.q[%2]";
7440 ;; Reverse the order of elements within a full vector.
;; Data-vector form: operand 1 is the source vector and operand 0 the
;; destination, both printed with the element-size suffix.
7441 (define_insn "@aarch64_sve_rev<mode>"
7442 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7444 [(match_operand:SVE_FULL 1 "register_operand" "w")]
7447 "rev\t%0.<Vetype>, %1.<Vetype>")
7449 ;; -------------------------------------------------------------------------
7450 ;; ---- [INT,FP] Special-purpose binary permutes
7451 ;; -------------------------------------------------------------------------
7460 ;; -------------------------------------------------------------------------
7462 ;; Like EXT, but start at the first active element.
;; Operand 1 is the governing predicate.  The first alternative ties
;; operand 2 to the destination.  The second is earlyclobber, first copies
;; operand 2 into the destination with MOVPRFX, and is marked as such by
;; the "movprfx" attribute.
7463 (define_insn "@aarch64_sve_splice<mode>"
7464 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7466 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7467 (match_operand:SVE_FULL 2 "register_operand" "0, w")
7468 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7469 UNSPEC_SVE_SPLICE))]
7472 splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
7473 movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
7474 [(set_attr "movprfx" "*, yes")]
7477 ;; Permutes that take half the elements from one vector and half the
7478 ;; elements from the other.
;; Data-vector form: operands 1 and 2 are the two source vectors.  The
;; mnemonic comes from the <perm_insn> iterator attribute.
7479 (define_insn "@aarch64_sve_<perm_insn><mode>"
7480 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7482 [(match_operand:SVE_FULL 1 "register_operand" "w")
7483 (match_operand:SVE_FULL 2 "register_operand" "w")]
7486 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7489 ;; Concatenate two vectors and extract a subvector. Note that the
7490 ;; immediate (third) operand is the lane index not the byte index.
;; Operand 3 is a constant lane index.  Multiplied by the element size it
;; must lie in [0, 255]; the output code rewrites operand 3 to that byte
;; count, and EXT is always printed on .b elements.  The first alternative
;; ties operand 1 to the destination.  The second first copies operand 1
;; into the destination with MOVPRFX.
7491 (define_insn "@aarch64_sve_ext<mode>"
7492 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7494 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7495 (match_operand:SVE_FULL 2 "register_operand" "w, w")
7496 (match_operand:SI 3 "const_int_operand")]
7499 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
7501 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
7502 return (which_alternative == 0
7503 ? "ext\\t%0.b, %0.b, %2.b, #%3"
7504 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
7506 [(set_attr "movprfx" "*,yes")]
7509 ;; -------------------------------------------------------------------------
7510 ;; ---- [PRED] Special-purpose unary permutes
7511 ;; -------------------------------------------------------------------------
7514 ;; -------------------------------------------------------------------------
;; Predicate form of REV: operand 1 is the source predicate and operand 0
;; the destination, both in predicate registers ("Upa").
7516 (define_insn "@aarch64_sve_rev<mode>"
7517 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7518 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
7521 "rev\t%0.<Vetype>, %1.<Vetype>")
7523 ;; -------------------------------------------------------------------------
7524 ;; ---- [PRED] Special-purpose binary permutes
7525 ;; -------------------------------------------------------------------------
7533 ;; -------------------------------------------------------------------------
7535 ;; Permutes that take half the elements from one vector and half the
7536 ;; elements from the other.
;; Predicate form: operands 1 and 2 are the two source predicates, all in
;; predicate registers ("Upa").  The mnemonic comes from the <perm_insn>
;; iterator attribute.
7537 (define_insn "@aarch64_sve_<perm_insn><mode>"
7538 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7539 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
7540 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
7543 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7546 ;; =========================================================================
7548 ;; =========================================================================
7550 ;; -------------------------------------------------------------------------
7551 ;; ---- [INT<-INT] Packs
7552 ;; -------------------------------------------------------------------------
7555 ;; -------------------------------------------------------------------------
7557 ;; Integer pack. Use UZP1 on the narrower type, which discards
7558 ;; the high part of each wide element.
;; Operands 1 and 2 are vectors in the wider mode (<VWIDE>); operand 0 is
;; the narrower result.  All three registers are printed with the
;; narrower element suffix in the UZP1.
7559 (define_insn "vec_pack_trunc_<Vwide>"
7560 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
7561 (unspec:SVE_FULL_BHSI
7562 [(match_operand:<VWIDE> 1 "register_operand" "w")
7563 (match_operand:<VWIDE> 2 "register_operand" "w")]
7566 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7569 ;; -------------------------------------------------------------------------
7570 ;; ---- [INT<-INT] Unpacks
7571 ;; -------------------------------------------------------------------------
7577 ;; -------------------------------------------------------------------------
7579 ;; Unpack the low or high half of a vector, where "high" refers to
7580 ;; the low-numbered lanes for big-endian and the high-numbered lanes
7581 ;; for little-endian.
;; Operand 0 is the widened result and operand 1 the narrow source.  The
;; expansion emits the UNPKHI generator when <hi_lanes_optab> is nonzero
;; and the UNPKLO generator otherwise.
7582 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
7583 [(match_operand:<VWIDE> 0 "register_operand")
7585 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
7588 emit_insn ((<hi_lanes_optab>
7589 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
7590 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
7591 (operands[0], operands[1]));
;; Unpack instruction proper.  Operand 1 is the narrow source, printed
;; with <Vetype>; operand 0 is the wide destination, printed with the
;; wider element suffix <Vewtype>.
7596 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
7597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
7599 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
7602 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
7605 ;; -------------------------------------------------------------------------
7606 ;; ---- [INT<-FP] Conversions
7607 ;; -------------------------------------------------------------------------
7611 ;; -------------------------------------------------------------------------
7613 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
7614 ;; SF to SI or DF to DI).
;; Operand 0 is the integer-equivalent result and operand 1 the FP
;; source.  The unspec carries the SVE_RELAXED_GP strictness flag.  The
;; preparation code sets operand 2 to an all-true predicate register.
7615 (define_expand "<optab><mode><v_int_equiv>2"
7616 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7617 (unspec:<V_INT_EQUIV>
7619 (const_int SVE_RELAXED_GP)
7620 (match_operand:SVE_FULL_F 1 "register_operand")]
7624 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7628 ;; Predicated float-to-integer conversion, either to the same width or wider.
;; Operand 1 is the governing predicate (printed as %1/m), operand 3 the
;; GP strictness flag, and operand 2 the FP source.  The insn is enabled
;; only when the integer element is at least as wide as the FP element.
7629 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7630 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
7631 (unspec:SVE_FULL_HSDI
7632 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
7633 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7634 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7636 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7637 "fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7640 ;; Predicated narrowing float-to-integer conversion.
;; VNx2DF source (operand 2) converted to a VNx4SI destination (operand
;; 0).  Operand 1 is a VNx2BI governing predicate, printed as %1/m, and
;; operand 3 is the GP strictness flag.
7641 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7642 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
7644 [(match_operand:VNx2BI 1 "register_operand" "Upl")
7645 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7646 (match_operand:VNx2DF_ONLY 2 "register_operand" "w")]
7649 "fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7652 ;; Predicated float-to-integer conversion with merging, either to the same width or wider.
;; Operand 1 is the governing predicate, operand 2 the FP source, and
;; operand 3 the fallback value (a register or zero).  The inner
;; conversion carries the SVE_STRICT_GP flag.  The expander is enabled
;; only when the integer element is at least as wide as the FP element.
7654 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7655 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
7656 (unspec:SVE_FULL_HSDI
7657 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
7658 (unspec:SVE_FULL_HSDI
7660 (const_int SVE_STRICT_GP)
7661 (match_operand:SVE_FULL_F 2 "register_operand")]
7663 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
7665 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7668 ;; The first alternative doesn't need the earlyclobber, but the only case
7669 ;; it would help is the uninteresting one in which operands 2 and 3 are
7670 ;; the same register (despite having different modes). Making all the
7671 ;; alternatives earlyclobber makes things more consistent for the
7672 ;; register allocator.
;;
;; The three alternatives differ in operand 3: tied to operand 0 (a plain
;; merging FCVTZ<su>), zero (preceded by a zeroing MOVPRFX) or a separate
;; register (preceded by a MOVPRFX from operand 3).  The insn condition
;; requires aarch64_sve_pred_dominates_p (&operands[4], operands[1]); the
;; rewrite step then replaces operands[4] with a copy of operands[1]
;; whenever the two are not already rtx_equal_p.
7673 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7674 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
7675 (unspec:SVE_FULL_HSDI
7676 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7677 (unspec:SVE_FULL_HSDI
7679 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7680 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
7682 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7685 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
7686 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
7688 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7689 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7690 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7691 "&& !rtx_equal_p (operands[1], operands[4])"
7693 operands[4] = copy_rtx (operands[1]);
7695 [(set_attr "movprfx" "*,yes,yes")]
7698 ;; Predicated narrowing float-to-integer conversion with merging.
;;
;; Operand 1 is a VNx2BI predicate, operand 2 the VNx2DF_ONLY input and
;; operand 3 a VNx4SI_ONLY register or zero (aarch64_simd_reg_or_zero).
;; The expander fixes the GP strictness at SVE_STRICT_GP.
7699 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7700 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7702 [(match_operand:VNx2BI 1 "register_operand")
7705 (const_int SVE_STRICT_GP)
7706 (match_operand:VNx2DF_ONLY 2 "register_operand")]
7708 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
;; Insn form of the predicated narrowing float-to-integer conversion with
;; merging.  Every alternative marks operand 0 as earlyclobber.  The
;; alternatives differ in operand 3: tied to operand 0 (a plain merging
;; FCVTZ<su>), zero (preceded by a zeroing MOVPRFX on the DF-sized elements)
;; or a separate register (preceded by a MOVPRFX from operand 3).  Operand 4
;; is the GP strictness flag.
7713 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7714 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
7716 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
7719 (match_operand:SI 4 "aarch64_sve_gp_strictness")
7720 (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
7722 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7726 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7727 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7728 movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7729 [(set_attr "movprfx" "*,yes,yes")]
7732 ;; -------------------------------------------------------------------------
7733 ;; ---- [INT<-FP] Packs
7734 ;; -------------------------------------------------------------------------
7735 ;; The patterns in this section are synthetic.
7736 ;; -------------------------------------------------------------------------
7738 ;; Convert two vectors of DF to SI and pack the results into a single vector.
;;
;; Operands 1 and 2 are the VNx2DF inputs.  The preparation code sets
;; operands[3] to the predicate from aarch64_ptrue_reg (VNx2BImode) and
;; creates two VNx4SI temporaries (operands 4 and 5), which the final set
;; combines into operand 0 with UNSPEC_UZP1.  Both conversions use
;; SVE_RELAXED_GP.
7739 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
7743 (const_int SVE_RELAXED_GP)
7744 (match_operand:VNx2DF 1 "register_operand")]
7749 (const_int SVE_RELAXED_GP)
7750 (match_operand:VNx2DF 2 "register_operand")]
7752 (set (match_operand:VNx4SI 0 "register_operand")
7753 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
7756 operands[3] = aarch64_ptrue_reg (VNx2BImode);
7757 operands[4] = gen_reg_rtx (VNx4SImode);
7758 operands[5] = gen_reg_rtx (VNx4SImode);
7762 ;; -------------------------------------------------------------------------
7763 ;; ---- [INT<-FP] Unpacks
7764 ;; -------------------------------------------------------------------------
7765 ;; No patterns here yet!
7766 ;; -------------------------------------------------------------------------
7768 ;; -------------------------------------------------------------------------
7769 ;; ---- [FP<-INT] Conversions
7770 ;; -------------------------------------------------------------------------
7774 ;; -------------------------------------------------------------------------
7776 ;; Unpredicated conversion of integers to floats of the same size
7777 ;; (HI to HF, SI to SF or DI to DF).
;;
;; Operand 0 is the SVE_FULL_F result and operand 1 the <V_INT_EQUIV> input.
;; The preparation code sets operands[2] to the predicate returned by
;; aarch64_ptrue_reg (<VPRED>mode), and the GP strictness is fixed at
;; SVE_RELAXED_GP.
7778 (define_expand "<optab><v_int_equiv><mode>2"
7779 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7782 (const_int SVE_RELAXED_GP)
7783 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
7787 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7791 ;; Predicated integer-to-float conversion, either to the same width or
;; narrower.
;;
;; Operand 1 is the predicate ("Upl"), operand 2 the integer input and
;; operand 3 the GP strictness flag.  The insn condition only admits mode
;; pairs in which the integer elements are at least as wide as the FP
;; elements.  The output is a single merging (%1/m) <su>CVTF.
7793 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
7794 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
7796 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
7797 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7798 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
7800 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7801 "<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
7804 ;; Predicated widening integer-to-float conversion.
;;
;; Converts the VNx4SI_ONLY input (operand 2) to a VNx2DF_ONLY result
;; (operand 0) under a VNx2BI predicate (operand 1); operand 3 is the GP
;; strictness flag.  The output is a single merging (%1/m) <su>CVTF.
7805 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
7806 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w")
7808 [(match_operand:VNx2BI 1 "register_operand" "Upl")
7809 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7810 (match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
7813 "<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
7816 ;; Predicated integer-to-float conversion with merging, either to the same
7817 ;; width or narrower.
;;
;; Operand 1 is the predicate, operand 2 the integer input and operand 3 a
;; register or zero (aarch64_simd_reg_or_zero); presumably operand 3 gives
;; the value of inactive lanes -- confirm against the matching insn.  The
;; expander fixes the GP strictness at SVE_STRICT_GP and is only enabled when
;; the integer elements are at least as wide as the FP elements.
7818 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
7819 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7821 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
7824 (const_int SVE_STRICT_GP)
7825 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
7827 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
7829 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7832 ;; The first alternative doesn't need the earlyclobber, but the only case
7833 ;; it would help is the uninteresting one in which operands 2 and 3 are
7834 ;; the same register (despite having different modes). Making all the
7835 ;; alternatives earlyclobber makes things more consistent for the
7836 ;; register allocator.
;;
;; The three alternatives differ in operand 3: tied to operand 0 (a plain
;; merging <su>CVTF), zero (preceded by a zeroing MOVPRFX) or a separate
;; register (preceded by a MOVPRFX from operand 3).  The insn condition
;; requires aarch64_sve_pred_dominates_p (&operands[4], operands[1]); the
;; rewrite step then replaces operands[4] with a copy of operands[1]
;; whenever the two are not already rtx_equal_p.
7837 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
7838 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
7840 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7843 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7844 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
7846 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7849 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
7850 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
7852 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
7853 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
7854 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
7855 "&& !rtx_equal_p (operands[1], operands[4])"
7857 operands[4] = copy_rtx (operands[1]);
7859 [(set_attr "movprfx" "*,yes,yes")]
7862 ;; Predicated widening integer-to-float conversion with merging.
;;
;; Operand 1 is a VNx2BI predicate, operand 2 the VNx4SI_ONLY input and
;; operand 3 a VNx2DF_ONLY register or zero (aarch64_simd_reg_or_zero).
;; The expander fixes the GP strictness at SVE_STRICT_GP.
7863 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
7864 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
7866 [(match_operand:VNx2BI 1 "register_operand")
7869 (const_int SVE_STRICT_GP)
7870 (match_operand:VNx4SI_ONLY 2 "register_operand")]
7872 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
;; Insn form of the predicated widening integer-to-float conversion with
;; merging.  The alternatives differ in operand 3: tied to operand 0 (a
;; plain merging <su>CVTF, no earlyclobber), zero (preceded by a zeroing
;; MOVPRFX) or a separate register (preceded by a MOVPRFX from operand 3).
;; Only the two MOVPRFX alternatives mark operand 0 as earlyclobber.
;; Operand 4 is the GP strictness flag.
7877 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
7878 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
7880 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
7883 (match_operand:SI 4 "aarch64_sve_gp_strictness")
7884 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
7886 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7890 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
7891 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
7892 movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
7893 [(set_attr "movprfx" "*,yes,yes")]
7896 ;; -------------------------------------------------------------------------
7897 ;; ---- [FP<-INT] Packs
7898 ;; -------------------------------------------------------------------------
7899 ;; No patterns here yet!
7900 ;; -------------------------------------------------------------------------
7902 ;; -------------------------------------------------------------------------
7903 ;; ---- [FP<-INT] Unpacks
7904 ;; -------------------------------------------------------------------------
7905 ;; The patterns in this section are synthetic.
7906 ;; -------------------------------------------------------------------------
7908 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
7909 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
7910 ;; unpacked VNx4SI to VNx2DF.
;;
;; In the preparation code the unpack step is done by a ZIP1 or ZIP2
;; (chosen by <hi_lanes_optab>) of operand 1 with itself into a VNx4SI
;; temporary.  The temporary is then converted into operand 0 through
;; gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df, using the predicate
;; from aarch64_ptrue_reg (VNx2BImode) and SVE_RELAXED_GP strictness.
7911 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
7912 [(match_operand:VNx2DF 0 "register_operand")
7914 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
7918 /* Use ZIP to do the unpack, since we don't care about the upper halves
7919 and since it has the nice property of not needing any subregs.
7920 If using UUNPK* turns out to be preferable, we could model it as
7921 a ZIP whose first operand is zero. */
7922 rtx temp = gen_reg_rtx (VNx4SImode);
7923 emit_insn ((<hi_lanes_optab>
7924 ? gen_aarch64_sve_zip2vnx4si
7925 : gen_aarch64_sve_zip1vnx4si)
7926 (temp, operands[1], operands[1]));
7927 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
7928 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
7929 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
7930 (operands[0], ptrue, temp, strictness));
7935 ;; -------------------------------------------------------------------------
7936 ;; ---- [FP<-FP] Packs
7937 ;; -------------------------------------------------------------------------
7940 ;; -------------------------------------------------------------------------
7942 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
7943 ;; the results into a single vector.
;;
;; Operands 1 and 2 are the <VWIDE> inputs.  The preparation code sets
;; operands[3] to the predicate from aarch64_ptrue_reg (<VWIDE_PRED>mode) and
;; creates two <MODE> temporaries (operands 4 and 5), which the final set
;; combines into operand 0 with UNSPEC_UZP1.  Both conversions use
;; SVE_RELAXED_GP.
7944 (define_expand "vec_pack_trunc_<Vwide>"
7946 (unspec:SVE_FULL_HSF
7948 (const_int SVE_RELAXED_GP)
7949 (match_operand:<VWIDE> 1 "register_operand")]
7952 (unspec:SVE_FULL_HSF
7954 (const_int SVE_RELAXED_GP)
7955 (match_operand:<VWIDE> 2 "register_operand")]
7957 (set (match_operand:SVE_FULL_HSF 0 "register_operand")
7958 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
7961 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
7962 operands[4] = gen_reg_rtx (<MODE>mode);
7963 operands[5] = gen_reg_rtx (<MODE>mode);
7967 ;; Predicated float-to-float truncation.
;;
;; Operand 1 is the predicate, in the predicate mode of the wider source
;; (<SVE_FULL_SDF:VPRED>); operand 2 is the source and operand 3 the GP
;; strictness flag.  The insn condition only admits mode pairs in which the
;; source elements are strictly wider than the destination elements.  The
;; output is a single merging (%1/m) FCVT.
7968 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
7969 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
7970 (unspec:SVE_FULL_HSF
7971 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
7972 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7973 (match_operand:SVE_FULL_SDF 2 "register_operand" "w")]
7975 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
7976 "fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
7979 ;; Predicated float-to-float truncation with merging.
;;
;; Operand 1 is the predicate (<SVE_FULL_SDF:VPRED>), operand 2 the wider
;; source and operand 3 a register or zero (aarch64_simd_reg_or_zero) in the
;; destination mode.  The expander fixes the GP strictness at SVE_STRICT_GP
;; and is only enabled when the source elements are strictly wider than the
;; destination elements.
7980 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
7981 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
7982 (unspec:SVE_FULL_HSF
7983 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
7984 (unspec:SVE_FULL_HSF
7986 (const_int SVE_STRICT_GP)
7987 (match_operand:SVE_FULL_SDF 2 "register_operand")]
7989 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
7991 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
;; Insn form of the predicated float-to-float truncation with merging.
;; The alternatives differ in operand 3: tied to operand 0 (a plain merging
;; FCVT, no earlyclobber), zero (preceded by a zeroing MOVPRFX on the
;; source-sized elements) or a separate register (preceded by a MOVPRFX from
;; operand 3).  Operand 4 is the GP strictness flag.
7994 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
7995 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
7996 (unspec:SVE_FULL_HSF
7997 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7998 (unspec:SVE_FULL_HSF
8000 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8001 (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
8003 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8005 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8007 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8008 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8009 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
8010 [(set_attr "movprfx" "*,yes,yes")]
8013 ;; -------------------------------------------------------------------------
8014 ;; ---- [FP<-FP] Unpacks
8015 ;; -------------------------------------------------------------------------
8018 ;; -------------------------------------------------------------------------
8020 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
8021 ;; First unpack the source without conversion, then float-convert the
;; unpacked source.
;;
;; In the preparation code the unpack step is done by a ZIP1 or ZIP2
;; (chosen by <hi_lanes_optab>) of operand 1 with itself into a <MODE>
;; temporary.  The temporary is then converted into operand 0 through
;; gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>, using the predicate from
;; aarch64_ptrue_reg (<VWIDE_PRED>mode) and SVE_RELAXED_GP strictness.
8023 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
8024 [(match_operand:<VWIDE> 0 "register_operand")
8025 (unspec:SVE_FULL_HSF
8026 [(match_operand:SVE_FULL_HSF 1 "register_operand")]
8030 /* Use ZIP to do the unpack, since we don't care about the upper halves
8031 and since it has the nice property of not needing any subregs.
8032 If using UUNPK* turns out to be preferable, we could model it as
8033 a ZIP whose first operand is zero. */
8034 rtx temp = gen_reg_rtx (<MODE>mode);
8035 emit_insn ((<hi_lanes_optab>
8036 ? gen_aarch64_sve_zip2<mode>
8037 : gen_aarch64_sve_zip1<mode>)
8038 (temp, operands[1], operands[1]));
8039 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
8040 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
8041 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
8042 (operands[0], ptrue, temp, strictness));
8047 ;; Predicated float-to-float extension.
;;
;; Operand 1 is the predicate, in the predicate mode of the wider
;; destination (<SVE_FULL_SDF:VPRED>); operand 2 is the narrower source and
;; operand 3 the GP strictness flag.  The insn condition only admits mode
;; pairs in which the destination elements are strictly wider than the
;; source elements.  The output is a single merging (%1/m) FCVT.
8048 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8049 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
8050 (unspec:SVE_FULL_SDF
8051 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
8052 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8053 (match_operand:SVE_FULL_HSF 2 "register_operand" "w")]
8055 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8056 "fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8059 ;; Predicated float-to-float extension with merging.
;;
;; Operand 1 is the predicate (<SVE_FULL_SDF:VPRED>), operand 2 the narrower
;; source and operand 3 a register or zero (aarch64_simd_reg_or_zero) in the
;; destination mode.  The expander fixes the GP strictness at SVE_STRICT_GP
;; and is only enabled when the destination elements are strictly wider than
;; the source elements.
8060 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8061 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
8062 (unspec:SVE_FULL_SDF
8063 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
8064 (unspec:SVE_FULL_SDF
8066 (const_int SVE_STRICT_GP)
8067 (match_operand:SVE_FULL_HSF 2 "register_operand")]
8069 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
8071 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
;; Insn form of the predicated float-to-float extension with merging.
;; The alternatives differ in operand 3: tied to operand 0 (a plain merging
;; FCVT, no earlyclobber), zero (preceded by a zeroing MOVPRFX on the
;; destination-sized elements) or a separate register (preceded by a MOVPRFX
;; from operand 3).  Operand 4 is the GP strictness flag.
8074 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8075 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
8076 (unspec:SVE_FULL_SDF
8077 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8078 (unspec:SVE_FULL_SDF
8080 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8081 (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
8083 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8085 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8087 fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8088 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8089 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8090 [(set_attr "movprfx" "*,yes,yes")]
8093 ;; -------------------------------------------------------------------------
8094 ;; ---- [PRED<-PRED] Packs
8095 ;; -------------------------------------------------------------------------
8098 ;; -------------------------------------------------------------------------
8100 ;; Predicate pack. Use UZP1 on the narrower type, which discards
8101 ;; the high part of each wide element.
;;
;; Operands 1 and 2 are the two <VWIDE> predicate inputs and operand 0 the
;; PRED_BHS result; all three use the "Upa" constraint.
8102 (define_insn "vec_pack_trunc_<Vwide>"
8103 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
8105 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
8106 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
8109 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8112 ;; -------------------------------------------------------------------------
8113 ;; ---- [PRED<-PRED] Unpacks
8114 ;; -------------------------------------------------------------------------
8118 ;; -------------------------------------------------------------------------
8120 ;; Unpack the low or high half of a predicate, where "high" refers to
8121 ;; the low-numbered lanes for big-endian and the high-numbered lanes
8122 ;; for little-endian.
;;
;; The preparation code emits either the punpkhi or the punpklo pattern for
;; <PRED_BHS:mode>, selected by <hi_lanes_optab>, from operand 1 into
;; operand 0.
8123 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
8124 [(match_operand:<VWIDE> 0 "register_operand")
8125 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
8129 emit_insn ((<hi_lanes_optab>
8130 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
8131 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
8132 (operands[0], operands[1]));
;; Unpack one half of a PRED_BHS predicate (operand 1) into a <VWIDE>
;; predicate (operand 0).  The output template always uses the .h and .b
;; element suffixes, whichever PRED_BHS mode is being unpacked.
8137 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
8138 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
8139 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
8142 "punpk<perm_hilo>\t%0.h, %1.b"
8145 ;; =========================================================================
8146 ;; == Vector partitioning
8147 ;; =========================================================================
8149 ;; -------------------------------------------------------------------------
8150 ;; ---- [PRED] Unary partitioning
8151 ;; -------------------------------------------------------------------------
8157 ;; -------------------------------------------------------------------------
8159 ;; Note that unlike most other instructions that have both merging and
8160 ;; zeroing forms, these instructions don't operate elementwise and so
8161 ;; don't fit the IFN_COND model.
;;
;; Operand 1 is the predicate used for /z or /m, operand 2 the source
;; predicate and operand 3 selects the form: zero ("Dz") gives the zeroing
;; form (%1/z), while tying operand 3 to operand 0 gives the merging form
;; (%1/m).
8162 (define_insn "@aarch64_brk<brk_op>"
8163 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8165 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8166 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8167 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8171 brk<brk_op>\t%0.b, %1/z, %2.b
8172 brk<brk_op>\t%0.b, %1/m, %2.b"
8175 ;; Same, but also producing a flags result.
;;
;; Sets CC_NZC in CC_REGNUM as well as the predicate result in operand 0,
;; using the flag-setting BRK<brk_op>S mnemonic.  Operand 4 is a ptrue flag
;; (aarch64_sve_ptrue_flag).  As in the non-flag-setting form, operand 3
;; selects zeroing ("Dz", %1/z) or merging (tied to operand 0, %1/m).
8176 (define_insn "*aarch64_brk<brk_op>_cc"
8177 [(set (reg:CC_NZC CC_REGNUM)
8179 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8181 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8184 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8185 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8188 (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8196 brk<brk_op>s\t%0.b, %1/z, %2.b
8197 brk<brk_op>s\t%0.b, %1/m, %2.b"
8200 ;; Same, but with only the flags result being interesting.
;;
;; Sets CC_NZC in CC_REGNUM; the predicate destination (operand 0) is only a
;; clobbered scratch register.  The output still uses the flag-setting
;; BRK<brk_op>S mnemonic, with operand 3 selecting zeroing ("Dz", %1/z) or
;; merging (tied to operand 0, %1/m).
8201 (define_insn "*aarch64_brk<brk_op>_ptest"
8202 [(set (reg:CC_NZC CC_REGNUM)
8204 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8206 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8209 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8210 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8213 (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
8216 brk<brk_op>s\t%0.b, %1/z, %2.b
8217 brk<brk_op>s\t%0.b, %1/m, %2.b"
8220 ;; -------------------------------------------------------------------------
8221 ;; ---- [PRED] Binary partitioning
8222 ;; -------------------------------------------------------------------------
8230 ;; -------------------------------------------------------------------------
8232 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
;;
;; Operand 1 is the predicate used for zeroing (%1/z) and operands 2 and 3
;; are the two source predicates.  The constraint on operand 3 and the
;; operand printed last in the template both come from iterator attributes
;; (<brk_reg_con> and <brk_reg_opno>), so they can vary with <brk_op>.
8233 (define_insn "@aarch64_brk<brk_op>"
8234 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8236 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8237 (match_operand:VNx16BI 2 "register_operand" "Upa")
8238 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8241 "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8244 ;; Same, but also producing a flags result.
;;
;; Sets CC_NZC in CC_REGNUM as well as the predicate result in operand 0,
;; using the flag-setting BRK<brk_op>S mnemonic.  Operand 4 is a ptrue flag
;; (aarch64_sve_ptrue_flag).
8245 (define_insn "*aarch64_brk<brk_op>_cc"
8246 [(set (reg:CC_NZC CC_REGNUM)
8248 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8250 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8253 (match_operand:VNx16BI 2 "register_operand" "Upa")
8254 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8257 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8264 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8267 ;; Same, but with only the flags result being interesting.
;;
;; Sets CC_NZC in CC_REGNUM; the predicate destination (operand 0) is only a
;; clobbered scratch register.  The output still uses the flag-setting
;; BRK<brk_op>S mnemonic.
8268 (define_insn "*aarch64_brk<brk_op>_ptest"
8269 [(set (reg:CC_NZC CC_REGNUM)
8271 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8273 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8276 (match_operand:VNx16BI 2 "register_operand" "Upa")
8277 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8280 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
8282 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8285 ;; -------------------------------------------------------------------------
8286 ;; ---- [PRED] Scalarization
8287 ;; -------------------------------------------------------------------------
8291 ;; -------------------------------------------------------------------------
;; Predicate operation <sve_pred_op> on PRED_ALL modes.  Operand 1 is a
;; predicate input printed without an element suffix, operand 2 a ptrue flag
;; (aarch64_sve_ptrue_flag) and operand 3 the predicate input that is tied to
;; the destination ("0").  The pattern clobbers CC_NZC in CC_REGNUM and is
;; only enabled when <max_elem_bits> >= <elem_bits>.
8293 (define_insn "@aarch64_sve_<sve_pred_op><mode>"
8294 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8296 [(match_operand:PRED_ALL 1 "register_operand" "Upa")
8297 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8298 (match_operand:PRED_ALL 3 "register_operand" "0")]
8300 (clobber (reg:CC_NZC CC_REGNUM))]
8301 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8302 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8305 ;; Same, but also producing a flags result.
;;
;; Sets CC_NZC in CC_REGNUM in addition to the predicate result in
;; operand 0.  The insn condition requires
;; aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4]); when
;; operands 2 and 4 are not already rtx_equal_p, the rewrite step copies
;; operands[2] and operands[3] into operands[4] and operands[5].
8306 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
8307 [(set (reg:CC_NZC CC_REGNUM)
8309 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8311 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8314 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8315 (match_operand:PRED_ALL 6 "register_operand" "0")]
8318 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8325 && <max_elem_bits> >= <elem_bits>
8326 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8327 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8328 "&& !rtx_equal_p (operands[2], operands[4])"
8330 operands[4] = operands[2];
8331 operands[5] = operands[3];
8335 ;; Same, but with only the flags result being interesting.
;;
;; Sets CC_NZC in CC_REGNUM; the predicate destination (operand 0) is only a
;; clobbered scratch register, with operand 6 tied to it.  The insn
;; condition requires
;; aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4]); when
;; operands 2 and 4 are not already rtx_equal_p, the rewrite step copies
;; operands[2] and operands[3] into operands[4] and operands[5].
8336 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
8337 [(set (reg:CC_NZC CC_REGNUM)
8339 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8341 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8344 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8345 (match_operand:PRED_ALL 6 "register_operand" "0")]
8348 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8350 && <max_elem_bits> >= <elem_bits>
8351 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8352 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8353 "&& !rtx_equal_p (operands[2], operands[4])"
8355 operands[4] = operands[2];
8356 operands[5] = operands[3];
8360 ;; =========================================================================
8361 ;; == Counting elements
8362 ;; =========================================================================
8364 ;; -------------------------------------------------------------------------
8365 ;; ---- [INT] Count elements in a pattern (scalar)
8366 ;; -------------------------------------------------------------------------
8372 ;; -------------------------------------------------------------------------
8374 ;; Count the number of elements in an svpattern. Operand 1 is the pattern,
8375 ;; operand 2 is the number of elements that fit in a 128-bit block, and
8376 ;; operand 3 is a multiplier in the range [1, 16].
8378 ;; Note that this pattern isn't used for SV_ALL (but would work for that too).
;;
;; The count is an SImode unspec (UNSPEC_SVE_CNT_PAT) of the three constant
;; operands, written to the DImode destination.  The assembly text is built
;; by aarch64_output_sve_cnt_pat_immediate with the "cnt" prefix, the 64-bit
;; name of operand 0 and the operand array starting at operand 1.
8379 (define_insn "aarch64_sve_cnt_pat"
8380 [(set (match_operand:DI 0 "register_operand" "=r")
8382 (unspec:SI [(match_operand:DI 1 "const_int_operand")
8383 (match_operand:DI 2 "const_int_operand")
8384 (match_operand:DI 3 "const_int_operand")]
8385 UNSPEC_SVE_CNT_PAT)))]
8388 return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
8392 ;; -------------------------------------------------------------------------
8393 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
8394 ;; -------------------------------------------------------------------------
8399 ;; -------------------------------------------------------------------------
8401 ;; Increment a DImode register by the number of elements in an svpattern.
8402 ;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Covers every code in ANY_PLUS.  Operand 1 is the value being incremented
;; and is tied to the destination; operands 2-4 are the constant count
;; operands, whose SImode count is zero-extended to DImode before the
;; addition.  The output goes through aarch64_output_sve_cnt_pat_immediate
;; with the "<inc_dec>" mnemonic and the 64-bit name of operand 0.
8403 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8404 [(set (match_operand:DI 0 "register_operand" "=r")
8405 (ANY_PLUS:DI (zero_extend:DI
8406 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8407 (match_operand:DI 3 "const_int_operand")
8408 (match_operand:DI 4 "const_int_operand")]
8409 UNSPEC_SVE_CNT_PAT))
8410 (match_operand:DI_ONLY 1 "register_operand" "0")))]
8413 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8418 ;; Increment an SImode register by the number of elements in an svpattern
8419 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
;; behavior.
;;
;; Operand 1 is the value being incremented and is tied to the destination;
;; operands 2-4 are the constant count operands.  The output uses the "inc"
;; mnemonic on the 64-bit name of operand 0 (%x0), even though the RTL
;; operation is an SImode plus.
8421 (define_insn "*aarch64_sve_incsi_pat"
8422 [(set (match_operand:SI 0 "register_operand" "=r")
8423 (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
8424 (match_operand:DI 3 "const_int_operand")
8425 (match_operand:DI 4 "const_int_operand")]
8427 (match_operand:SI 1 "register_operand" "0")))]
8430 return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
8434 ;; Increment an SImode register by the number of elements in an svpattern
8435 ;; using saturating arithmetic, extending the result to 64 bits.
8437 ;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Operand 1 is the SImode value being incremented, tied to the DImode
;; destination.  The register string passed to
;; aarch64_output_sve_cnt_pat_immediate depends on the code: "%x0, %w0" when
;; <CODE> is SS_PLUS and "%w0" otherwise.
8438 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8439 [(set (match_operand:DI 0 "register_operand" "=r")
8442 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8443 (match_operand:DI 3 "const_int_operand")
8444 (match_operand:DI 4 "const_int_operand")]
8446 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
8449 const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
8450 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
8455 ;; -------------------------------------------------------------------------
8456 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
8457 ;; -------------------------------------------------------------------------
8462 ;; -------------------------------------------------------------------------
8464 ;; Increment a vector of DIs by the number of elements in an svpattern.
8465 ;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; The count is duplicated across a VNx2DI vector and combined with
;; operand 1.  In the first alternative operand 1 is tied to the destination;
;; in the second it is a separate register and the output code first emits
;; "movprfx %0, %1" before the <inc_dec> instruction.
8466 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8467 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8469 (vec_duplicate:VNx2DI
8471 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8472 (match_operand:DI 3 "const_int_operand")
8473 (match_operand:DI 4 "const_int_operand")]
8474 UNSPEC_SVE_CNT_PAT)))
8475 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
8478 if (which_alternative == 1)
8479 output_asm_insn ("movprfx\t%0, %1", operands);
8480 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8483 [(set_attr "movprfx" "*,yes")]
8486 ;; Increment a vector of SIs by the number of elements in an svpattern.
8487 ;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; The SImode count is duplicated across a VNx4SI vector and combined with
;; operand 1.  In the first alternative operand 1 is tied to the destination;
;; in the second it is a separate register and the output code first emits
;; "movprfx %0, %1" before the <inc_dec> instruction.
8488 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8489 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8491 (vec_duplicate:VNx4SI
8492 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8493 (match_operand:DI 3 "const_int_operand")
8494 (match_operand:DI 4 "const_int_operand")]
8495 UNSPEC_SVE_CNT_PAT))
8496 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
8499 if (which_alternative == 1)
8500 output_asm_insn ("movprfx\t%0, %1", operands);
8501 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8504 [(set_attr "movprfx" "*,yes")]
8507 ;; Increment a vector of HIs by the number of elements in an svpattern.
8508 ;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Expander for the VNx8HI case: the count from operands 2-4 is duplicated
;; across a VNx8HI vector and combined with operand 1.  The insn that
;; follows matches the count through a subreg_lowpart_operator, so
;; presumably this expander narrows the SImode count to HImode in the same
;; way -- confirm against the full pattern.
8509 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
8510 [(set (match_operand:VNx8HI 0 "register_operand")
8512 (vec_duplicate:VNx8HI
8514 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8515 (match_operand:DI 3 "const_int_operand")
8516 (match_operand:DI 4 "const_int_operand")]
8517 UNSPEC_SVE_CNT_PAT)))
8518 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
;; Insn form of the VNx8HI increment.  The SImode count is wrapped in an
;; HImode operator (operand 5) that must satisfy subreg_lowpart_operator
;; before being duplicated across the vector.  In the first alternative
;; operand 1 is tied to the destination; in the second it is a separate
;; register and the output code first emits "movprfx %0, %1" before the
;; <inc_dec> instruction.
8522 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
8523 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8525 (vec_duplicate:VNx8HI
8526 (match_operator:HI 5 "subreg_lowpart_operator"
8527 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
8528 (match_operand:DI 3 "const_int_operand")
8529 (match_operand:DI 4 "const_int_operand")]
8530 UNSPEC_SVE_CNT_PAT)]))
8531 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
8534 if (which_alternative == 1)
8535 output_asm_insn ("movprfx\t%0, %1", operands);
8536 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8539 [(set_attr "movprfx" "*,yes")]
8542 ;; -------------------------------------------------------------------------
8543 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
8544 ;; -------------------------------------------------------------------------
8549 ;; -------------------------------------------------------------------------
8551 ;; Decrement a DImode register by the number of elements in an svpattern.
8552 ;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Covers every code in ANY_MINUS.  Operand 1 is the value being decremented
;; and is tied to the destination; operands 2-4 are the constant count
;; operands.  The output goes through aarch64_output_sve_cnt_pat_immediate
;; with the "<inc_dec>" mnemonic and the 64-bit name of operand 0.
8553 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8554 [(set (match_operand:DI 0 "register_operand" "=r")
8555 (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
8557 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8558 (match_operand:DI 3 "const_int_operand")
8559 (match_operand:DI 4 "const_int_operand")]
8560 UNSPEC_SVE_CNT_PAT))))]
8563 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8568 ;; Decrement an SImode register by the number of elements in an svpattern
8569 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
;; behavior.
;;
;; Operand 1 is the value being decremented and is tied to the destination;
;; operands 2-4 are the constant count operands.  The output uses the "dec"
;; mnemonic on the 64-bit name of operand 0 (%x0), even though the RTL
;; operation is an SImode minus.
8571 (define_insn "*aarch64_sve_decsi_pat"
8572 [(set (match_operand:SI 0 "register_operand" "=r")
8573 (minus:SI (match_operand:SI 1 "register_operand" "0")
8574 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8575 (match_operand:DI 3 "const_int_operand")
8576 (match_operand:DI 4 "const_int_operand")]
8577 UNSPEC_SVE_CNT_PAT)))]
8580 return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
8584 ;; Decrement an SImode register by the number of elements in an svpattern
8585 ;; using saturating arithmetic, extending the result to 64 bits.
8587 ;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Operand 1 is the SImode value being decremented, tied to the DImode
;; destination.  The register string passed to
;; aarch64_output_sve_cnt_pat_immediate depends on the code: "%x0, %w0" when
;; <CODE> is SS_MINUS and "%w0" otherwise.
8588 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8589 [(set (match_operand:DI 0 "register_operand" "=r")
8592 (match_operand:SI_ONLY 1 "register_operand" "0")
8593 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8594 (match_operand:DI 3 "const_int_operand")
8595 (match_operand:DI 4 "const_int_operand")]
8596 UNSPEC_SVE_CNT_PAT))))]
8599 const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
8600 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
8605 ;; -------------------------------------------------------------------------
8606 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
8607 ;; -------------------------------------------------------------------------
8612 ;; -------------------------------------------------------------------------
8614 ;; Decrement a vector of DIs by the number of elements in an svpattern.
8615 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Two alternatives: in the first, operand 1 is tied to operand 0 ("0");
;; in the second ("?&w" / "w") the input lives in a different register and
;; a "movprfx %0, %1" is emitted first (which_alternative == 1), matching
;; the "movprfx" attribute value "*,yes".  The subtrahend is a
;; vec_duplicate:VNx2DI of a value derived from UNSPEC_SVE_CNT_PAT over
;; const_int operands 2-4; the instruction text comes from
;; aarch64_output_sve_cnt_pat_immediate with "%0.<Vetype>".
;; NOTE(review): the embedded numbering skips 8618, 8621, 8626-8627 and
;; 8631-8632, so the outer arithmetic code, the rtx between vec_duplicate
;; and the unspec, and the insn condition are not visible here.
8616 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8617 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8619 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
8620 (vec_duplicate:VNx2DI
8622 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8623 (match_operand:DI 3 "const_int_operand")
8624 (match_operand:DI 4 "const_int_operand")]
8625 UNSPEC_SVE_CNT_PAT)))))]
8628 if (which_alternative == 1)
8629 output_asm_insn ("movprfx\t%0, %1", operands);
8630 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8633 [(set_attr "movprfx" "*,yes")]
8636 ;; Decrement a vector of SIs by the number of elements in an svpattern.
8637 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Same shape as the other vector "_pat" patterns, for VNx4SI: the SImode
;; UNSPEC_SVE_CNT_PAT of const_int operands 2-4 is fed directly to
;; vec_duplicate:VNx4SI.  Alternative 0 updates operand 1 in place ("0");
;; alternative 1 first emits "movprfx %0, %1" and is flagged by the
;; "movprfx" attribute ("*,yes").
;; NOTE(review): the embedded numbering skips 8640, 8647-8648 and
;; 8652-8653, so the outer arithmetic code, the insn condition and the
;; last argument of the output call are not visible here.
8638 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8639 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8641 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
8642 (vec_duplicate:VNx4SI
8643 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8644 (match_operand:DI 3 "const_int_operand")
8645 (match_operand:DI 4 "const_int_operand")]
8646 UNSPEC_SVE_CNT_PAT))))]
8649 if (which_alternative == 1)
8650 output_asm_insn ("movprfx\t%0, %1", operands);
8651 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8654 [(set_attr "movprfx" "*,yes")]
8657 ;; Decrement a vector of HIs by the number of elements in an svpattern.
8658 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Expander only (no constraints, no assembly): builds a set of VNx8HI
;; operand 0 from operand 1 and a vec_duplicate:VNx8HI of a value derived
;; from the SImode UNSPEC_SVE_CNT_PAT of const_int operands 2-4.
;; NOTE(review): the embedded numbering skips 8661, 8664 and 8669-8671, so
;; the outer arithmetic code, the SI-to-HI narrowing rtx and the expander
;; condition are not visible here -- confirm against the complete file.
8659 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
8660 [(set (match_operand:VNx8HI 0 "register_operand")
8662 (match_operand:VNx8HI_ONLY 1 "register_operand")
8663 (vec_duplicate:VNx8HI
8665 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8666 (match_operand:DI 3 "const_int_operand")
8667 (match_operand:DI 4 "const_int_operand")]
8668 UNSPEC_SVE_CNT_PAT)))))]
;; Unnamed ("*") matcher for the VNx8HI form.  The SImode
;; UNSPEC_SVE_CNT_PAT of const_int operands 2-4 is wrapped in operator 5,
;; which must satisfy "subreg_lowpart_operator", before being duplicated
;; into VNx8HI.  Alternative 0 updates operand 1 in place ("0");
;; alternative 1 first emits "movprfx %0, %1" ("movprfx" attribute
;; "*,yes").
;; NOTE(review): the embedded numbering skips 8674, 8682-8683 and
;; 8687-8688, so the outer arithmetic code, the insn condition and the
;; last argument of the output call are not visible here.
8672 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
8673 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8675 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
8676 (vec_duplicate:VNx8HI
8677 (match_operator:HI 5 "subreg_lowpart_operator"
8678 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
8679 (match_operand:DI 3 "const_int_operand")
8680 (match_operand:DI 4 "const_int_operand")]
8681 UNSPEC_SVE_CNT_PAT)]))))]
8684 if (which_alternative == 1)
8685 output_asm_insn ("movprfx\t%0, %1", operands);
8686 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8689 [(set_attr "movprfx" "*,yes")]
8692 ;; -------------------------------------------------------------------------
8693 ;; ---- [INT] Count elements in a predicate (scalar)
8694 ;; -------------------------------------------------------------------------
8697 ;; -------------------------------------------------------------------------
8699 ;; Count the number of set bits in a predicate. Operand 2 is true if
8700 ;; operand 1 is known to be all-true.
;; Operand 0 (DI, "=r") receives the count.  The unspec takes a PRED_ALL
;; register operand 1 ("Upl"), an SImode operand 2 matched by
;; aarch64_sve_ptrue_flag, and a PRED_ALL register operand 3 ("Upa").
;; The emitted instruction is "cntp %x0, %1, %3.<Vetype>"; operand 2 does
;; not appear in the assembly text.
;; NOTE(review): the embedded numbering skips 8703 and 8707-8708, so the
;; rtx wrapping the SImode unspec, the unspec name and the insn condition
;; are not visible here.
8701 (define_insn "@aarch64_pred_cntp<mode>"
8702 [(set (match_operand:DI 0 "register_operand" "=r")
8704 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
8705 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8706 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
8709 "cntp\t%x0, %1, %3.<Vetype>")
8711 ;; -------------------------------------------------------------------------
8712 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
8713 ;; -------------------------------------------------------------------------
8718 ;; -------------------------------------------------------------------------
8720 ;; Increment a DImode register by the number of set bits in a predicate.
8721 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander: operand 0 (DI) is built from DI_ONLY operand 1 and an SImode
;; unspec whose inputs are (match_dup 3), the constant SVE_KNOWN_PTRUE and
;; the PRED_ALL register operand 2.  Operand 3 is not supplied by the
;; caller; the preparation statement sets it to CONSTM1_RTX of the
;; predicate mode.
;; NOTE(review): the embedded numbering skips 8724-8725, 8729 and
;; 8731-8732, so the arithmetic code, the wrapper around the unspec, the
;; unspec name and the expander condition are not visible here.
8722 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
8723 [(set (match_operand:DI 0 "register_operand")
8726 (unspec:SI [(match_dup 3)
8727 (const_int SVE_KNOWN_PTRUE)
8728 (match_operand:PRED_ALL 2 "register_operand")]
8730 (match_operand:DI_ONLY 1 "register_operand")))]
8733 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Unnamed ("*") insn for the scalar DI form.  Operand 1 is tied to
;; operand 0 ("0"); operand 2 is the PRED_ALL register ("Upa") whose name
;; and element suffix appear in the output
;; "<inc_dec>p %x0, %2.<PRED_ALL:Vetype>".  Operand 3 is matched with no
;; mode or predicate; the rewrite step fires when it is not CONSTANT_P and
;; replaces it with CONSTM1_RTX of the predicate mode.
;; NOTE(review): the embedded numbering skips 8739-8740, 8744 and 8746,
;; so the arithmetic code, the wrapper around the unspec, the unspec name
;; and the insn condition are not visible here.
8737 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
8738 [(set (match_operand:DI 0 "register_operand" "=r")
8741 (unspec:SI [(match_operand 3)
8742 (const_int SVE_KNOWN_PTRUE)
8743 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8745 (match_operand:DI_ONLY 1 "register_operand" "0")))]
8747 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
8748 "&& !CONSTANT_P (operands[3])"
8750 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8754 ;; Increment an SImode register by the number of set bits in a predicate
8755 ;; using modular arithmetic. See aarch64_sve_cntp for a description of
;; the operands.
;; Unnamed ("*") insn: SImode operand 0 ("=r") is formed from the SImode
;; unspec (inputs: operand 3, SVE_KNOWN_PTRUE, PRED_ALL register operand
;; 2) and SImode operand 1, which is tied to operand 0 ("0").  The output
;; is "incp %x0, %2.<Vetype>".  When operand 3 is not CONSTANT_P the
;; rewrite step replaces it with CONSTM1_RTX of the predicate mode.
;; NOTE(review): %x0 on an SImode operand is presumably acceptable because
;; only the low 32 bits matter under modular arithmetic -- confirm.  The
;; embedded numbering skips 8759, 8763 and 8765, so the arithmetic code,
;; the unspec name and the insn condition are not visible here.
8757 (define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
8758 [(set (match_operand:SI 0 "register_operand" "=r")
8760 (unspec:SI [(match_operand 3)
8761 (const_int SVE_KNOWN_PTRUE)
8762 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8764 (match_operand:SI 1 "register_operand" "0")))]
8766 "incp\t%x0, %2.<Vetype>"
8767 "&& !CONSTANT_P (operands[3])"
8769 operands[3] = CONSTM1_RTX (<MODE>mode);
8773 ;; Increment an SImode register by the number of set bits in a predicate
8774 ;; using saturating arithmetic, extending the result to 64 bits.
8776 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander: DImode operand 0 is built from SI_ONLY operand 1 and an
;; SImode unspec over (match_dup 3), SVE_KNOWN_PTRUE and the PRED_ALL
;; register operand 2.  The preparation statement supplies operand 3 as
;; CONSTM1_RTX of the predicate mode.
;; NOTE(review): the embedded numbering skips 8779-8780, 8784 and
;; 8786-8787, so the extension and saturating-arithmetic codes, the unspec
;; name and the expander condition are not visible here.
8777 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
8778 [(set (match_operand:DI 0 "register_operand")
8781 (unspec:SI [(match_dup 3)
8782 (const_int SVE_KNOWN_PTRUE)
8783 (match_operand:PRED_ALL 2 "register_operand")]
8785 (match_operand:SI_ONLY 1 "register_operand"))))]
8788 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Unnamed ("*") insn for the saturating SI form with a DImode result.
;; Operand 1 (SI_ONLY) is tied to operand 0 ("0").  When <CODE> is SS_PLUS
;; the three-operand text "<inc_dec>p %x0, %2.<PRED_ALL:Vetype>, %w0" is
;; returned; the other return prints the two-operand W-register form.
;; When operand 3 is not CONSTANT_P the rewrite step replaces it with
;; CONSTM1_RTX of the predicate mode.
;; NOTE(review): the embedded numbering skips 8794-8795, 8799, 8801-8802,
;; 8805 and 8807, so the extension/arithmetic codes, the unspec name, the
;; insn condition and the "else" joining the two returns are not visible
;; here.
8792 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
8793 [(set (match_operand:DI 0 "register_operand" "=r")
8796 (unspec:SI [(match_operand 3)
8797 (const_int SVE_KNOWN_PTRUE)
8798 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8800 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
8803 if (<CODE> == SS_PLUS)
8804 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
8806 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
8808 "&& !CONSTANT_P (operands[3])"
8810 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8814 ;; -------------------------------------------------------------------------
8815 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
8816 ;; -------------------------------------------------------------------------
8821 ;; -------------------------------------------------------------------------
8823 ;; Increment a vector of DIs by the number of set bits in a predicate.
8824 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander for the VNx2DI form: operand 0 is built from a
;; vec_duplicate:VNx2DI of a predicate-count value (inputs include
;; SVE_KNOWN_PTRUE and the <VPRED> register operand 2) and the
;; VNx2DI_ONLY operand 1.  The preparation statement sets operand 3 to
;; CONSTM1_RTX of the <VPRED> mode.
;; NOTE(review): the embedded numbering skips 8827, 8829-8831, 8834 and
;; 8836-8837, so the arithmetic code, the start of the unspec (where
;; operand 3 is presumably referenced) and the expander condition are not
;; visible here.
8825 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
8826 [(set (match_operand:VNx2DI 0 "register_operand")
8828 (vec_duplicate:VNx2DI
8832 (const_int SVE_KNOWN_PTRUE)
8833 (match_operand:<VPRED> 2 "register_operand")]
8835 (match_operand:VNx2DI_ONLY 1 "register_operand")))]
8838 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Unnamed ("*") insn for the VNx2DI form, with two alternatives.
;; Alternative 0 ties operand 1 to operand 0 and emits
;; "<inc_dec>p %0.d, %2"; alternative 1 ("?&w" / "w") prefixes that with
;; "movprfx %0, %1" and is marked by the "movprfx" attribute ("*,yes").
;; When operand 3 is not CONSTANT_P the rewrite step replaces it with
;; CONSTM1_RTX of the <VPRED> mode.
;; NOTE(review): the embedded numbering skips 8844, 8846-8848, 8851 and
;; 8853-8854, so the arithmetic code, the start of the unspec (where
;; operand 3 is presumably matched), the insn condition and the opening of
;; the multi-alternative template are not visible here.
8842 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
8843 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8845 (vec_duplicate:VNx2DI
8849 (const_int SVE_KNOWN_PTRUE)
8850 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
8852 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
8855 <inc_dec>p\t%0.d, %2
8856 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
8857 "&& !CONSTANT_P (operands[3])"
8859 operands[3] = CONSTM1_RTX (<VPRED>mode);
8861 [(set_attr "movprfx" "*,yes")]
8864 ;; Increment a vector of SIs by the number of set bits in a predicate.
8865 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander for the VNx4SI form: operand 0 is built from a
;; vec_duplicate:VNx4SI of a predicate-count value (inputs include
;; SVE_KNOWN_PTRUE and the <VPRED> register operand 2) and the
;; VNx4SI_ONLY operand 1.  The preparation statement sets operand 3 to
;; CONSTM1_RTX of the <VPRED> mode.
;; NOTE(review): the embedded numbering skips 8868, 8870-8871, 8874 and
;; 8876-8877, so the arithmetic code, the start of the unspec (where
;; operand 3 is presumably referenced) and the expander condition are not
;; visible here.
8866 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
8867 [(set (match_operand:VNx4SI 0 "register_operand")
8869 (vec_duplicate:VNx4SI
8872 (const_int SVE_KNOWN_PTRUE)
8873 (match_operand:<VPRED> 2 "register_operand")]
8875 (match_operand:VNx4SI_ONLY 1 "register_operand")))]
8878 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Unnamed ("*") insn for the VNx4SI form, with two alternatives.
;; Alternative 0 ties operand 1 to operand 0 and emits
;; "<inc_dec>p %0.s, %2"; alternative 1 ("?&w" / "w") prefixes that with
;; "movprfx %0, %1" and is marked by the "movprfx" attribute ("*,yes").
;; When operand 3 is not CONSTANT_P the rewrite step replaces it with
;; CONSTM1_RTX of the <VPRED> mode.
;; NOTE(review): the embedded numbering skips 8884, 8886-8887, 8890 and
;; 8892-8893, so the arithmetic code, the start of the unspec (where
;; operand 3 is presumably matched), the insn condition and the opening of
;; the multi-alternative template are not visible here.
8882 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
8883 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8885 (vec_duplicate:VNx4SI
8888 (const_int SVE_KNOWN_PTRUE)
8889 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
8891 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
8894 <inc_dec>p\t%0.s, %2
8895 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
8896 "&& !CONSTANT_P (operands[3])"
8898 operands[3] = CONSTM1_RTX (<VPRED>mode);
8900 [(set_attr "movprfx" "*,yes")]
8903 ;; Increment a vector of HIs by the number of set bits in a predicate.
8904 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander for the VNx8HI form: operand 0 is built from a
;; vec_duplicate:VNx8HI of a predicate-count value (inputs include
;; SVE_KNOWN_PTRUE and the <VPRED> register operand 2) and the
;; VNx8HI_ONLY operand 1.  The preparation statement sets operand 3 to
;; CONSTM1_RTX of the <VPRED> mode.
;; NOTE(review): the embedded numbering skips 8907, 8909-8911, 8914 and
;; 8916-8917, so the arithmetic code, the narrowing rtx, the start of the
;; unspec (where operand 3 is presumably referenced) and the expander
;; condition are not visible here.
8905 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
8906 [(set (match_operand:VNx8HI 0 "register_operand")
8908 (vec_duplicate:VNx8HI
8912 (const_int SVE_KNOWN_PTRUE)
8913 (match_operand:<VPRED> 2 "register_operand")]
8915 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
8918 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Unnamed ("*") insn for the VNx8HI form.  Operand number 3 is taken by
;; the HImode "subreg_lowpart_operator" wrapper, so the rewrite step works
;; on operands[4] instead: when it is not CONSTANT_P it is replaced with
;; CONSTM1_RTX of the <VPRED> mode.  Alternative 0 ties operand 1 to
;; operand 0 and emits "<inc_dec>p %0.h, %2"; alternative 1 prefixes that
;; with "movprfx %0, %1" ("movprfx" attribute "*,yes").
;; NOTE(review): the embedded numbering skips 8924, 8927-8928, 8931 and
;; 8933-8934, so the arithmetic code, the start of the unspec (where
;; operand 4 is presumably matched), the insn condition and the opening of
;; the multi-alternative template are not visible here.
8922 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
8923 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8925 (vec_duplicate:VNx8HI
8926 (match_operator:HI 3 "subreg_lowpart_operator"
8929 (const_int SVE_KNOWN_PTRUE)
8930 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
8932 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
8935 <inc_dec>p\t%0.h, %2
8936 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
8937 "&& !CONSTANT_P (operands[4])"
8939 operands[4] = CONSTM1_RTX (<VPRED>mode);
8941 [(set_attr "movprfx" "*,yes")]
8944 ;; -------------------------------------------------------------------------
8945 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
8946 ;; -------------------------------------------------------------------------
8951 ;; -------------------------------------------------------------------------
8953 ;; Decrement a DImode register by the number of set bits in a predicate.
8954 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander: DImode operand 0 is built from DI_ONLY operand 1 (listed
;; first here) and an SImode unspec over (match_dup 3), SVE_KNOWN_PTRUE
;; and the PRED_ALL register operand 2.  The preparation statement
;; supplies operand 3 as CONSTM1_RTX of the predicate mode.
;; NOTE(review): the embedded numbering skips 8957, 8959 and 8963-8965,
;; so the arithmetic code, the wrapper around the unspec, the unspec name
;; and the expander condition are not visible here.
8955 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
8956 [(set (match_operand:DI 0 "register_operand")
8958 (match_operand:DI_ONLY 1 "register_operand")
8960 (unspec:SI [(match_dup 3)
8961 (const_int SVE_KNOWN_PTRUE)
8962 (match_operand:PRED_ALL 2 "register_operand")]
8966 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Unnamed ("*") insn for the scalar DI form.  Operand 1 is tied to
;; operand 0 ("0"); operand 2 is the PRED_ALL register ("Upa") printed in
;; "<inc_dec>p %x0, %2.<PRED_ALL:Vetype>".  Operand 3 is matched with no
;; mode or predicate; when it is not CONSTANT_P the rewrite step replaces
;; it with CONSTM1_RTX of the predicate mode.
;; NOTE(review): the embedded numbering skips 8972, 8974 and 8978-8979,
;; so the arithmetic code, the wrapper around the unspec, the unspec name
;; and the insn condition are not visible here.
8970 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
8971 [(set (match_operand:DI 0 "register_operand" "=r")
8973 (match_operand:DI_ONLY 1 "register_operand" "0")
8975 (unspec:SI [(match_operand 3)
8976 (const_int SVE_KNOWN_PTRUE)
8977 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8980 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
8981 "&& !CONSTANT_P (operands[3])"
8983 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8987 ;; Decrement an SImode register by the number of set bits in a predicate
8988 ;; using modular arithmetic. See aarch64_sve_cntp for a description of the
;; operands.
;; Unnamed ("*") insn: SImode operand 0 ("=r") is formed from SImode
;; operand 1 (tied to operand 0 by "0") and the SImode unspec over operand
;; 3, SVE_KNOWN_PTRUE and the PRED_ALL register operand 2.  The output is
;; "decp %x0, %2.<Vetype>".  When operand 3 is not CONSTANT_P the rewrite
;; step replaces it with CONSTM1_RTX of the predicate mode.
;; NOTE(review): %x0 on an SImode operand is presumably acceptable because
;; only the low 32 bits matter under modular arithmetic -- confirm.  The
;; embedded numbering skips 8992 and 8997-8998, so the arithmetic code,
;; the unspec name and the insn condition are not visible here.
8990 (define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
8991 [(set (match_operand:SI 0 "register_operand" "=r")
8993 (match_operand:SI 1 "register_operand" "0")
8994 (unspec:SI [(match_operand 3)
8995 (const_int SVE_KNOWN_PTRUE)
8996 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8999 "decp\t%x0, %2.<Vetype>"
9000 "&& !CONSTANT_P (operands[3])"
9002 operands[3] = CONSTM1_RTX (<MODE>mode);
9006 ;; Decrement an SImode register by the number of set bits in a predicate
9007 ;; using saturating arithmetic, extending the result to 64 bits.
9009 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander: DImode operand 0 is built from SI_ONLY operand 1 and an
;; SImode unspec over (match_dup 3), SVE_KNOWN_PTRUE and the PRED_ALL
;; register operand 2.  The preparation statement supplies operand 3 as
;; CONSTM1_RTX of the predicate mode.
;; NOTE(review): the embedded numbering skips 9012-9013 and 9018-9020, so
;; the extension and saturating-arithmetic codes, the unspec name and the
;; expander condition are not visible here.
9010 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9011 [(set (match_operand:DI 0 "register_operand")
9014 (match_operand:SI_ONLY 1 "register_operand")
9015 (unspec:SI [(match_dup 3)
9016 (const_int SVE_KNOWN_PTRUE)
9017 (match_operand:PRED_ALL 2 "register_operand")]
9021 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Unnamed ("*") insn for the saturating SI form with a DImode result.
;; Operand 1 (SI_ONLY) is tied to operand 0 ("0").  When <CODE> is
;; SS_MINUS the three-operand text
;; "<inc_dec>p %x0, %2.<PRED_ALL:Vetype>, %w0" is returned; the other
;; return prints the two-operand W-register form.  When operand 3 is not
;; CONSTANT_P the rewrite step replaces it with CONSTM1_RTX of the
;; predicate mode.
;; NOTE(review): the embedded numbering skips 9027-9028, 9033-9035, 9038
;; and 9040, so the extension/arithmetic codes, the unspec name, the insn
;; condition and the "else" joining the two returns are not visible here.
9025 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9026 [(set (match_operand:DI 0 "register_operand" "=r")
9029 (match_operand:SI_ONLY 1 "register_operand" "0")
9030 (unspec:SI [(match_operand 3)
9031 (const_int SVE_KNOWN_PTRUE)
9032 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9036 if (<CODE> == SS_MINUS)
9037 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
9039 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
9041 "&& !CONSTANT_P (operands[3])"
9043 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9047 ;; -------------------------------------------------------------------------
9048 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
9049 ;; -------------------------------------------------------------------------
9054 ;; -------------------------------------------------------------------------
9056 ;; Decrement a vector of DIs by the number of set bits in a predicate.
9057 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander for the VNx2DI form: operand 0 is built from the
;; VNx2DI_ONLY operand 1 and a vec_duplicate:VNx2DI of a predicate-count
;; value (inputs include SVE_KNOWN_PTRUE and the <VPRED> register operand
;; 2).  The preparation statement sets operand 3 to CONSTM1_RTX of the
;; <VPRED> mode.
;; NOTE(review): the embedded numbering skips 9060, 9063-9065 and
;; 9068-9070, so the arithmetic code, the start of the unspec (where
;; operand 3 is presumably referenced) and the expander condition are not
;; visible here.
9058 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9059 [(set (match_operand:VNx2DI 0 "register_operand")
9061 (match_operand:VNx2DI_ONLY 1 "register_operand")
9062 (vec_duplicate:VNx2DI
9066 (const_int SVE_KNOWN_PTRUE)
9067 (match_operand:<VPRED> 2 "register_operand")]
9071 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Unnamed ("*") insn for the VNx2DI form, with two alternatives.
;; Alternative 0 ties operand 1 to operand 0 and emits
;; "<inc_dec>p %0.d, %2"; alternative 1 ("?&w" / "w") prefixes that with
;; "movprfx %0, %1" and is marked by the "movprfx" attribute ("*,yes").
;; When operand 3 is not CONSTANT_P the rewrite step replaces it with
;; CONSTM1_RTX of the <VPRED> mode.
;; NOTE(review): the embedded numbering skips 9077, 9080-9082 and
;; 9085-9087, so the arithmetic code, the start of the unspec (where
;; operand 3 is presumably matched), the insn condition and the opening of
;; the multi-alternative template are not visible here.
9075 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9076 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
9078 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
9079 (vec_duplicate:VNx2DI
9083 (const_int SVE_KNOWN_PTRUE)
9084 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9088 <inc_dec>p\t%0.d, %2
9089 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
9090 "&& !CONSTANT_P (operands[3])"
9092 operands[3] = CONSTM1_RTX (<VPRED>mode);
9094 [(set_attr "movprfx" "*,yes")]
9097 ;; Decrement a vector of SIs by the number of set bits in a predicate.
9098 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander for the VNx4SI form: operand 0 is built from the
;; VNx4SI_ONLY operand 1 and a vec_duplicate:VNx4SI of a predicate-count
;; value (inputs include SVE_KNOWN_PTRUE and the <VPRED> register operand
;; 2).  The preparation statement sets operand 3 to CONSTM1_RTX of the
;; <VPRED> mode.
;; NOTE(review): the embedded numbering skips 9101, 9104-9105 and
;; 9108-9110, so the arithmetic code, the start of the unspec (where
;; operand 3 is presumably referenced) and the expander condition are not
;; visible here.
9099 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9100 [(set (match_operand:VNx4SI 0 "register_operand")
9102 (match_operand:VNx4SI_ONLY 1 "register_operand")
9103 (vec_duplicate:VNx4SI
9106 (const_int SVE_KNOWN_PTRUE)
9107 (match_operand:<VPRED> 2 "register_operand")]
9111 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Unnamed ("*") insn for the VNx4SI form, with two alternatives.
;; Alternative 0 ties operand 1 to operand 0 and emits
;; "<inc_dec>p %0.s, %2"; alternative 1 ("?&w" / "w") prefixes that with
;; "movprfx %0, %1" and is marked by the "movprfx" attribute ("*,yes").
;; When operand 3 is not CONSTANT_P the rewrite step replaces it with
;; CONSTM1_RTX of the <VPRED> mode.
;; NOTE(review): the embedded numbering skips 9117, 9120-9121 and
;; 9124-9126, so the arithmetic code, the start of the unspec (where
;; operand 3 is presumably matched), the insn condition and the opening of
;; the multi-alternative template are not visible here.
9115 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9116 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
9118 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
9119 (vec_duplicate:VNx4SI
9122 (const_int SVE_KNOWN_PTRUE)
9123 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9127 <inc_dec>p\t%0.s, %2
9128 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
9129 "&& !CONSTANT_P (operands[3])"
9131 operands[3] = CONSTM1_RTX (<VPRED>mode);
9133 [(set_attr "movprfx" "*,yes")]
9136 ;; Decrement a vector of HIs by the number of set bits in a predicate.
9137 ;; See aarch64_sve_cntp for a description of the operands.
;; Expander for the VNx8HI form: operand 0 is built from the
;; VNx8HI_ONLY operand 1 and a vec_duplicate:VNx8HI of a predicate-count
;; value (inputs include SVE_KNOWN_PTRUE and the <VPRED> register operand
;; 2).  The preparation statement sets operand 3 to CONSTM1_RTX of the
;; <VPRED> mode.
;; NOTE(review): the embedded numbering skips 9140, 9143-9145 and
;; 9148-9150, so the arithmetic code, the narrowing rtx, the start of the
;; unspec (where operand 3 is presumably referenced) and the expander
;; condition are not visible here.
9138 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9139 [(set (match_operand:VNx8HI 0 "register_operand")
9141 (match_operand:VNx8HI_ONLY 1 "register_operand")
9142 (vec_duplicate:VNx8HI
9146 (const_int SVE_KNOWN_PTRUE)
9147 (match_operand:<VPRED> 2 "register_operand")]
9151 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Unnamed ("*") insn for the VNx8HI form.  Operand number 3 is taken by
;; the HImode "subreg_lowpart_operator" wrapper, so the rewrite step works
;; on operands[4] instead: when it is not CONSTANT_P it is replaced with
;; CONSTM1_RTX of the <VPRED> mode.  Alternative 0 ties operand 1 to
;; operand 0 and emits "<inc_dec>p %0.h, %2"; alternative 1 prefixes that
;; with "movprfx %0, %1" ("movprfx" attribute "*,yes").
;; NOTE(review): the embedded numbering skips 9157, 9161-9162 and
;; 9165-9167, so the arithmetic code, the start of the unspec (where
;; operand 4 is presumably matched), the insn condition and the opening of
;; the multi-alternative template are not visible here.
9155 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9156 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
9158 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
9159 (vec_duplicate:VNx8HI
9160 (match_operator:HI 3 "subreg_lowpart_operator"
9163 (const_int SVE_KNOWN_PTRUE)
9164 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9168 <inc_dec>p\t%0.h, %2
9169 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
9170 "&& !CONSTANT_P (operands[4])"
9172 operands[4] = CONSTM1_RTX (<VPRED>mode);
9174 [(set_attr "movprfx" "*,yes")]