1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2022 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The file is organised into the following sections (search for the full
25 ;; ---- Note on the handling of big-endian SVE
26 ;; ---- Description of UNSPEC_PTEST
27 ;; ---- Description of UNSPEC_PRED_Z
28 ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
29 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
30 ;; ---- Note on FFR handling
33 ;; ---- Moves of single vectors
34 ;; ---- Moves of multiple vectors
35 ;; ---- Moves of predicates
36 ;; ---- Moves relating to the FFR
39 ;; ---- Normal contiguous loads
40 ;; ---- Extending contiguous loads
41 ;; ---- First-faulting contiguous loads
42 ;; ---- First-faulting extending contiguous loads
43 ;; ---- Non-temporal contiguous loads
44 ;; ---- Normal gather loads
45 ;; ---- Extending gather loads
46 ;; ---- First-faulting gather loads
47 ;; ---- First-faulting extending gather loads
50 ;; ---- Contiguous prefetches
51 ;; ---- Gather prefetches
54 ;; ---- Normal contiguous stores
55 ;; ---- Truncating contiguous stores
56 ;; ---- Non-temporal contiguous stores
57 ;; ---- Normal scatter stores
58 ;; ---- Truncating scatter stores
61 ;; ---- [INT,FP] Duplicate element
62 ;; ---- [INT,FP] Initialize from individual elements
63 ;; ---- [INT] Linear series
64 ;; ---- [PRED] Duplicate element
66 ;; == Vector decomposition
67 ;; ---- [INT,FP] Extract index
68 ;; ---- [INT,FP] Extract active element
69 ;; ---- [PRED] Extract index
71 ;; == Unary arithmetic
72 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
73 ;; ---- [INT] General unary arithmetic corresponding to unspecs
74 ;; ---- [INT] Sign and zero extension
75 ;; ---- [INT] Truncation
76 ;; ---- [INT] Logical inverse
77 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
78 ;; ---- [FP] General unary arithmetic corresponding to unspecs
79 ;; ---- [FP] Square root
80 ;; ---- [FP] Reciprocal square root
81 ;; ---- [PRED] Inverse
83 ;; == Binary arithmetic
84 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
85 ;; ---- [INT] Addition
86 ;; ---- [INT] Subtraction
87 ;; ---- [INT] Take address
88 ;; ---- [INT] Absolute difference
89 ;; ---- [INT] Saturating addition and subtraction
90 ;; ---- [INT] Highpart multiplication
91 ;; ---- [INT] Division
92 ;; ---- [INT] Binary logical operations
93 ;; ---- [INT] Binary logical operations (inverted second input)
94 ;; ---- [INT] Shifts (rounding towards -Inf)
95 ;; ---- [INT] Shifts (rounding towards 0)
96 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
97 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
98 ;; ---- [FP] General binary arithmetic corresponding to unspecs
100 ;; ---- [FP] Complex addition
101 ;; ---- [FP] Subtraction
102 ;; ---- [FP] Absolute difference
103 ;; ---- [FP] Multiplication
104 ;; ---- [FP] Division
105 ;; ---- [FP] Binary logical operations
106 ;; ---- [FP] Sign copying
107 ;; ---- [FP] Maximum and minimum
108 ;; ---- [PRED] Binary logical operations
109 ;; ---- [PRED] Binary logical operations (inverted second input)
110 ;; ---- [PRED] Binary logical operations (inverted result)
112 ;; == Ternary arithmetic
113 ;; ---- [INT] MLA and MAD
114 ;; ---- [INT] MLS and MSB
115 ;; ---- [INT] Dot product
116 ;; ---- [INT] Sum of absolute differences
117 ;; ---- [INT] Matrix multiply-accumulate
118 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
119 ;; ---- [FP] Complex multiply-add
120 ;; ---- [FP] Trigonometric multiply-add
121 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
122 ;; ---- [FP] Matrix multiply-accumulate
124 ;; == Comparisons and selects
125 ;; ---- [INT,FP] Select based on predicates
126 ;; ---- [INT,FP] Compare and select
127 ;; ---- [INT] Comparisons
128 ;; ---- [INT] While tests
129 ;; ---- [FP] Direct comparisons
130 ;; ---- [FP] Absolute comparisons
131 ;; ---- [PRED] Select
132 ;; ---- [PRED] Test bits
135 ;; ---- [INT,FP] Conditional reductions
136 ;; ---- [INT] Tree reductions
137 ;; ---- [FP] Tree reductions
138 ;; ---- [FP] Left-to-right reductions
141 ;; ---- [INT,FP] General permutes
142 ;; ---- [INT,FP] Special-purpose unary permutes
143 ;; ---- [INT,FP] Special-purpose binary permutes
144 ;; ---- [PRED] Special-purpose unary permutes
145 ;; ---- [PRED] Special-purpose binary permutes
148 ;; ---- [INT<-INT] Packs
149 ;; ---- [INT<-INT] Unpacks
150 ;; ---- [INT<-FP] Conversions
151 ;; ---- [INT<-FP] Packs
152 ;; ---- [INT<-FP] Unpacks
153 ;; ---- [FP<-INT] Conversions
154 ;; ---- [FP<-INT] Packs
155 ;; ---- [FP<-INT] Unpacks
156 ;; ---- [FP<-FP] Packs
157 ;; ---- [FP<-FP] Packs (bfloat16)
158 ;; ---- [FP<-FP] Unpacks
159 ;; ---- [PRED<-PRED] Packs
160 ;; ---- [PRED<-PRED] Unpacks
162 ;; == Vector partitioning
163 ;; ---- [PRED] Unary partitioning
164 ;; ---- [PRED] Binary partitioning
165 ;; ---- [PRED] Scalarization
167 ;; == Counting elements
168 ;; ---- [INT] Count elements in a pattern (scalar)
169 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
170 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
171 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
172 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
173 ;; ---- [INT] Count elements in a predicate (scalar)
174 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
175 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
176 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
177 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
179 ;; =========================================================================
181 ;; =========================================================================
183 ;; -------------------------------------------------------------------------
184 ;; ---- Note on the handling of big-endian SVE
185 ;; -------------------------------------------------------------------------
187 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
188 ;; same way as movdi or movti would: the first byte of memory goes
189 ;; into the most significant byte of the register and the last byte
190 ;; of memory goes into the least significant byte of the register.
191 ;; This is the most natural ordering for Advanced SIMD and matches
192 ;; the ABI layout for 64-bit and 128-bit vector types.
194 ;; As a result, the order of bytes within the register is what GCC
195 ;; expects for a big-endian target, and subreg offsets therefore work
196 ;; as expected, with the first element in memory having subreg offset 0
197 ;; and the last element in memory having the subreg offset associated
198 ;; with a big-endian lowpart. However, this ordering also means that
199 ;; GCC's lane numbering does not match the architecture's numbering:
200 ;; GCC always treats the element at the lowest address in memory
201 ;; (subreg offset 0) as element 0, while the architecture treats
202 ;; the least significant end of the register as element 0.
204 ;; The situation for SVE is different. We want the layout of the
205 ;; SVE register to be same for mov<mode> as it is for maskload<mode>:
206 ;; logically, a mov<mode> load must be indistinguishable from a
207 ;; maskload<mode> whose mask is all true. We therefore need the
208 ;; register layout to match LD1 rather than LDR. The ABI layout of
209 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
211 ;; As a result, the architecture lane numbering matches GCC's lane
212 ;; numbering, with element 0 always being the first in memory.
215 ;; - Applying a subreg offset to a register does not give the element
216 ;; that GCC expects: the first element in memory has the subreg offset
217 ;; associated with a big-endian lowpart while the last element in memory
218 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
220 ;; - We cannot use LDR and STR for spill slots that might be accessed
221 ;; via subregs, since although the elements have the order GCC expects,
222 ;; the order of the bytes within the elements is different. We instead
223 ;; access spill slots via LD1 and ST1, using secondary reloads to
224 ;; reserve a predicate register.
226 ;; -------------------------------------------------------------------------
227 ;; ---- Description of UNSPEC_PTEST
228 ;; -------------------------------------------------------------------------
230 ;; SVE provides a PTEST instruction for testing the active lanes of a
231 ;; predicate and setting the flags based on the result. The associated
232 ;; condition code tests are:
234 ;; - any (= ne): at least one active bit is set
235 ;; - none (= eq): all active bits are clear (*)
236 ;; - first (= mi): the first active bit is set
237 ;; - nfrst (= pl): the first active bit is clear (*)
238 ;; - last (= cc): the last active bit is set
239 ;; - nlast (= cs): the last active bit is clear (*)
241 ;; where the conditions marked (*) are also true when there are no active
242 ;; lanes (i.e. when the governing predicate is a PFALSE). The flags results
243 ;; of a PTEST use the condition code mode CC_NZC.
245 ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
246 ;; This means that for other predicate modes, we need a governing predicate
247 ;; in which all bits are defined.
249 ;; For example, most predicated .H operations ignore the odd bits of the
250 ;; governing predicate, so that an active lane is represented by the
251 ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
252 ;; any value. To test a .H predicate, we instead need "10" and "00"
253 ;; respectively, so that the condition only tests the even bits of the
256 ;; Several instructions set the flags as a side-effect, in the same way
257 ;; that a separate PTEST would. It's important for code quality that we
258 ;; use these flags results as often as possible, particularly in the case
259 ;; of WHILE* and RDFFR.
261 ;; Also, some of the instructions that set the flags are unpredicated
262 ;; and instead implicitly test all .B, .H, .S or .D elements, as though
263 ;; they were predicated on a PTRUE of that size. For example, a .S
264 ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
267 ;; We therefore need to represent PTEST operations in a way that
268 ;; makes it easy to combine them with both predicated and unpredicated
269 ;; operations, while using a VNx16BI governing predicate for all
270 ;; predicate modes. We do this using:
272 ;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
276 ;; - GP is the real VNx16BI governing predicate
278 ;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting
279 ;; GP to CAST_GP are guaranteed to be clear in GP.
281 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
282 ;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
283 ;; SVE_MAYBE_NOT_PTRUE otherwise.
285 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
287 ;; -------------------------------------------------------------------------
288 ;; ---- Description of UNSPEC_PRED_Z
289 ;; -------------------------------------------------------------------------
291 ;; SVE integer comparisons are predicated and return zero for inactive
292 ;; lanes. Sometimes we use them with predicates that are all-true and
293 ;; sometimes we use them with general predicates.
295 ;; The integer comparisons also set the flags and so build-in the effect
296 ;; of a PTEST. We therefore want to be able to combine integer comparison
297 ;; patterns with PTESTs of the result. One difficulty with doing this is
298 ;; that (as noted above) the PTEST is always a .B operation and so can place
299 ;; stronger requirements on the governing predicate than the comparison does.
301 ;; For example, when applying a separate PTEST to the result of a full-vector
302 ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
303 ;; .B PTRUE. In contrast, the comparison might be predicated on either
304 ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
305 ;; bits don't matter for .H operations.
307 ;; We therefore can't rely on a full-vector comparison using the same
308 ;; predicate register as a following PTEST. We instead need to remember
309 ;; whether a comparison is known to be a full-vector comparison and use
310 ;; this information in addition to a check for equal predicate registers.
311 ;; At the same time, it's useful to have a common representation for all
312 ;; integer comparisons, so that they can be handled by a single set of
315 ;; We therefore take a similar approach to UNSPEC_PTEST above and use:
317 ;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
321 ;; - GP is the governing predicate, of mode <M:VPRED>
323 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
324 ;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
327 ;; - CODE is the comparison code
329 ;; - OP0 and OP1 are the values being compared, of mode M
331 ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
333 ;; -------------------------------------------------------------------------
334 ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
335 ;; -------------------------------------------------------------------------
337 ;; Many SVE integer operations are predicated. We can generate them
338 ;; from four sources:
340 ;; (1) Using normal unpredicated optabs. In this case we need to create
341 ;; an all-true predicate register to act as the governing predicate
342 ;; for the SVE instruction. There are no inactive lanes, and thus
343 ;; the values of inactive lanes don't matter.
345 ;; (2) Using _x ACLE functions. In this case the function provides a
346 ;; specific predicate and some lanes might be inactive. However,
347 ;; as for (1), the values of the inactive lanes don't matter.
348 ;; We can make extra lanes active without changing the behavior
349 ;; (although for code-quality reasons we should avoid doing so
352 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
353 ;; These optabs have a predicate operand that specifies which lanes are
354 ;; active and another operand that provides the values of inactive lanes.
356 ;; (4) Using _m and _z ACLE functions. These functions map to the same
357 ;; patterns as (3), with the _z functions setting inactive lanes to zero
358 ;; and the _m functions setting the inactive lanes to one of the function
361 ;; For (1) and (2) we need a way of attaching the predicate to a normal
362 ;; unpredicated integer operation. We do this using:
364 ;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
366 ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
367 ;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE;
368 ;; it always is for (1), but might not be for (2).
370 ;; The unspec as a whole has the same value as (code:M ...) when PRED is
371 ;; all-true. It is always semantically valid to replace PRED with a PTRUE,
372 ;; but as noted above, we should only do so if there's a specific benefit.
374 ;; (The "_X" in the unspec is named after the ACLE functions in (2).)
376 ;; For (3) and (4) we can simply use the SVE port's normal representation
377 ;; of a predicate-based select:
379 ;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
381 ;; where INACTIVE specifies the values of inactive lanes.
383 ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
384 ;; than inserting the integer operation directly. This is mostly useful
385 ;; if we want the combine pass to merge an integer operation with an explicit
386 ;; vcond_mask (in other words, with a following SEL instruction). However,
387 ;; it's generally better to merge such operations at the gimple level
390 ;; -------------------------------------------------------------------------
391 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
392 ;; -------------------------------------------------------------------------
394 ;; Most SVE floating-point operations are predicated. We can generate
395 ;; them from four sources:
397 ;; (1) Using normal unpredicated optabs. In this case we need to create
398 ;; an all-true predicate register to act as the governing predicate
399 ;; for the SVE instruction. There are no inactive lanes, and thus
400 ;; the values of inactive lanes don't matter.
402 ;; (2) Using _x ACLE functions. In this case the function provides a
403 ;; specific predicate and some lanes might be inactive. However,
404 ;; as for (1), the values of the inactive lanes don't matter.
406 ;; The instruction must have the same exception behavior as the
407 ;; function call unless things like command-line flags specifically
408 ;; allow otherwise. For example, with -ffast-math, it is OK to
409 ;; raise exceptions for inactive lanes, but normally it isn't.
411 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
412 ;; These optabs have a predicate operand that specifies which lanes are
413 ;; active and another operand that provides the values of inactive lanes.
415 ;; (4) Using _m and _z ACLE functions. These functions map to the same
416 ;; patterns as (3), with the _z functions setting inactive lanes to zero
417 ;; and the _m functions setting the inactive lanes to one of the function
422 ;; - In (1), the predicate is known to be all true and the pattern can use
423 ;; unpredicated operations where available.
425 ;; - In (2), the predicate might or might not be all true. The pattern can
426 ;; use unpredicated instructions if the predicate is all-true or if things
427 ;; like command-line flags allow exceptions for inactive lanes.
429 ;; - (3) and (4) represent a native SVE predicated operation. Some lanes
430 ;; might be inactive and inactive lanes of the result must have specific
431 ;; values. There is no scope for using unpredicated instructions (and no
432 ;; reason to want to), so the question about command-line flags doesn't
435 ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
436 ;; in combination with a separate predicate operand, e.g.
438 ;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
439 ;; (sqrt:SVE_FULL_F 2 "register_operand" "w")]
442 ;; because (sqrt ...) can raise an exception for any lane, including
443 ;; inactive ones. We therefore need to use an unspec instead.
445 ;; Also, (2) requires some way of distinguishing the case in which the
446 ;; predicate might have inactive lanes and cannot be changed from the
447 ;; case in which the predicate has no inactive lanes or can be changed.
448 ;; This information is also useful when matching combined FP patterns
449 ;; in which the predicates might not be equal.
451 ;; We therefore model FP operations as an unspec of the form:
453 ;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
457 ;; - PRED is the governing predicate.
459 ;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the
460 ;; value SVE_STRICT_GP if PRED might have inactive lanes and if those
461 ;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise.
463 ;; - OP0 OP1 ... are the normal input operands to the operation.
465 ;; - MNEMONIC is the mnemonic of the associated SVE instruction.
467 ;; For (3) and (4), we combine these operations with an UNSPEC_SEL
468 ;; that selects between the result of the FP operation and the "else"
469 ;; value. (This else value is a merge input for _m ACLE functions
470 ;; and zero for _z ACLE functions.) The outer pattern then has the form:
472 ;; (unspec [pred fp_operation else_value] UNSPEC_SEL)
474 ;; This means that the patterns for (3) and (4) have two predicates:
475 ;; one for the FP operation itself and one for the UNSPEC_SEL.
476 ;; This pattern is equivalent to the result of combining an instance
477 ;; of (1) or (2) with a separate vcond instruction, so these patterns
478 ;; are useful as combine targets too.
480 ;; However, in the combine case, the instructions that we want to
481 ;; combine might use different predicates. Then:
483 ;; - Some of the active lanes of the FP operation might be discarded
484 ;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes,
485 ;; even for SVE_STRICT_GP, since the operations on those lanes are
486 ;; effectively dead code.
488 ;; - Some of the inactive lanes of the FP operation might be selected
489 ;; by the UNSPEC_SEL, giving unspecified values for those lanes.
490 ;; SVE_RELAXED_GP lets us extend the FP operation to cover these
491 ;; extra lanes, but SVE_STRICT_GP does not.
493 ;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
494 ;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
495 ;; This typically leads to patterns like:
497 ;; (unspec [(match_operand 1 "register_operand" "Upl")
498 ;; (unspec [(match_operand N)
499 ;; (const_int SVE_RELAXED_GP)
501 ;; UNSPEC_COND_<MNEMONIC>)
504 ;; where operand N is allowed to be anything. These instructions then
505 ;; have rewrite rules to replace operand N with operand 1, which gives the
506 ;; instructions a canonical form and means that the original operand N is
507 ;; not kept live unnecessarily.
509 ;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
510 ;; a subset of the FP operation predicate. This case isn't interesting
511 ;; for FP operations that have an all-true predicate, since such operations
512 ;; use SVE_RELAXED_GP instead. And it is not possible for instruction
513 ;; conditions to track the subset relationship for arbitrary registers.
514 ;; So in practice, the only useful case for SVE_STRICT_GP is the one
515 ;; in which the predicates match:
517 ;; (unspec [(match_operand 1 "register_operand" "Upl")
518 ;; (unspec [(match_dup 1)
519 ;; (const_int SVE_STRICT_GP)
521 ;; UNSPEC_COND_<MNEMONIC>)
524 ;; This pattern would also be correct for SVE_RELAXED_GP, but it would
525 ;; be redundant with the one above. However, if the combine pattern
526 ;; has multiple FP operations, using a match_operand allows combinations
527 ;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
528 ;; that the predicates are the same:
530 ;; (unspec [(match_operand 1 "register_operand" "Upl")
532 ;; (unspec [(match_dup 1)
533 ;; (match_operand:SI N "aarch64_sve_gp_strictness")
535 ;; UNSPEC_COND_<MNEMONIC1>)
536 ;; (unspec [(match_dup 1)
537 ;; (match_operand:SI M "aarch64_sve_gp_strictness")
539 ;; UNSPEC_COND_<MNEMONIC2>) ...)
542 ;; The fully-relaxed version of this pattern is:
544 ;; (unspec [(match_operand 1 "register_operand" "Upl")
546 ;; (unspec [(match_operand:SI N)
547 ;; (const_int SVE_RELAXED_GP)
549 ;; UNSPEC_COND_<MNEMONIC1>)
550 ;; (unspec [(match_operand:SI M)
551 ;; (const_int SVE_RELAXED_GP)
553 ;; UNSPEC_COND_<MNEMONIC2>) ...)
556 ;; -------------------------------------------------------------------------
557 ;; ---- Note on FFR handling
558 ;; -------------------------------------------------------------------------
560 ;; Logically we want to divide FFR-related instructions into regions
561 ;; that contain exactly one of:
563 ;; - a single write to the FFR
564 ;; - any number of reads from the FFR (but only one read is likely)
565 ;; - any number of LDFF1 and LDNF1 instructions
567 ;; However, LDFF1 and LDNF1 instructions should otherwise behave like
568 ;; normal loads as far as possible. This means that they should be
569 ;; schedulable within a region in the same way that LD1 would be,
570 ;; and they should be deleted as dead if the result is unused. The loads
571 ;; should therefore not write to the FFR, since that would both serialize
572 ;; the loads with respect to each other and keep the loads live for any
575 ;; We get around this by using a fake "FFR token" (FFRT) to help describe
576 ;; the dependencies. Writing to the FFRT starts a new "FFRT region",
577 ;; while using the FFRT keeps the instruction within its region.
580 ;; - Writes start a new FFRT region as well as setting the FFR:
582 ;; W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
584 ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
585 ;; loads stay within the same FFRT region:
587 ;; L1: load data while using the FFRT
589 ;; In addition, any FFRT region that includes a load also has at least one
592 ;; L2: FFR = update(FFR, FFRT) [type == no_insn]
594 ;; to make it clear that the region both reads from and writes to the FFR.
596 ;; - Reads do the following:
598 ;; R1: FFRT = FFR [type == no_insn]
599 ;; R2: read from the FFRT
600 ;; R3: FFRT = update(FFRT) [type == no_insn]
602 ;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and
603 ;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
604 ;; cannot move backwards across R3.
606 ;; This way, writes are only kept alive by later loads or reads,
607 ;; and write/read pairs fold normally. For two consecutive reads,
608 ;; the first R3 is made dead by the second R1, which in turn becomes
609 ;; redundant with the first R1. We then have:
611 ;; first R1: FFRT = FFR
612 ;; first read from the FFRT
613 ;; second read from the FFRT
614 ;; second R3: FFRT = update(FFRT)
616 ;; i.e. the two FFRT regions collapse into a single one with two
617 ;; independent reads.
619 ;; The model still prevents some valid optimizations though. For example,
620 ;; if all loads in an FFRT region are deleted as dead, nothing would remove
621 ;; the L2 instructions.
623 ;; =========================================================================
625 ;; =========================================================================
627 ;; -------------------------------------------------------------------------
628 ;; ---- Moves of single vectors
629 ;; -------------------------------------------------------------------------
631 ;; - MOV (including aliases)
632 ;; - LD1B (contiguous form)
637 ;; - ST1B (contiguous form)
642 ;; -------------------------------------------------------------------------
644 (define_expand "mov<mode>"
645 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
646 (match_operand:SVE_ALL 1 "general_operand"))]
649 /* Use the predicated load and store patterns where possible.
650 This is required for big-endian targets (see the comment at the
651 head of the file) and increases the addressing choices for
653 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
654 && can_create_pseudo_p ())
656 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
660 if (CONSTANT_P (operands[1]))
662 aarch64_expand_mov_immediate (operands[0], operands[1]);
666 /* Optimize subregs on big-endian targets: we can use REV[BHW]
667 instead of going through memory. */
669 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
674 (define_expand "movmisalign<mode>"
675 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
676 (match_operand:SVE_ALL 1 "general_operand"))]
679 /* Equivalent to a normal move for our purpooses. */
680 emit_move_insn (operands[0], operands[1]);
685 ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
686 ;; little-endian ordering is acceptable. Only allow memory operations during
687 ;; and after RA; before RA we want the predicated load and store patterns to
689 (define_insn "*aarch64_sve_mov<mode>_ldr_str"
690 [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
691 (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
693 && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
694 && ((lra_in_progress || reload_completed)
695 || (register_operand (operands[0], <MODE>mode)
696 && nonmemory_operand (operands[1], <MODE>mode)))"
701 * return aarch64_output_sve_mov_immediate (operands[1]);"
704 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
705 ;; or vectors for which little-endian ordering isn't acceptable. Memory
706 ;; accesses require secondary reloads.
707 (define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
708 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
709 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
711 && <MODE>mode != VNx16QImode
713 || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
716 * return aarch64_output_sve_mov_immediate (operands[1]);"
719 ;; Handle memory reloads for modes that can't use LDR and STR. We use
720 ;; byte PTRUE for all modes to try to encourage reuse. This pattern
721 ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
722 (define_expand "aarch64_sve_reload_mem"
724 [(set (match_operand 0)
726 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
729 /* Create a PTRUE. */
730 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
732 /* Refer to the PTRUE in the appropriate mode for this move. */
733 machine_mode mode = GET_MODE (operands[0]);
734 rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
736 /* Emit a predicated load or store. */
737 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
742 ;; A predicated move in which the predicate is known to be all-true.
743 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
744 ;; so changes to this pattern will need changes there as well.
745 (define_insn_and_split "@aarch64_pred_mov<mode>"
746 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
748 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
749 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
752 && (register_operand (operands[0], <MODE>mode)
753 || register_operand (operands[2], <MODE>mode))"
756 ld1<Vesize>\t%0.<Vctype>, %1/z, %2
757 st1<Vesize>\t%2.<Vctype>, %1, %0"
758 "&& register_operand (operands[0], <MODE>mode)
759 && register_operand (operands[2], <MODE>mode)"
760 [(set (match_dup 0) (match_dup 2))]
763 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
764 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
765 ;; for details. We use a special predicate for operand 2 to reduce
766 ;; the number of patterns.
767 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
768 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
770 [(match_operand:VNx16BI 1 "register_operand" "Upl")
771 (match_operand 2 "aarch64_any_register_operand" "w")]
773 "TARGET_SVE && BYTES_BIG_ENDIAN"
775 "&& reload_completed"
778 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
783 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
784 ;; This is equivalent to a subreg on little-endian targets but not for
785 ;; big-endian; see the comment at the head of the file for details.
786 (define_expand "@aarch64_sve_reinterpret<mode>"
787 [(set (match_operand:SVE_ALL 0 "register_operand")
789 [(match_operand 1 "aarch64_any_register_operand")]
790 UNSPEC_REINTERPRET))]
;; When the target allows changing between the two modes in FP_REGS,
;; the reinterpret reduces to a simple lowpart move and we avoid
;; generating the UNSPEC_REINTERPRET form at all.
793 machine_mode src_mode = GET_MODE (operands[1]);
794 if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
796 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
802 ;; A pattern for handling type punning on big-endian targets. We use a
803 ;; special predicate for operand 1 to reduce the number of patterns.
;; NOTE(review): split after reload into a same-register move, with the
;; source retyped to the destination mode via aarch64_replace_reg_mode.
804 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
805 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
807 [(match_operand 1 "aarch64_any_register_operand" "w")]
808 UNSPEC_REINTERPRET))]
811 "&& reload_completed"
812 [(set (match_dup 0) (match_dup 1))]
814 operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
818 ;; -------------------------------------------------------------------------
819 ;; ---- Moves of multiple vectors
820 ;; -------------------------------------------------------------------------
821 ;; All patterns in this section are synthetic and split to real
822 ;; instructions after reload.
823 ;; -------------------------------------------------------------------------
;; Move expander for SVE structure modes (tuples of 2/3/4 vectors).
825 (define_expand "mov<mode>"
826 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
827 (match_operand:SVE_STRUCT 1 "general_operand"))]
830 /* Big-endian loads and stores need to be done via LD1 and ST1;
831 see the comment at the head of the file for details. */
832 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
835 gcc_assert (can_create_pseudo_p ());
836 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
840 if (CONSTANT_P (operands[1]))
842 aarch64_expand_mov_immediate (operands[0], operands[1]);
848 ;; Unpredicated structure moves (little-endian).
849 (define_insn "*aarch64_sve_mov<mode>_le"
850 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
851 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
852 "TARGET_SVE && !BYTES_BIG_ENDIAN"
854 [(set_attr "length" "<insn_length>")]
857 ;; Unpredicated structure moves (big-endian). Memory accesses require
858 ;; secondary reloads.
859 (define_insn "*aarch64_sve_mov<mode>_be"
860 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
861 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
862 "TARGET_SVE && BYTES_BIG_ENDIAN"
864 [(set_attr "length" "<insn_length>")]
867 ;; Split unpredicated structure moves into pieces. This is the same
868 ;; for both big-endian and little-endian code, although it only needs
869 ;; to handle memory operands for little-endian code.
871 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
872 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
873 "TARGET_SVE && reload_completed"
876 rtx dest = operands[0];
877 rtx src = operands[1];
878 if (REG_P (dest) && REG_P (src))
879 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
;; Otherwise move one constituent vector at a time, using subregs at
;; successive BYTES_PER_SVE_VECTOR offsets.
881 for (unsigned int i = 0; i < <vector_count>; ++i)
883 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
884 i * BYTES_PER_SVE_VECTOR);
885 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
886 i * BYTES_PER_SVE_VECTOR);
887 emit_insn (gen_rtx_SET (subdest, subsrc));
893 ;; Predicated structure moves. This works for both endiannesses but in
894 ;; practice is only useful for big-endian.
;; NOTE(review): split after reload into one predicated single-vector move
;; (aarch64_emit_sve_pred_move) per constituent vector, all under the same
;; governing predicate (operand 1).
895 (define_insn_and_split "@aarch64_pred_mov<mode>"
896 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
898 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
899 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
902 && (register_operand (operands[0], <MODE>mode)
903 || register_operand (operands[2], <MODE>mode))"
905 "&& reload_completed"
908 for (unsigned int i = 0; i < <vector_count>; ++i)
910 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
912 i * BYTES_PER_SVE_VECTOR);
913 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
915 i * BYTES_PER_SVE_VECTOR);
916 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
920 [(set_attr "length" "<insn_length>")]
923 ;; -------------------------------------------------------------------------
924 ;; ---- Moves of predicates
925 ;; -------------------------------------------------------------------------
933 ;; -------------------------------------------------------------------------
;; Move expander for predicate modes.
935 (define_expand "mov<mode>"
936 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
937 (match_operand:PRED_ALL 1 "general_operand"))]
;; Predicate stores need a register source; constant sources are
;; legitimized via aarch64_expand_mov_immediate.
940 if (GET_CODE (operands[0]) == MEM)
941 operands[1] = force_reg (<MODE>mode, operands[1]);
943 if (CONSTANT_P (operands[1]))
945 aarch64_expand_mov_immediate (operands[0], operands[1]);
;; Predicate move insn; at most one of the operands may be memory.
951 (define_insn "*aarch64_sve_mov<mode>"
952 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
953 (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
955 && (register_operand (operands[0], <MODE>mode)
956 || register_operand (operands[1], <MODE>mode))"
961 * return aarch64_output_sve_mov_immediate (operands[1]);"
964 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
;; NOTE(review): in all four PTRUES patterns below, the rewrite step
;; canonicalizes operands 2 and 3 to all-ones constants once they are
;; known not to be constant yet.
965 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
966 [(set (reg:CC_NZC CC_REGNUM)
970 (const_int SVE_KNOWN_PTRUE)
971 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
973 [(match_operand:SI 4 "const_int_operand")
974 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
977 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
981 return aarch64_output_sve_ptrues (operands[1]);
983 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
985 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
989 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
990 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
991 [(set (reg:CC_NZC CC_REGNUM)
995 (const_int SVE_KNOWN_PTRUE)
997 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
999 [(match_operand:SI 4 "const_int_operand")
1000 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1003 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1007 return aarch64_output_sve_ptrues (operands[1]);
1009 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1011 operands[2] = CONSTM1_RTX (VNx16BImode);
1012 operands[3] = CONSTM1_RTX (<MODE>mode);
1016 ;; Match PTRUES Pn.B when only the flags result is useful (which is
1017 ;; a way of testing VL).
1018 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
1019 [(set (reg:CC_NZC CC_REGNUM)
1023 (const_int SVE_KNOWN_PTRUE)
1024 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1026 [(match_operand:SI 4 "const_int_operand")
1027 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
1030 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1033 return aarch64_output_sve_ptrues (operands[1]);
1035 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1037 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
1041 ;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
1042 ;; a way of testing VL).
1043 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
1044 [(set (reg:CC_NZC CC_REGNUM)
1048 (const_int SVE_KNOWN_PTRUE)
1050 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1052 [(match_operand:SI 4 "const_int_operand")
1053 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1056 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1059 return aarch64_output_sve_ptrues (operands[1]);
1061 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1063 operands[2] = CONSTM1_RTX (VNx16BImode);
1064 operands[3] = CONSTM1_RTX (<MODE>mode);
1068 ;; -------------------------------------------------------------------------
1069 ;; ---- Moves relating to the FFR
1070 ;; -------------------------------------------------------------------------
1075 ;; -------------------------------------------------------------------------
1077 ;; [W1 in the block comment above about FFR handling]
1079 ;; Write to the FFR and start a new FFRT scheduling region.
1080 (define_insn "aarch64_wrffr"
1081 [(set (reg:VNx16BI FFR_REGNUM)
1082 (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
1083 (set (reg:VNx16BI FFRT_REGNUM)
1084 (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
1091 ;; [L2 in the block comment above about FFR handling]
1093 ;; Introduce a read from and write to the FFR in the current FFRT region,
1094 ;; so that the FFR value is live on entry to the region and so that the FFR
1095 ;; value visibly changes within the region. This is used (possibly multiple
1096 ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
1097 (define_insn "aarch64_update_ffr_for_load"
1098 [(set (reg:VNx16BI FFR_REGNUM)
1099 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1100 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1103 [(set_attr "type" "no_insn")]
1106 ;; [R1 in the block comment above about FFR handling]
1108 ;; Notionally copy the FFR to the FFRT, so that the current FFR value
1109 ;; can be read from there by the RDFFR instructions below. This acts
1110 ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1111 ;; creates a natural dependency with earlier writes.
1112 (define_insn "aarch64_copy_ffr_to_ffrt"
1113 [(set (reg:VNx16BI FFRT_REGNUM)
1114 (reg:VNx16BI FFR_REGNUM))]
1117 [(set_attr "type" "no_insn")]
1120 ;; [R2 in the block comment above about FFR handling]
1122 ;; Read the FFR via the FFRT.
1123 (define_insn "aarch64_rdffr"
1124 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1125 (reg:VNx16BI FFRT_REGNUM))]
1130 ;; Likewise with zero predication.
1131 (define_insn "aarch64_rdffr_z"
1132 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1134 (reg:VNx16BI FFRT_REGNUM)
1135 (match_operand:VNx16BI 1 "register_operand" "Upa")))]
;; NOTE(review): all four *_ptest/*_cc variants below emit the same
;; flag-setting "rdffrs" instruction; they differ only in whether the
;; predicate result is used (set) or discarded (clobbered scratch).
1140 ;; Read the FFR to test for a fault, without using the predicate result.
1141 (define_insn "*aarch64_rdffr_z_ptest"
1142 [(set (reg:CC_NZC CC_REGNUM)
1144 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1146 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1148 (reg:VNx16BI FFRT_REGNUM)
1151 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1153 "rdffrs\t%0.b, %1/z"
1156 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1157 (define_insn "*aarch64_rdffr_ptest"
1158 [(set (reg:CC_NZC CC_REGNUM)
1160 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1162 (const_int SVE_KNOWN_PTRUE)
1163 (reg:VNx16BI FFRT_REGNUM)]
1165 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1167 "rdffrs\t%0.b, %1/z"
1170 ;; Read the FFR with zero predication and test the result.
1171 (define_insn "*aarch64_rdffr_z_cc"
1172 [(set (reg:CC_NZC CC_REGNUM)
1174 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1176 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1178 (reg:VNx16BI FFRT_REGNUM)
1181 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1183 (reg:VNx16BI FFRT_REGNUM)
1186 "rdffrs\t%0.b, %1/z"
1189 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1190 (define_insn "*aarch64_rdffr_cc"
1191 [(set (reg:CC_NZC CC_REGNUM)
1193 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1195 (const_int SVE_KNOWN_PTRUE)
1196 (reg:VNx16BI FFRT_REGNUM)]
1198 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1199 (reg:VNx16BI FFRT_REGNUM))]
1201 "rdffrs\t%0.b, %1/z"
1204 ;; [R3 in the block comment above about FFR handling]
1206 ;; Arbitrarily update the FFRT after a read from the FFR. This acts as
1207 ;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
;; "no_insn": emits no code; exists purely to create the dependency.
1208 (define_insn "aarch64_update_ffrt"
1209 [(set (reg:VNx16BI FFRT_REGNUM)
1210 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1213 [(set_attr "type" "no_insn")]
1216 ;; =========================================================================
1218 ;; =========================================================================
1220 ;; -------------------------------------------------------------------------
1221 ;; ---- Normal contiguous loads
1222 ;; -------------------------------------------------------------------------
1223 ;; Includes contiguous forms of:
1240 ;; -------------------------------------------------------------------------
;; Predicated contiguous load (LD1); operand 2 is the governing predicate.
1243 (define_insn "maskload<mode><vpred>"
1244 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1246 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1247 (match_operand:SVE_ALL 1 "memory_operand" "m")]
1250 "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
1253 ;; Unpredicated LD[234].
;; Expands to the predicated form below with an all-true predicate.
1254 (define_expand "vec_load_lanes<mode><vsingle>"
1255 [(set (match_operand:SVE_STRUCT 0 "register_operand")
1258 (match_operand:SVE_STRUCT 1 "memory_operand")]
1262 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1266 ;; Predicated LD[234].
1267 (define_insn "vec_mask_load_lanes<mode><vsingle>"
1268 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1270 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1271 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
1274 "ld<vector_count><Vesize>\t%0, %2/z, %1"
1277 ;; -------------------------------------------------------------------------
1278 ;; ---- Extending contiguous loads
1279 ;; -------------------------------------------------------------------------
1280 ;; Includes contiguous forms of:
1287 ;; -------------------------------------------------------------------------
1289 ;; Predicated load and extend, with 8 elements per 128-bit block.
;; NOTE(review): operand 2 governs the narrow load, operand 3 the wider
;; extension; the rewrite below canonicalizes a non-constant operand 3 to
;; an all-ones constant of the wider predicate mode.
1290 (define_insn_and_rewrite "@aarch64_load<SVE_PRED_LOAD:pred_load>_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1291 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1293 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1294 (ANY_EXTEND:SVE_HSDI
1295 (unspec:SVE_PARTIAL_I
1296 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1297 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1300 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1301 "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1302 "&& !CONSTANT_P (operands[3])"
1304 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1308 ;; -------------------------------------------------------------------------
1309 ;; ---- First-faulting contiguous loads
1310 ;; -------------------------------------------------------------------------
1311 ;; Includes contiguous forms of:
1320 ;; -------------------------------------------------------------------------
1322 ;; Contiguous non-extending first-faulting or non-faulting loads.
;; NOTE(review): "<fn>" selects between the first-faulting (LDFF1) and
;; non-faulting (LDNF1) forms — confirm against the fn iterator definition.
;; The FFRT reference makes the load depend on the FFR scheduling region.
1323 (define_insn "@aarch64_ld<fn>f1<mode>"
1324 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1326 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1327 (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1328 (reg:VNx16BI FFRT_REGNUM)]
1331 "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1334 ;; -------------------------------------------------------------------------
1335 ;; ---- First-faulting extending contiguous loads
1336 ;; -------------------------------------------------------------------------
1337 ;; Includes contiguous forms of:
1350 ;; -------------------------------------------------------------------------
1352 ;; Predicated first-faulting or non-faulting load and extend.
;; NOTE(review): same structure as the non-faulting extending load above;
;; the rewrite canonicalizes a non-constant operand 3 to all-ones.
1353 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1354 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1356 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1357 (ANY_EXTEND:SVE_HSDI
1358 (unspec:SVE_PARTIAL_I
1359 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1360 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1361 (reg:VNx16BI FFRT_REGNUM)]
1364 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1365 "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1366 "&& !CONSTANT_P (operands[3])"
1368 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1372 ;; -------------------------------------------------------------------------
1373 ;; ---- Non-temporal contiguous loads
1374 ;; -------------------------------------------------------------------------
1380 ;; -------------------------------------------------------------------------
1382 ;; Predicated contiguous non-temporal load.
;; Operand 2 is the governing predicate; inactive elements are zeroed (/z).
1383 (define_insn "@aarch64_ldnt1<mode>"
1384 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1386 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1387 (match_operand:SVE_FULL 1 "memory_operand" "m")]
1390 "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1393 ;; -------------------------------------------------------------------------
1394 ;; ---- Normal gather loads
1395 ;; -------------------------------------------------------------------------
1396 ;; Includes gather forms of:
1399 ;; -------------------------------------------------------------------------
1401 ;; Unpredicated gather loads.
;; Expands to the predicated form with an all-true predicate (operand 5).
1402 (define_expand "gather_load<mode><v_int_container>"
1403 [(set (match_operand:SVE_24 0 "register_operand")
1406 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1407 (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1408 (match_operand:DI 3 "const_int_operand")
1409 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1410 (mem:BLK (scratch))]
1411 UNSPEC_LD1_GATHER))]
1414 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1418 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
1419 ;; unsigned extension and false for signed extension.
;; Alternatives cover: vector base, vector base + immediate, scalar base
;; with sxtw/uxtw offsets, with and without scaling (%p4).
1420 (define_insn "mask_gather_load<mode><v_int_container>"
1421 [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
1423 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1424 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
1425 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1426 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1427 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1428 (mem:BLK (scratch))]
1429 UNSPEC_LD1_GATHER))]
1432 ld1<Vesize>\t%0.s, %5/z, [%2.s]
1433 ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1434 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1435 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1436 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1437 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1440 ;; Predicated gather loads for 64-bit elements. The value of operand 3
1441 ;; doesn't matter in this case.
1442 (define_insn "mask_gather_load<mode><v_int_container>"
1443 [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
1445 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1446 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
1447 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1448 (match_operand:DI 3 "const_int_operand")
1449 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
1450 (mem:BLK (scratch))]
1451 UNSPEC_LD1_GATHER))]
1454 ld1<Vesize>\t%0.d, %5/z, [%2.d]
1455 ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1456 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1457 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1460 ;; Likewise, but with the offset being extended from 32 bits.
;; NOTE(review): the rewrite canonicalizes the hidden extension predicate
;; (operand 6) to all-ones when it is not already constant.
1461 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1462 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1464 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1465 (match_operand:DI 1 "register_operand" "rk, rk")
1469 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1471 (match_operand:DI 3 "const_int_operand")
1472 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1473 (mem:BLK (scratch))]
1474 UNSPEC_LD1_GATHER))]
1477 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1478 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
1479 "&& !CONSTANT_P (operands[6])"
1481 operands[6] = CONSTM1_RTX (VNx2BImode);
1485 ;; Likewise, but with the offset being truncated to 32 bits and then
1487 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1488 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1490 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1491 (match_operand:DI 1 "register_operand" "rk, rk")
1496 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1498 (match_operand:DI 3 "const_int_operand")
1499 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1500 (mem:BLK (scratch))]
1501 UNSPEC_LD1_GATHER))]
1504 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1505 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1506 "&& !CONSTANT_P (operands[6])"
1508 operands[6] = CONSTM1_RTX (VNx2BImode);
1512 ;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended via an AND with an immediate mask (operand 6).
1514 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1515 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1517 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1518 (match_operand:DI 1 "register_operand" "rk, rk")
1520 (match_operand:VNx2DI 2 "register_operand" "w, w")
1521 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1522 (match_operand:DI 3 "const_int_operand")
1523 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1524 (mem:BLK (scratch))]
1525 UNSPEC_LD1_GATHER))]
1528 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1529 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1532 ;; -------------------------------------------------------------------------
1533 ;; ---- Extending gather loads
1534 ;; -------------------------------------------------------------------------
1535 ;; Includes gather forms of:
1542 ;; -------------------------------------------------------------------------
1544 ;; Predicated extending gather loads for 32-bit elements. Operand 3 is
1545 ;; true for unsigned extension and false for signed extension.
;; NOTE(review): operand 6 is the governing predicate for the extension;
;; the rewrite canonicalizes it to all-ones when not already constant.
1546 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1547 [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
1549 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1550 (ANY_EXTEND:SVE_4HSI
1552 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1553 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
1554 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1555 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1556 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1557 (mem:BLK (scratch))]
1558 UNSPEC_LD1_GATHER))]
1560 "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1562 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1563 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1564 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1565 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1566 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1567 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1568 "&& !CONSTANT_P (operands[6])"
1570 operands[6] = CONSTM1_RTX (VNx4BImode);
1574 ;; Predicated extending gather loads for 64-bit elements. The value of
1575 ;; operand 3 doesn't matter in this case.
1576 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1577 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
1579 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1580 (ANY_EXTEND:SVE_2HSDI
1582 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1583 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
1584 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1585 (match_operand:DI 3 "const_int_operand")
1586 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
1587 (mem:BLK (scratch))]
1588 UNSPEC_LD1_GATHER))]
1590 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1592 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1593 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1594 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1595 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1596 "&& !CONSTANT_P (operands[6])"
1598 operands[6] = CONSTM1_RTX (VNx2BImode);
1602 ;; Likewise, but with the offset being extended from 32 bits.
;; NOTE(review): here two hidden predicates (operands 6 and 7) are
;; canonicalized to all-ones by the rewrite.
1603 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1604 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1607 (ANY_EXTEND:SVE_2HSDI
1609 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1610 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1614 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1616 (match_operand:DI 3 "const_int_operand")
1617 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1618 (mem:BLK (scratch))]
1619 UNSPEC_LD1_GATHER))]
1621 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1623 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1624 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
1625 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1627 operands[6] = CONSTM1_RTX (VNx2BImode);
1628 operands[7] = CONSTM1_RTX (VNx2BImode);
1632 ;; Likewise, but with the offset being truncated to 32 bits and then
1634 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1635 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1638 (ANY_EXTEND:SVE_2HSDI
1640 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1641 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1646 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1648 (match_operand:DI 3 "const_int_operand")
1649 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1650 (mem:BLK (scratch))]
1651 UNSPEC_LD1_GATHER))]
1653 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1655 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1656 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1657 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1659 operands[6] = CONSTM1_RTX (VNx2BImode);
1660 operands[7] = CONSTM1_RTX (VNx2BImode);
1664 ;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended via an AND with an immediate mask (operand 6).
1666 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1667 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1670 (ANY_EXTEND:SVE_2HSDI
1672 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1673 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1675 (match_operand:VNx2DI 2 "register_operand" "w, w")
1676 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1677 (match_operand:DI 3 "const_int_operand")
1678 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1679 (mem:BLK (scratch))]
1680 UNSPEC_LD1_GATHER))]
1682 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1684 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1685 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1686 "&& !CONSTANT_P (operands[7])"
1688 operands[7] = CONSTM1_RTX (VNx2BImode);
1692 ;; -------------------------------------------------------------------------
1693 ;; ---- First-faulting gather loads
1694 ;; -------------------------------------------------------------------------
1695 ;; Includes gather forms of:
1698 ;; -------------------------------------------------------------------------
1700 ;; Predicated first-faulting gather loads for 32-bit elements. Operand
1701 ;; 3 is true for unsigned extension and false for signed extension.
;; The FFRT reference ties each gather into the FFR scheduling region.
1702 (define_insn "@aarch64_ldff1_gather<mode>"
1703 [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
1705 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1706 (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
1707 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1708 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1709 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
1711 (reg:VNx16BI FFRT_REGNUM)]
1712 UNSPEC_LDFF1_GATHER))]
1715 ldff1w\t%0.s, %5/z, [%2.s]
1716 ldff1w\t%0.s, %5/z, [%2.s, #%1]
1717 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1718 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1719 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1720 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1723 ;; Predicated first-faulting gather loads for 64-bit elements. The value
1724 ;; of operand 3 doesn't matter in this case.
1725 (define_insn "@aarch64_ldff1_gather<mode>"
1726 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
1728 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1729 (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
1730 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1731 (match_operand:DI 3 "const_int_operand")
1732 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
1734 (reg:VNx16BI FFRT_REGNUM)]
1735 UNSPEC_LDFF1_GATHER))]
1738 ldff1d\t%0.d, %5/z, [%2.d]
1739 ldff1d\t%0.d, %5/z, [%2.d, #%1]
1740 ldff1d\t%0.d, %5/z, [%1, %2.d]
1741 ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1744 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; NOTE(review): the rewrite canonicalizes the hidden extension predicate
;; (operand 6) to all-ones when it is not already constant.
1745 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1746 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1748 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1749 (match_operand:DI 1 "register_operand" "rk, rk")
1754 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1756 (match_operand:DI 3 "const_int_operand")
1757 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1759 (reg:VNx16BI FFRT_REGNUM)]
1760 UNSPEC_LDFF1_GATHER))]
1763 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1764 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1765 "&& !CONSTANT_P (operands[6])"
1767 operands[6] = CONSTM1_RTX (VNx2BImode);
1771 ;; Likewise, but with the offset being zero-extended from 32 bits.
;; Zero extension is done via an AND with an immediate mask (operand 6).
1772 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1773 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1775 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1776 (match_operand:DI 1 "register_operand" "rk, rk")
1778 (match_operand:VNx2DI 2 "register_operand" "w, w")
1779 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1780 (match_operand:DI 3 "const_int_operand")
1781 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1783 (reg:VNx16BI FFRT_REGNUM)]
1784 UNSPEC_LDFF1_GATHER))]
1787 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1788 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1791 ;; -------------------------------------------------------------------------
1792 ;; ---- First-faulting extending gather loads
1793 ;; -------------------------------------------------------------------------
1794 ;; Includes gather forms of:
1801 ;; -------------------------------------------------------------------------
1803 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1804 ;; Operand 3 is true for unsigned extension and false for signed extension.
;; NOTE(review): operand 6 is the governing predicate for the extension;
;; the rewrite canonicalizes it to all-ones when not already constant.
1805 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1806 [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
1808 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1809 (ANY_EXTEND:VNx4_WIDE
1811 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1812 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
1813 (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
1814 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1815 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1817 (reg:VNx16BI FFRT_REGNUM)]
1818 UNSPEC_LDFF1_GATHER))]
1822 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1823 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1824 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1825 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1826 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1827 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1828 "&& !CONSTANT_P (operands[6])"
1830 operands[6] = CONSTM1_RTX (VNx4BImode);
1834 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1835 ;; The value of operand 3 doesn't matter in this case.
1836 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
1837 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
1839 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1840 (ANY_EXTEND:VNx2_WIDE
1842 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1843 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
1844 (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
1845 (match_operand:DI 3 "const_int_operand")
1846 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
1848 (reg:VNx16BI FFRT_REGNUM)]
1849 UNSPEC_LDFF1_GATHER))]
1853 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
1854 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1855 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
1856 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1857 "&& !CONSTANT_P (operands[6])"
1859 operands[6] = CONSTM1_RTX (VNx2BImode);
1863 ;; Likewise, but with the offset being sign-extended from 32 bits.
1864 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
1865 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1868 (ANY_EXTEND:VNx2_WIDE
1870 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1871 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1876 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1878 (match_operand:DI 3 "const_int_operand")
1879 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1881 (reg:VNx16BI FFRT_REGNUM)]
1882 UNSPEC_LDFF1_GATHER))]
1886 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1887 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1888 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1890 operands[6] = CONSTM1_RTX (VNx2BImode);
1891 operands[7] = CONSTM1_RTX (VNx2BImode);
1895 ;; Likewise, but with the offset being zero-extended from 32 bits.
1896 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
1897 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1900 (ANY_EXTEND:VNx2_WIDE
1902 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1903 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1905 (match_operand:VNx2DI 2 "register_operand" "w, w")
1906 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1907 (match_operand:DI 3 "const_int_operand")
1908 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1910 (reg:VNx16BI FFRT_REGNUM)]
1911 UNSPEC_LDFF1_GATHER))]
1915 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1916 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1917 "&& !CONSTANT_P (operands[7])"
1919 operands[7] = CONSTM1_RTX (VNx2BImode);
1923 ;; =========================================================================
;; == Prefetches
1925 ;; =========================================================================
1927 ;; -------------------------------------------------------------------------
1928 ;; ---- Contiguous prefetches
1929 ;; -------------------------------------------------------------------------
1930 ;; Includes contiguous forms of:
1935 ;; -------------------------------------------------------------------------
1937 ;; Contiguous predicated prefetches.  Operand 2 gives the real prefetch
1938 ;; operation (as an svprfop), with operands 3 and 4 providing distilled
;; copies of the information for the standard RTL prefetch rw and
;; locality operands.
1940 (define_insn "@aarch64_sve_prefetch<mode>"
1941 [(prefetch (unspec:DI
1942 [(match_operand:<VPRED> 0 "register_operand" "Upl")
1943 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
1944 (match_operand:DI 2 "const_int_operand")]
1945 UNSPEC_SVE_PREFETCH)
1946 (match_operand:DI 3 "const_int_operand")
1947 (match_operand:DI 4 "const_int_operand"))]
;; Wrap the matched address in a MEM so that %1 prints as a memory
;; reference, then let aarch64_output_sve_prefetch substitute the
;; svprfop name from operand 2.
1950 operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
1951 return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
1955 ;; -------------------------------------------------------------------------
1956 ;; ---- Gather prefetches
1957 ;; -------------------------------------------------------------------------
1958 ;; Includes gather forms of:
1963 ;; -------------------------------------------------------------------------
1965 ;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
;; are:
1967 ;; 0: the governing predicate
1968 ;; 1: the scalar component of the address
1969 ;; 2: the vector component of the address
1970 ;; 3: 1 for zero extension, 0 for sign extension
1971 ;; 4: the scale multiplier
1972 ;; 5: a vector zero that identifies the mode of data being accessed
1973 ;; 6: the prefetch operator (an svprfop)
1974 ;; 7: the normal RTL prefetch rw flag
1975 ;; 8: the normal RTL prefetch locality value
1976 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
1977 [(prefetch (unspec:DI
1978 [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1979 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
1980 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
1981 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1982 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1983 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1984 (match_operand:DI 6 "const_int_operand")]
1985 UNSPEC_SVE_PREFETCH_GATHER)
1986 (match_operand:DI 7 "const_int_operand")
1987 (match_operand:DI 8 "const_int_operand"))]
;; One mnemonic/operand-template pair per alternative; note that the
;; unscaled sxtw/uxtw alternatives use prfb.
1990 static const char *const insns[][2] = {
1991 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
1992 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
1993 "prfb", "%0, [%1, %2.s, sxtw]",
1994 "prfb", "%0, [%1, %2.s, uxtw]",
1995 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
1996 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
1998 const char *const *parts = insns[which_alternative];
1999 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2003 ;; Predicated gather prefetches for 64-bit elements.  The value of operand 3
2004 ;; doesn't matter in this case.
;; Operand numbering matches the 32-bit pattern above.
2005 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
2006 [(prefetch (unspec:DI
2007 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
2008 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
2009 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
2010 (match_operand:DI 3 "const_int_operand")
2011 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
2012 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2013 (match_operand:DI 6 "const_int_operand")]
2014 UNSPEC_SVE_PREFETCH_GATHER)
2015 (match_operand:DI 7 "const_int_operand")
2016 (match_operand:DI 8 "const_int_operand"))]
;; One mnemonic/operand-template pair per alternative.
2019 static const char *const insns[][2] = {
2020 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
2021 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
2022 "prfb", "%0, [%1, %2.d]",
2023 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
2025 const char *const *parts = insns[which_alternative];
2026 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2030 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; Operand 9 predicates the sign extension of the offset; the rewrite
;; below keeps it equal to the governing predicate (operand 0).
2031 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
2032 [(prefetch (unspec:DI
2033 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2034 (match_operand:DI 1 "register_operand" "rk, rk")
2039 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
2041 (match_operand:DI 3 "const_int_operand")
2042 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2043 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2044 (match_operand:DI 6 "const_int_operand")]
2045 UNSPEC_SVE_PREFETCH_GATHER)
2046 (match_operand:DI 7 "const_int_operand")
2047 (match_operand:DI 8 "const_int_operand"))]
2050 static const char *const insns[][2] = {
2051 "prfb", "%0, [%1, %2.d, sxtw]",
2052 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
2054 const char *const *parts = insns[which_alternative];
2055 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2057 "&& !rtx_equal_p (operands[0], operands[9])"
2059 operands[9] = copy_rtx (operands[0]);
2063 ;; Likewise, but with the offset being zero-extended from 32 bits.
;; The zero extension is expressed as an AND with the uxtw mask
;; immediate (operand 9), so no predicate rewrite is needed here.
2064 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
2065 [(prefetch (unspec:DI
2066 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2067 (match_operand:DI 1 "register_operand" "rk, rk")
2069 (match_operand:VNx2DI 2 "register_operand" "w, w")
2070 (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
2071 (match_operand:DI 3 "const_int_operand")
2072 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2073 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2074 (match_operand:DI 6 "const_int_operand")]
2075 UNSPEC_SVE_PREFETCH_GATHER)
2076 (match_operand:DI 7 "const_int_operand")
2077 (match_operand:DI 8 "const_int_operand"))]
2080 static const char *const insns[][2] = {
2081 "prfb", "%0, [%1, %2.d, uxtw]",
2082 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
2084 const char *const *parts = insns[which_alternative];
2085 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2089 ;; =========================================================================
;; == Stores
2091 ;; =========================================================================
2093 ;; -------------------------------------------------------------------------
2094 ;; ---- Normal contiguous stores
2095 ;; -------------------------------------------------------------------------
2096 ;; Includes contiguous forms of:
2113 ;; -------------------------------------------------------------------------
2116 (define_insn "maskstore<mode><vpred>"
;; Predicated contiguous store: store the active elements of operand 1
;; to memory operand 0 under the control of predicate operand 2.
2117 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2119 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2120 (match_operand:SVE_ALL 1 "register_operand" "w")
2124 "st1<Vesize>\t%1.<Vctype>, %2, %0"
2127 ;; Unpredicated ST[234].  This is always a full update, so the dependence
2128 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2129 ;; There doesn't seem to be any obvious benefit to treating the all-true
2130 ;; case differently though.  In particular, it's very unlikely that we'll
2131 ;; only find out during RTL that a store_lanes is dead.
;; Expands to the predicated form below with an all-true predicate
;; (operand 2, created here).
2132 (define_expand "vec_store_lanes<mode><vsingle>"
2133 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2136 (match_operand:SVE_STRUCT 1 "register_operand")
2141 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2145 ;; Predicated ST[234].
2146 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2147 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2149 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2150 (match_operand:SVE_STRUCT 1 "register_operand" "w")
2154 "st<vector_count><Vesize>\t%1, %2, %0"
2157 ;; -------------------------------------------------------------------------
2158 ;; ---- Truncating contiguous stores
2159 ;; -------------------------------------------------------------------------
2164 ;; -------------------------------------------------------------------------
2166 ;; Predicated truncate and store, with 8 elements per 128-bit block.
;; The truncation is folded into the narrower ST1 store: the wide value
;; in operand 1 is stored using the narrow element size.
2167 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2168 [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2170 [(match_operand:VNx8BI 2 "register_operand" "Upl")
2171 (truncate:VNx8_NARROW
2172 (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2176 "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2179 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2180 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2181 [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2183 [(match_operand:VNx4BI 2 "register_operand" "Upl")
2184 (truncate:VNx4_NARROW
2185 (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2189 "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2192 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2193 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2194 [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2196 [(match_operand:VNx2BI 2 "register_operand" "Upl")
2197 (truncate:VNx2_NARROW
2198 (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2202 "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2205 ;; -------------------------------------------------------------------------
2206 ;; ---- Non-temporal contiguous stores
2207 ;; -------------------------------------------------------------------------
2213 ;; -------------------------------------------------------------------------
2215 (define_insn "@aarch64_stnt1<mode>"
;; Predicated non-temporal contiguous store (STNT1).
2216 [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2218 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2219 (match_operand:SVE_FULL 1 "register_operand" "w")
2223 "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2226 ;; -------------------------------------------------------------------------
2227 ;; ---- Normal scatter stores
2228 ;; -------------------------------------------------------------------------
2229 ;; Includes scatter forms of:
2232 ;; -------------------------------------------------------------------------
2234 ;; Unpredicated scatter stores.
;; Expands to the predicated mask_scatter_store pattern below with an
;; all-true governing predicate (operand 5, created here).
2235 (define_expand "scatter_store<mode><v_int_container>"
2236 [(set (mem:BLK (scratch))
2239 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2240 (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2241 (match_operand:DI 2 "const_int_operand")
2242 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2243 (match_operand:SVE_24 4 "register_operand")]
2244 UNSPEC_ST1_SCATTER))]
2247 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2251 ;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
2252 ;; unsigned extension and false for signed extension.
;; Operand 0 is the scalar base, operand 1 the vector of offsets,
;; operand 3 the scale, operand 4 the data and operand 5 the predicate.
2253 (define_insn "mask_scatter_store<mode><v_int_container>"
2254 [(set (mem:BLK (scratch))
2256 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2257 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
2258 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2259 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2260 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2261 (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
2262 UNSPEC_ST1_SCATTER))]
2265 st1<Vesize>\t%4.s, %5, [%1.s]
2266 st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2267 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2268 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2269 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2270 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2273 ;; Predicated scatter stores for 64-bit elements.  The value of operand 2
2274 ;; doesn't matter in this case.
;; Operand roles are as in the 32-bit pattern above.
2275 (define_insn "mask_scatter_store<mode><v_int_container>"
2276 [(set (mem:BLK (scratch))
2278 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2279 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
2280 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2281 (match_operand:DI 2 "const_int_operand")
2282 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
2283 (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
2284 UNSPEC_ST1_SCATTER))]
2287 st1<Vesize>\t%4.d, %5, [%1.d]
2288 st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2289 st1<Vesize>\t%4.d, %5, [%0, %1.d]
2290 st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2293 ;; Likewise, but with the offset being extended from 32 bits.
;; Operand 6 predicates the extension of the unpacked 32-bit offsets;
;; the rewrite below canonicalizes it to all-true after matching.
2294 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2295 [(set (mem:BLK (scratch))
2297 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2298 (match_operand:DI 0 "register_operand" "rk, rk")
2302 (match_operand:VNx2SI 1 "register_operand" "w, w"))]
2304 (match_operand:DI 2 "const_int_operand")
2305 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2306 (match_operand:SVE_2 4 "register_operand" "w, w")]
2307 UNSPEC_ST1_SCATTER))]
2310 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2311 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
2312 "&& !CONSTANT_P (operands[6])"
2314 operands[6] = CONSTM1_RTX (<VPRED>mode);
2318 ;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
2320 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2321 [(set (mem:BLK (scratch))
2323 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2324 (match_operand:DI 0 "register_operand" "rk, rk")
2329 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2331 (match_operand:DI 2 "const_int_operand")
2332 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2333 (match_operand:SVE_2 4 "register_operand" "w, w")]
2334 UNSPEC_ST1_SCATTER))]
2337 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2338 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2339 "&& !CONSTANT_P (operands[6])"
2341 operands[6] = CONSTM1_RTX (<VPRED>mode);
2345 ;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
;; The zero extension is expressed as an AND with the uxtw mask
;; immediate (operand 6), so no predicate rewrite is needed.
2347 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2348 [(set (mem:BLK (scratch))
2350 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2351 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2353 (match_operand:VNx2DI 1 "register_operand" "w, w")
2354 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2355 (match_operand:DI 2 "const_int_operand")
2356 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2357 (match_operand:SVE_2 4 "register_operand" "w, w")]
2358 UNSPEC_ST1_SCATTER))]
2361 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2362 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2365 ;; -------------------------------------------------------------------------
2366 ;; ---- Truncating scatter stores
2367 ;; -------------------------------------------------------------------------
2368 ;; Includes scatter forms of:
2372 ;; -------------------------------------------------------------------------
2374 ;; Predicated truncating scatter stores for 32-bit elements.  Operand 2 is
2375 ;; true for unsigned extension and false for signed extension.
;; The truncation of the wide data (operand 4) is folded into the
;; narrower ST1 store.
2376 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2377 [(set (mem:BLK (scratch))
2379 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2380 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2381 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2382 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2383 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2384 (truncate:VNx4_NARROW
2385 (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
2386 UNSPEC_ST1_SCATTER))]
2389 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2390 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2391 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2392 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2393 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2394 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2397 ;; Predicated truncating scatter stores for 64-bit elements.  The value of
2398 ;; operand 2 doesn't matter in this case.
2399 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2400 [(set (mem:BLK (scratch))
2402 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2403 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2404 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2405 (match_operand:DI 2 "const_int_operand")
2406 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2407 (truncate:VNx2_NARROW
2408 (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
2409 UNSPEC_ST1_SCATTER))]
2412 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2413 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2414 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2415 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2418 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; Operand 6 predicates the sign extension of the offsets; the rewrite
;; below keeps it equal to the governing predicate (operand 5).
2419 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2420 [(set (mem:BLK (scratch))
2422 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2423 (match_operand:DI 0 "register_operand" "rk, rk")
2428 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2430 (match_operand:DI 2 "const_int_operand")
2431 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2432 (truncate:VNx2_NARROW
2433 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2434 UNSPEC_ST1_SCATTER))]
2437 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2438 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2439 "&& !rtx_equal_p (operands[5], operands[6])"
2441 operands[6] = copy_rtx (operands[5])
2445 ;; Likewise, but with the offset being zero-extended from 32 bits.
;; The zero extension is expressed as an AND with the uxtw mask
;; immediate (operand 6), so no rewrite is needed.
2446 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2447 [(set (mem:BLK (scratch))
2449 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2450 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2452 (match_operand:VNx2DI 1 "register_operand" "w, w")
2453 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2454 (match_operand:DI 2 "const_int_operand")
2455 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2456 (truncate:VNx2_NARROW
2457 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2458 UNSPEC_ST1_SCATTER))]
2461 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2462 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2465 ;; =========================================================================
2466 ;; == Vector creation
2467 ;; =========================================================================
2469 ;; -------------------------------------------------------------------------
2470 ;; ---- [INT,FP] Duplicate element
2471 ;; -------------------------------------------------------------------------
2487 ;; -------------------------------------------------------------------------
2489 (define_expand "vec_duplicate<mode>"
;; Broadcast a scalar (register or memory) to all elements of an SVE
;; vector.  Memory inputs are expanded directly to a predicated LD1R
;; with an all-true predicate; the clobbered VNx16BI scratch is for the
;; register case, which may later be split into an LD1R after spilling
;; (see *vec_duplicate<mode>_reg below).
2491 [(set (match_operand:SVE_ALL 0 "register_operand")
2492 (vec_duplicate:SVE_ALL
2493 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2494 (clobber (scratch:VNx16BI))])]
2497 if (MEM_P (operands[1]))
2499 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2500 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2501 CONST0_RTX (<MODE>mode)));
2507 ;; Accept memory operands for the benefit of combine, and also in case
2508 ;; the scalar input gets spilled to memory during RA.  We want to split
2509 ;; the load at the first opportunity in order to allow the PTRUE to be
2510 ;; optimized with surrounding code.
;; Alternatives: GPR source (mov from %<vwcore>1), FPR source (mov from
;; %<Vetype>1), and memory source, which is split into an explicit PTRUE
;; plus LD1R using the scratch predicate (operand 2).
2511 (define_insn_and_split "*vec_duplicate<mode>_reg"
2512 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
2513 (vec_duplicate:SVE_ALL
2514 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
2515 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2518 mov\t%0.<Vetype>, %<vwcore>1
2519 mov\t%0.<Vetype>, %<Vetype>1
2521 "&& MEM_P (operands[1])"
;; If the scratch was not allocated (split before RA), grab a fresh
;; predicate register.
2524 if (GET_CODE (operands[2]) == SCRATCH)
2525 operands[2] = gen_reg_rtx (VNx16BImode);
2526 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2527 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2528 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2529 CONST0_RTX (<MODE>mode)));
;; The memory alternative expands to two instructions (PTRUE + LD1R).
2532 [(set_attr "length" "4,4,8")]
2535 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
;; Retarget operand 1 as an SVE register of the destination mode so that
;; the DUP .q form can be printed.
2536 (define_insn "@aarch64_vec_duplicate_vq<mode>_le"
2537 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2538 (vec_duplicate:SVE_FULL
2539 (match_operand:<V128> 1 "register_operand" "w")))]
2540 "TARGET_SVE && !BYTES_BIG_ENDIAN"
2542 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2543 return "dup\t%0.q, %1.q[0]";
2547 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2548 ;; The SVE register layout puts memory lane N into (architectural)
2549 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2550 ;; lsb into the register lsb.  We therefore have to describe this in rtl
2551 ;; terms as a reverse of the V128 vector followed by a duplicate.
;; The condition checks that operand 2 really is the descending lane
;; permutation starting at the highest V128 lane number.
2552 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2553 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2554 (vec_duplicate:SVE_FULL
2556 (match_operand:<V128> 1 "register_operand" "w")
2557 (match_operand 2 "descending_int_parallel"))))]
2560 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2561 GET_MODE_NUNITS (<V128>mode) - 1)"
2563 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2564 return "dup\t%0.q, %1.q[0]";
2568 ;; This is used for vec_duplicate<mode>s from memory, but can also
2569 ;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero (operand 3 is the zero fallback for inactive lanes).
2571 (define_insn "sve_ld1r<mode>"
2572 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2574 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2575 (vec_duplicate:SVE_ALL
2576 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2577 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2580 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2583 ;; Load 128 bits from memory under predicate control and duplicate to
;; all 128-bit blocks of the SVE register (LD1RQ).
2585 (define_insn "@aarch64_sve_ld1rq<mode>"
2586 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2588 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2589 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
;; Narrow the MEM to element mode so that %1 prints with the
;; addressing form LD1RQ expects.
2593 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2594 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
;; Predicated 256-bit load and replicate (LD1RO).
2598 (define_insn "@aarch64_sve_ld1ro<mode>"
2599 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2601 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2602 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2607 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2608 return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2612 ;; -------------------------------------------------------------------------
2613 ;; ---- [INT,FP] Initialize from individual elements
2614 ;; -------------------------------------------------------------------------
2617 ;; -------------------------------------------------------------------------
2619 (define_expand "vec_init<mode><Vel>"
;; Build a vector from the individual elements in operand 1; the
;; strategy is chosen by aarch64_sve_expand_vector_init.
2620 [(match_operand:SVE_FULL 0 "register_operand")
2621 (match_operand 1 "")]
2624 aarch64_sve_expand_vector_init (operands[0], operands[1]);
2629 ;; Shift an SVE vector left and insert a scalar into element 0.
;; Alternatives cover GPR and FPR scalar sources (operand 2), each with
;; a tied-destination form and a MOVPRFX form when operand 1 is not
;; allocated to the same register as the destination.
2630 (define_insn "vec_shl_insert_<mode>"
2631 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
2633 [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
2634 (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
2638 insr\t%0.<Vetype>, %<vwcore>2
2639 insr\t%0.<Vetype>, %<Vetype>2
2640 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2641 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
2642 [(set_attr "movprfx" "*,*,yes,yes")]
2645 ;; -------------------------------------------------------------------------
2646 ;; ---- [INT] Linear series
2647 ;; -------------------------------------------------------------------------
2650 ;; -------------------------------------------------------------------------
2652 (define_insn "vec_series<mode>"
;; Linear series {base, base+step, base+2*step, ...} via INDEX.
;; Operand 1 is the base, operand 2 the step; each may be an immediate
;; (Usi) or a register, but not both immediates in one alternative.
2653 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
2655 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
2656 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
2659 index\t%0.<Vctype>, #%1, %<vccore>2
2660 index\t%0.<Vctype>, %<vccore>1, #%2
2661 index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
2664 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2665 ;; of an INDEX instruction.
;; aarch64_check_zero_based_sve_index_immediate both validates operand 2
;; (in the insn condition) and extracts the step (in the output code).
2666 (define_insn "*vec_series<mode>_plus"
2667 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2669 (vec_duplicate:SVE_I
2670 (match_operand:<VEL> 1 "register_operand" "r"))
2671 (match_operand:SVE_I 2 "immediate_operand")))]
2672 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2674 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2675 return "index\t%0.<Vctype>, %<vccore>1, #%2";
2679 ;; -------------------------------------------------------------------------
2680 ;; ---- [PRED] Duplicate element
2681 ;; -------------------------------------------------------------------------
2682 ;; The patterns in this section are synthetic.
2683 ;; -------------------------------------------------------------------------
2685 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2686 ;; input into the top bit and using a WHILELO.  An alternative would be to
2687 ;; duplicate the input and do a compare with zero.
;; After the shift, the (unsigned) value is 1 << 63 when the input's low
;; bit is set and 0 otherwise, so WHILELO 0, tmp gives an all-true or
;; all-false predicate respectively.
2688 (define_expand "vec_duplicate<mode>"
2689 [(set (match_operand:PRED_ALL 0 "register_operand")
2690 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2693 rtx tmp = gen_reg_rtx (DImode);
2694 rtx op1 = gen_lowpart (DImode, operands[1]);
2695 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2696 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2701 ;; =========================================================================
2702 ;; == Vector decomposition
2703 ;; =========================================================================
2705 ;; -------------------------------------------------------------------------
2706 ;; ---- [INT,FP] Extract index
2707 ;; -------------------------------------------------------------------------
2709 ;; - DUP (Advanced SIMD)
2712 ;; - ST1 (Advanced SIMD)
2713 ;; - UMOV (Advanced SIMD)
2714 ;; -------------------------------------------------------------------------
2716 (define_expand "vec_extract<mode><Vel>"
;; Extract the element of operand 1 indexed by operand 2 (which may be
;; a run-time value).  Special cases: the last element uses LASTB with a
;; false predicate; a variable index builds a one-hot predicate via
;; INDEX + compare and then uses LASTB.  Other constant indices fall
;; through to the insn patterns below.
2717 [(set (match_operand:<VEL> 0 "register_operand")
2719 (match_operand:SVE_FULL 1 "register_operand")
2720 (match_operand:SI 2 "nonmemory_operand")])))]
2724 if (poly_int_rtx_p (operands[2], &val)
2725 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2727 /* The last element can be extracted with a LASTB and a false
2729 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2730 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2733 if (!CONST_INT_P (operands[2]))
2735 /* Create an index with operand[2] as the base and -1 as the step.
2736 It will then be zero for the element we care about.  */
2737 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2738 index = force_reg (<VEL_INT>mode, index);
2739 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2740 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2742 /* Get a predicate that is true for only that element.  */
2743 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2744 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2745 rtx sel = gen_reg_rtx (<VPRED>mode);
2746 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2748 /* Select the element using LASTB.  */
2749 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]))
2755 ;; Extract element zero. This is a special case because we want to force
2756 ;; the registers to be the same for the second alternative, and then
2757 ;; split the instruction into nothing after RA.
2758 (define_insn_and_split "*vec_extract<mode><Vel>_0"
2759 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2761 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
2762 (parallel [(const_int 0)])))]
2765 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2766 switch (which_alternative)
2769 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2773 return "st1\\t{%1.<Vetype>}[0], %0";
2778 "&& reload_completed
2779 && REG_P (operands[0])
2780 && REGNO (operands[0]) == REGNO (operands[1])"
2783 emit_note (NOTE_INSN_DELETED);
2786 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
2789 ;; Extract an element from the Advanced SIMD portion of the register.
2790 ;; We don't just reuse the aarch64-simd.md pattern because we don't
2791 ;; want any change in lane number on big-endian targets.
2792 (define_insn "*vec_extract<mode><Vel>_v128"
2793 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2795 (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
2796 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2798 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
2800 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2801 switch (which_alternative)
2804 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2806 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2808 return "st1\\t{%1.<Vetype>}[%2], %0";
2813 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
2816 ;; Extract an element in the range of DUP. This pattern allows the
2817 ;; source and destination to be different.
2818 (define_insn "*vec_extract<mode><Vel>_dup"
2819 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2821 (match_operand:SVE_FULL 1 "register_operand" "w")
2822 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2824 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
2826 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2827 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
2831 ;; Extract an element outside the range of DUP. This pattern requires the
2832 ;; source and destination to be the same.
2833 (define_insn "*vec_extract<mode><Vel>_ext"
2834 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
2836 (match_operand:SVE_FULL 1 "register_operand" "0, w")
2837 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2838 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
2840 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2841 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
2842 return (which_alternative == 0
2843 ? "ext\t%0.b, %0.b, %0.b, #%2"
2844 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
2846 [(set_attr "movprfx" "*,yes")]
2849 ;; -------------------------------------------------------------------------
2850 ;; ---- [INT,FP] Extract active element
2851 ;; -------------------------------------------------------------------------
2855 ;; -------------------------------------------------------------------------
2857 ;; Extract the last active element of operand 1 into operand 0.
2858 ;; If no elements are active, extract the last inactive element instead.
;; The first alternative returns the result in a general-purpose register
;; (LASTA/LASTB to %<vwcore>0), the second in a vector register.
2859 (define_insn "@extract_<last_op>_<mode>"
2860 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
2862 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2863 (match_operand:SVE_FULL 2 "register_operand" "w, w")]
2867 last<ab>\t%<vwcore>0, %1, %2.<Vetype>
2868 last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
2871 ;; -------------------------------------------------------------------------
2872 ;; ---- [PRED] Extract index
2873 ;; -------------------------------------------------------------------------
2874 ;; The patterns in this section are synthetic.
2875 ;; -------------------------------------------------------------------------
2877 ;; Handle extractions from a predicate by converting to an integer vector
2878 ;; and extracting from there.
;; The predicate is first turned into a vector of 1/0 values via
;; vcond_mask, after which the normal vector extraction expander above
;; does the real work.
2879 (define_expand "vec_extract<vpred><Vel>"
2880 [(match_operand:<VEL> 0 "register_operand")
2881 (match_operand:<VPRED> 1 "register_operand")
2882 (match_operand:SI 2 "nonmemory_operand")
2883 ;; Dummy operand to which we can attach the iterator.
2884 (reg:SVE_FULL_I V0_REGNUM)]
2887 rtx tmp = gen_reg_rtx (<MODE>mode);
2888 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
2889 CONST1_RTX (<MODE>mode),
2890 CONST0_RTX (<MODE>mode)));
2891 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
2896 ;; =========================================================================
2897 ;; == Unary arithmetic
2898 ;; =========================================================================
2900 ;; -------------------------------------------------------------------------
2901 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
2902 ;; -------------------------------------------------------------------------
2907 ;; - CNT (= popcount)
2910 ;; -------------------------------------------------------------------------
2912 ;; Unpredicated integer unary arithmetic.
;; Expands to the PTRUE-predicated form below, generating the all-true
;; predicate as operand 2.
2913 (define_expand "<optab><mode>2"
2914 [(set (match_operand:SVE_I 0 "register_operand")
2917 (SVE_INT_UNARY:SVE_I
2918 (match_operand:SVE_I 1 "register_operand"))]
2922 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2926 ;; Integer unary arithmetic predicated with a PTRUE.
;; Second alternative uses MOVPRFX when the destination cannot be tied to
;; the input.
2927 (define_insn "@aarch64_pred_<optab><mode>"
2928 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
2930 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2931 (SVE_INT_UNARY:SVE_I
2932 (match_operand:SVE_I 2 "register_operand" "0, w"))]
2936 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2937 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2938 [(set_attr "movprfx" "*,yes")]
2941 ;; Predicated integer unary arithmetic with merging.
;; Operand 3 supplies the value for inactive lanes (register or zero).
2942 (define_expand "@cond_<optab><mode>"
2943 [(set (match_operand:SVE_I 0 "register_operand")
2945 [(match_operand:<VPRED> 1 "register_operand")
2946 (SVE_INT_UNARY:SVE_I
2947 (match_operand:SVE_I 2 "register_operand"))
2948 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
2953 ;; Predicated integer unary arithmetic, merging with the first input.
2954 (define_insn "*cond_<optab><mode>_2"
2955 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
2957 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2958 (SVE_INT_UNARY:SVE_I
2959 (match_operand:SVE_I 2 "register_operand" "0, w"))
2964 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
2965 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2966 [(set_attr "movprfx" "*,yes")]
2969 ;; Predicated integer unary arithmetic, merging with an independent value.
2971 ;; The earlyclobber isn't needed for the first alternative, but omitting
2972 ;; it would only help the case in which operands 2 and 3 are the same,
2973 ;; which is handled above rather than here. Marking all the alternatives
2974 ;; as earlyclobber helps to make the instruction more regular to the
2975 ;; register allocator.
2976 (define_insn "*cond_<optab><mode>_any"
2977 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
2979 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2980 (SVE_INT_UNARY:SVE_I
2981 (match_operand:SVE_I 2 "register_operand" "w, w, w"))
2982 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2984 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
2986 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2987 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2988 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2989 [(set_attr "movprfx" "*,yes,yes")]
2992 ;; -------------------------------------------------------------------------
2993 ;; ---- [INT] General unary arithmetic corresponding to unspecs
2994 ;; -------------------------------------------------------------------------
3000 ;; -------------------------------------------------------------------------
3002 ;; Predicated integer unary operations.
;; The <elem_bits> >= <min_elem_bits> condition restricts each unspec to
;; the element widths it supports.
3003 (define_insn "@aarch64_pred_<optab><mode>"
3004 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3006 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3008 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")]
3011 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3013 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3014 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3015 [(set_attr "movprfx" "*,yes")]
3018 ;; Another way of expressing the REVB, REVH and REVW patterns, with this
3019 ;; form being easier for permutes. The predicate mode determines the number
3020 ;; of lanes and the data mode decides the granularity of the reversal within
3022 (define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>"
3023 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
3025 [(match_operand:PRED_HSD 1 "register_operand" "Upl, Upl")
3027 [(match_operand:SVE_ALL 2 "register_operand" "0, w")]
3030 "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>"
3032 rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3033 movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>"
3034 [(set_attr "movprfx" "*,yes")]
3037 ;; Predicated integer unary operations with merging.
;; Operand 3 gives the value of inactive lanes: tied to the destination,
;; zero (Dz, via zeroing MOVPRFX), or an independent register.
3038 (define_insn "@cond_<optab><mode>"
3039 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
3041 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3043 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
3045 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3047 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3049 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3050 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3051 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3052 [(set_attr "movprfx" "*,yes,yes")]
3055 ;; -------------------------------------------------------------------------
3056 ;; ---- [INT] Sign and zero extension
3057 ;; -------------------------------------------------------------------------
3065 ;; -------------------------------------------------------------------------
3067 ;; Unpredicated sign and zero extension from a narrower mode.
;; The mask condition checks that the source partial-vector mode really is
;; narrower than the destination mode.
3068 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3069 [(set (match_operand:SVE_HSDI 0 "register_operand")
3072 (ANY_EXTEND:SVE_HSDI
3073 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
3075 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3077 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
3081 ;; Predicated sign and zero extension from a narrower mode.
;; Emits SXT{B,H,W} or UXT{B,H,W} according to <su> and the source element
;; size.
3082 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3083 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w, ?&w")
3085 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
3086 (ANY_EXTEND:SVE_HSDI
3087 (match_operand:SVE_PARTIAL_I 2 "register_operand" "0, w"))]
3089 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3091 <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3092 movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
3093 [(set_attr "movprfx" "*,yes")]
3096 ;; Predicated truncate-and-sign-extend operations.
3097 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3098 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
3099 (unspec:SVE_FULL_HSDI
3100 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
3101 (sign_extend:SVE_FULL_HSDI
3102 (truncate:SVE_PARTIAL_I
3103 (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")))]
3106 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3108 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3109 movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
3110 [(set_attr "movprfx" "*,yes")]
3113 ;; Predicated truncate-and-sign-extend operations with merging.
3114 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3115 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
3116 (unspec:SVE_FULL_HSDI
3117 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
3118 (sign_extend:SVE_FULL_HSDI
3119 (truncate:SVE_PARTIAL_I
3120 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
3121 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3124 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3126 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3127 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3128 movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
3129 [(set_attr "movprfx" "*,yes,yes")]
3132 ;; Predicated truncate-and-zero-extend operations, merging with the
3135 ;; The canonical form of this operation is an AND of a constant rather
3136 ;; than (zero_extend (truncate ...)).
3137 (define_insn "*cond_uxt<mode>_2"
3138 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3140 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3142 (match_operand:SVE_I 2 "register_operand" "0, w")
3143 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3148 uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3149 movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3150 [(set_attr "movprfx" "*,yes")]
3153 ;; Predicated truncate-and-zero-extend operations, merging with an
3154 ;; independent value.
3156 ;; The earlyclobber isn't needed for the first alternative, but omitting
3157 ;; it would only help the case in which operands 2 and 4 are the same,
3158 ;; which is handled above rather than here. Marking all the alternatives
3159 ;; as earlyclobber helps to make the instruction more regular to the
3160 ;; register allocator.
3161 (define_insn "*cond_uxt<mode>_any"
3162 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
3164 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3166 (match_operand:SVE_I 2 "register_operand" "w, w, w")
3167 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3168 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3170 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3172 uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3173 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3174 movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3175 [(set_attr "movprfx" "*,yes,yes")]
3178 ;; -------------------------------------------------------------------------
3179 ;; ---- [INT] Truncation
3180 ;; -------------------------------------------------------------------------
3181 ;; The patterns in this section are synthetic.
3182 ;; -------------------------------------------------------------------------
3184 ;; Truncate to a partial SVE vector from either a full vector or a
3185 ;; wider partial vector. This is a no-op, because we can just ignore
3186 ;; the unused upper bits of the source.
;; After reload this splits into a plain register move in the narrower
;; mode, which can then be deleted if source and destination coincide.
3187 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3188 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3189 (truncate:SVE_PARTIAL_I
3190 (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3191 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3193 "&& reload_completed"
3194 [(set (match_dup 0) (match_dup 1))]
3196 operands[1] = aarch64_replace_reg_mode (operands[1],
3197 <SVE_PARTIAL_I:MODE>mode);
3201 ;; -------------------------------------------------------------------------
3202 ;; ---- [INT] Logical inverse
3203 ;; -------------------------------------------------------------------------
3206 ;; -------------------------------------------------------------------------
3208 ;; Predicated logical inverse.
;; CNOT writes 1 to lanes that compare equal to zero and 0 elsewhere;
;; the expander materializes the 0 and 1 vector constants it needs.
3209 (define_expand "@aarch64_pred_cnot<mode>"
3210 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3213 [(match_operand:<VPRED> 1 "register_operand")
3214 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
3216 (match_operand:SVE_FULL_I 3 "register_operand")
3224 operands[4] = CONST0_RTX (<MODE>mode);
3225 operands[5] = CONST1_RTX (<MODE>mode);
3229 (define_insn "*cnot<mode>"
3230 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3233 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3234 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
3236 (match_operand:SVE_I 2 "register_operand" "0, w")
3237 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3239 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3244 cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3245 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3246 [(set_attr "movprfx" "*,yes")]
3249 ;; Predicated logical inverse with merging.
3250 (define_expand "@cond_cnot<mode>"
3251 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3253 [(match_operand:<VPRED> 1 "register_operand")
3257 (const_int SVE_KNOWN_PTRUE)
3259 (match_operand:SVE_FULL_I 2 "register_operand")
3265 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3269 operands[4] = CONSTM1_RTX (<VPRED>mode);
3270 operands[5] = CONST0_RTX (<MODE>mode);
3271 operands[6] = CONST1_RTX (<MODE>mode);
3275 ;; Predicated logical inverse, merging with the first input.
;; The rewrite clause re-canonicalizes operand 5 back to an all-true
;; predicate if combine substituted something else.
3276 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3277 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3279 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3280 ;; Logical inverse of operand 2 (as above).
3284 (const_int SVE_KNOWN_PTRUE)
3286 (match_operand:SVE_I 2 "register_operand" "0, w")
3287 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3289 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3296 cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3297 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3298 "&& !CONSTANT_P (operands[5])"
3300 operands[5] = CONSTM1_RTX (<VPRED>mode);
3302 [(set_attr "movprfx" "*,yes")]
3305 ;; Predicated logical inverse, merging with an independent value.
3307 ;; The earlyclobber isn't needed for the first alternative, but omitting
3308 ;; it would only help the case in which operands 2 and 6 are the same,
3309 ;; which is handled above rather than here. Marking all the alternatives
3310 ;; as earlyclobber helps to make the instruction more regular to the
3311 ;; register allocator.
3312 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3313 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
3315 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3316 ;; Logical inverse of operand 2 (as above).
3320 (const_int SVE_KNOWN_PTRUE)
3322 (match_operand:SVE_I 2 "register_operand" "w, w, w")
3323 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3325 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3328 (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3330 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3332 cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3333 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3334 movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3335 "&& !CONSTANT_P (operands[5])"
3337 operands[5] = CONSTM1_RTX (<VPRED>mode);
3339 [(set_attr "movprfx" "*,yes,yes")]
3342 ;; -------------------------------------------------------------------------
3343 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3344 ;; -------------------------------------------------------------------------
3347 ;; -------------------------------------------------------------------------
3349 ;; Unpredicated unary operations that take an integer and return a float.
;; Operand 1 is the integer vector whose mode is the integer equivalent of
;; the FP destination mode.
3350 (define_insn "@aarch64_sve_<optab><mode>"
3351 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3353 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3356 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3359 ;; -------------------------------------------------------------------------
3360 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3361 ;; -------------------------------------------------------------------------
3376 ;; -------------------------------------------------------------------------
3378 ;; Unpredicated floating-point unary operations.
3379 (define_insn "@aarch64_sve_<optab><mode>"
3380 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3382 [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3385 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3388 ;; Unpredicated floating-point unary operations.
;; Expands to the predicated form with a PTRUE governing predicate and
;; SVE_RELAXED_GP strictness.
3389 (define_expand "<optab><mode>2"
3390 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3393 (const_int SVE_RELAXED_GP)
3394 (match_operand:SVE_FULL_F 1 "register_operand")]
3395 SVE_COND_FP_UNARY_OPTAB))]
3398 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3402 ;; Predicated floating-point unary operations.
3403 (define_insn "@aarch64_pred_<optab><mode>"
3404 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3406 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3407 (match_operand:SI 3 "aarch64_sve_gp_strictness")
3408 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3409 SVE_COND_FP_UNARY))]
3412 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3413 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3414 [(set_attr "movprfx" "*,yes")]
3417 ;; Predicated floating-point unary arithmetic with merging.
3418 (define_expand "@cond_<optab><mode>"
3419 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3421 [(match_operand:<VPRED> 1 "register_operand")
3424 (const_int SVE_STRICT_GP)
3425 (match_operand:SVE_FULL_F 2 "register_operand")]
3427 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3432 ;; Predicated floating-point unary arithmetic, merging with the first input.
;; _relaxed: the inner operation's predicate (operand 3) may differ from
;; the outer one; the rewrite clause ties them together.
3433 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
3434 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3436 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3439 (const_int SVE_RELAXED_GP)
3440 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3446 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3447 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3448 "&& !rtx_equal_p (operands[1], operands[3])"
3450 operands[3] = copy_rtx (operands[1]);
3452 [(set_attr "movprfx" "*,yes")]
;; _strict: inner and outer predicates must already match (operand 1 is
;; reused inside the inner unspec).
3455 (define_insn "*cond_<optab><mode>_2_strict"
3456 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3458 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3461 (const_int SVE_STRICT_GP)
3462 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3468 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3469 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3470 [(set_attr "movprfx" "*,yes")]
3473 ;; Predicated floating-point unary arithmetic, merging with an independent
3476 ;; The earlyclobber isn't needed for the first alternative, but omitting
3477 ;; it would only help the case in which operands 2 and 3 are the same,
3478 ;; which is handled above rather than here. Marking all the alternatives
3479 ;; as earlyclobber helps to make the instruction more regular to the
3480 ;; register allocator.
3481 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
3482 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3484 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3487 (const_int SVE_RELAXED_GP)
3488 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3490 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3492 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3494 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3495 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3496 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3497 "&& !rtx_equal_p (operands[1], operands[4])"
3499 operands[4] = copy_rtx (operands[1]);
3501 [(set_attr "movprfx" "*,yes,yes")]
3504 (define_insn "*cond_<optab><mode>_any_strict"
3505 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3507 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3510 (const_int SVE_STRICT_GP)
3511 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3513 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3515 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3517 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3518 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3519 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3520 [(set_attr "movprfx" "*,yes,yes")]
3523 ;; -------------------------------------------------------------------------
3524 ;; ---- [FP] Square root
3525 ;; -------------------------------------------------------------------------
;; Vector square root.  Tries the approximate-sqrt expansion first; if
;; that is not applicable, falls back to predicated FSQRT with a PTRUE.
3527 (define_expand "sqrt<mode>2"
3528 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3531 (const_int SVE_RELAXED_GP)
3532 (match_operand:SVE_FULL_F 1 "register_operand")]
3533 UNSPEC_COND_FSQRT))]
3536 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
3538 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3541 ;; -------------------------------------------------------------------------
3542 ;; ---- [FP] Reciprocal square root
3543 ;; -------------------------------------------------------------------------
;; Reciprocal square root, emitted entirely via the approximation helper
;; (the 'true' argument requests the reciprocal form).
3545 (define_expand "rsqrt<mode>2"
3546 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3547 (unspec:SVE_FULL_SDF
3548 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3552 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; Reciprocal-sqrt estimate (FRSQRTE) building block.
3556 (define_expand "@aarch64_rsqrte<mode>"
3557 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3558 (unspec:SVE_FULL_SDF
3559 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
;; Reciprocal-sqrt step (FRSQRTS) building block for Newton iteration.
3564 (define_expand "@aarch64_rsqrts<mode>"
3565 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3566 (unspec:SVE_FULL_SDF
3567 [(match_operand:SVE_FULL_SDF 1 "register_operand")
3568 (match_operand:SVE_FULL_SDF 2 "register_operand")]
3573 ;; -------------------------------------------------------------------------
3574 ;; ---- [PRED] Inverse
3575 ;; -------------------------------------------------------------------------
3578 ;; -------------------------------------------------------------------------
3580 ;; Unpredicated predicate inverse.
;; Expands to the PTRUE-predicated NOT below.
3581 (define_expand "one_cmpl<mode>2"
3582 [(set (match_operand:PRED_ALL 0 "register_operand")
3584 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3588 operands[2] = aarch64_ptrue_reg (<MODE>mode);
3592 ;; Predicated predicate inverse.
;; NOT with zeroing predication: inactive lanes of the result are zeroed.
3593 (define_insn "*one_cmpl<mode>3"
3594 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3596 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3597 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3599 "not\t%0.b, %1/z, %2.b"
3602 ;; =========================================================================
3603 ;; == Binary arithmetic
3604 ;; =========================================================================
3606 ;; -------------------------------------------------------------------------
3607 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3608 ;; -------------------------------------------------------------------------
3610 ;; - ADD (merging form only)
3611 ;; - AND (merging form only)
3612 ;; - ASR (merging form only)
3613 ;; - EOR (merging form only)
3614 ;; - LSL (merging form only)
3615 ;; - LSR (merging form only)
3617 ;; - ORR (merging form only)
3620 ;; - SQADD (SVE2 merging form only)
3621 ;; - SQSUB (SVE2 merging form only)
3622 ;; - SUB (merging form only)
3625 ;; - UQADD (SVE2 merging form only)
3626 ;; - UQSUB (SVE2 merging form only)
3627 ;; -------------------------------------------------------------------------
3629 ;; Unpredicated integer binary operations that have an immediate form.
;; Expands to the PTRUE-predicated form; operand 2 may be a register or an
;; immediate accepted by the per-op <sve_imm_con> constraint.
3630 (define_expand "<optab><mode>3"
3631 [(set (match_operand:SVE_I 0 "register_operand")
3634 (SVE_INT_BINARY_IMM:SVE_I
3635 (match_operand:SVE_I 1 "register_operand")
3636 (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3640 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3644 ;; Integer binary operations that have an immediate form, predicated
3645 ;; with a PTRUE. We don't actually need the predicate for the first
3646 ;; and third alternatives, but using Upa or X isn't likely to gain much
3647 ;; and would make the instruction seem less uniform to the register
3649 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3650 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w")
3652 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
3653 (SVE_INT_BINARY_IMM:SVE_I
3654 (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w")
3655 (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
3660 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3662 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3663 ; Split the unpredicated form after reload, so that we don't have
3664 ; the unnecessary PTRUE.
3665 "&& reload_completed
3666 && !register_operand (operands[3], <MODE>mode)"
3668 (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
3670 [(set_attr "movprfx" "*,*,yes,yes")]
3673 ;; Unpredicated binary operations with a constant (post-RA only).
3674 ;; These are generated by splitting a predicated instruction whose
3675 ;; predicate is unused.
3676 (define_insn "*post_ra_<optab><mode>3"
3677 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3678 (SVE_INT_BINARY_IMM:SVE_I
3679 (match_operand:SVE_I 1 "register_operand" "0, w")
3680 (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3681 "TARGET_SVE && reload_completed"
3683 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
3684 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
3685 [(set_attr "movprfx" "*,yes")]
3688 ;; Predicated integer operations with merging.
;; Operand 4 gives the value of inactive lanes.
3689 (define_expand "@cond_<optab><mode>"
3690 [(set (match_operand:SVE_I 0 "register_operand")
3692 [(match_operand:<VPRED> 1 "register_operand")
3693 (SVE_INT_BINARY:SVE_I
3694 (match_operand:SVE_I 2 "register_operand")
3695 (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>"))
3696 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3701 ;; Predicated integer operations, merging with the first input.
3702 (define_insn "*cond_<optab><mode>_2"
3703 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3705 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3706 (SVE_INT_BINARY:SVE_I
3707 (match_operand:SVE_I 2 "register_operand" "0, w")
3708 (match_operand:SVE_I 3 "register_operand" "w, w"))
3713 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3714 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3715 [(set_attr "movprfx" "*,yes")]
3718 ;; Predicated integer operations, merging with the second input.
;; Uses the reversed-operand mnemonic <sve_int_op_rev> so the tied operand
;; can be the second source.
3719 (define_insn "*cond_<optab><mode>_3"
3720 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3722 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3723 (SVE_INT_BINARY:SVE_I
3724 (match_operand:SVE_I 2 "register_operand" "w, w")
3725 (match_operand:SVE_I 3 "register_operand" "0, w"))
3730 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3731 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
3732 [(set_attr "movprfx" "*,yes")]
3735 ;; Predicated integer operations, merging with an independent value.
;; The post-reload rewrite handles the register-merge case by emitting a
;; separate vcond_mask select and retargeting the operation at operand 0.
3736 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3737 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3739 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3740 (SVE_INT_BINARY:SVE_I
3741 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
3742 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
3743 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3746 && !rtx_equal_p (operands[2], operands[4])
3747 && !rtx_equal_p (operands[3], operands[4])"
3749 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3750 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3751 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3752 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3754 "&& reload_completed
3755 && register_operand (operands[4], <MODE>mode)
3756 && !rtx_equal_p (operands[0], operands[4])"
3758 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3759 operands[4], operands[1]));
3760 operands[4] = operands[2] = operands[0];
3762 [(set_attr "movprfx" "yes")]
3765 ;; -------------------------------------------------------------------------
3766 ;; ---- [INT] Addition
3767 ;; -------------------------------------------------------------------------
3779 ;; -------------------------------------------------------------------------
;; Unpredicated vector addition.  The alternatives cover, in order:
;; ADD with an immediate (vsa), SUB of a negated immediate (vsn),
;; INC/DEC-style constants (vsi, via aarch64_output_sve_vector_inc_dec),
;; the two MOVPRFX'ed immediate forms, and the plain register-register ADD.
3781 (define_insn "add<mode>3"
3782 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
3784 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
3785 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
3788 add\t%0.<Vetype>, %0.<Vetype>, #%D2
3789 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3790 * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
3791 movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
3792 movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3793 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3794 [(set_attr "movprfx" "*,*,*,yes,yes,*")]
3797 ;; Merging forms are handled through SVE_INT_BINARY.
3799 ;; -------------------------------------------------------------------------
3800 ;; ---- [INT] Subtraction
3801 ;; -------------------------------------------------------------------------
3805 ;; -------------------------------------------------------------------------
;; Unpredicated vector subtraction.  Alternative 1 is the plain SUB;
;; alternatives 2 and 3 handle an immediate *first* operand via the
;; reversed subtract SUBR (with MOVPRFX when operand 2 is not tied to
;; the destination).
3807 (define_insn "sub<mode>3"
3808 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
3810 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
3811 (match_operand:SVE_I 2 "register_operand" "w, 0, w")))]
3814 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
3815 subr\t%0.<Vetype>, %0.<Vetype>, #%D1
3816 movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
3817 [(set_attr "movprfx" "*,*,yes")]
3820 ;; Merging forms are handled through SVE_INT_BINARY.
3822 ;; -------------------------------------------------------------------------
3823 ;; ---- [INT] Take address
3824 ;; -------------------------------------------------------------------------
3827 ;; -------------------------------------------------------------------------
3829 ;; An unshifted and unscaled ADR.  This is functionally equivalent to an ADD,
3830 ;; but the svadrb intrinsics should preserve the user's choice.
3831 (define_insn "@aarch64_adr<mode>"
3832 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3833 (unspec:SVE_FULL_SDI
3834 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
3835 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
3838 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
3841 ;; Same, but with the offset being sign-extended from the low 32 bits.
;; The rewrite step below canonicalizes operand 3 (the governing predicate
;; of the extension) to an all-ones constant once it is no longer needed.
3842 (define_insn_and_rewrite "*aarch64_adr_sxtw"
3843 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3845 [(match_operand:VNx2DI 1 "register_operand" "w")
3850 (match_operand:VNx2DI 2 "register_operand" "w")))]
3854 "adr\t%0.d, [%1.d, %2.d, sxtw]"
3855 "&& !CONSTANT_P (operands[3])"
3857 operands[3] = CONSTM1_RTX (VNx2BImode);
3861 ;; Same, but with the offset being zero-extended from the low 32 bits.
;; The zero-extension is expressed as an AND with a uxtw mask immediate.
3862 (define_insn "*aarch64_adr_uxtw_unspec"
3863 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3865 [(match_operand:VNx2DI 1 "register_operand" "w")
3867 (match_operand:VNx2DI 2 "register_operand" "w")
3868 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
3871 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3874 ;; Same, matching as a PLUS rather than unspec.
3875 (define_insn "*aarch64_adr_uxtw_and"
3876 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3879 (match_operand:VNx2DI 2 "register_operand" "w")
3880 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
3881 (match_operand:VNx2DI 1 "register_operand" "w")))]
3883 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3886 ;; ADR with a nonzero shift.
;; Operand 3 is the shift amount (1-3, i.e. scaling by 2, 4 or 8);
;; operand 4 is a predicate created here as an all-ones constant.
3887 (define_expand "@aarch64_adr<mode>_shift"
3888 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3890 (unspec:SVE_FULL_SDI
3892 (ashift:SVE_FULL_SDI
3893 (match_operand:SVE_FULL_SDI 2 "register_operand")
3894 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3896 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
3899 operands[4] = CONSTM1_RTX (<VPRED>mode);
;; Matching pattern for the expand above; the rewrite re-canonicalizes the
;; predicate operand to an all-ones constant.
3903 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
3904 [(set (match_operand:SVE_24I 0 "register_operand" "=w")
3909 (match_operand:SVE_24I 2 "register_operand" "w")
3910 (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
3912 (match_operand:SVE_24I 1 "register_operand" "w")))]
3914 "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
3915 "&& !CONSTANT_P (operands[4])"
3917 operands[4] = CONSTM1_RTX (<VPRED>mode);
3921 ;; Same, but with the index being sign-extended from the low 32 bits.
;; Two predicate operands (4 and 5) guard the extension and shift; both are
;; rewritten to all-ones constants.
3922 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
3923 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3932 (match_operand:VNx2DI 2 "register_operand" "w")))]
3934 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3936 (match_operand:VNx2DI 1 "register_operand" "w")))]
3938 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
3939 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3941 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
3945 ;; Same, but with the index being zero-extended from the low 32 bits.
;; Here the zero-extension is an AND with a uxtw mask (operand 4).
3946 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
3947 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3953 (match_operand:VNx2DI 2 "register_operand" "w")
3954 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
3955 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3957 (match_operand:VNx2DI 1 "register_operand" "w")))]
3959 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
3960 "&& !CONSTANT_P (operands[5])"
3962 operands[5] = CONSTM1_RTX (VNx2BImode);
3966 ;; -------------------------------------------------------------------------
3967 ;; ---- [INT] Absolute difference
3968 ;; -------------------------------------------------------------------------
3972 ;; -------------------------------------------------------------------------
3974 ;; Unpredicated integer absolute difference.
;; Lowers to the predicated SABD/UABD pattern using an all-true predicate.
3975 (define_expand "<su>abd<mode>_3"
3976 [(use (match_operand:SVE_I 0 "register_operand"))
3978 (match_operand:SVE_I 1 "register_operand")
3979 (match_operand:SVE_I 2 "register_operand"))]
3982 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3983 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
3989 ;; Predicated integer absolute difference.
3990 (define_insn "@aarch64_pred_<su>abd<mode>"
3991 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3994 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3996 (match_operand:SVE_I 2 "register_operand" "%0, w")
3997 (match_operand:SVE_I 3 "register_operand" "w, w"))]
4007 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4008 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4009 [(set_attr "movprfx" "*,yes")]
;; Merging form of the absolute difference.  If the fallback value equals
;; operand 3, swap the inputs so later patterns can tie the fallback to
;; the first input.
4012 (define_expand "@aarch64_cond_<su>abd<mode>"
4013 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4015 [(match_operand:<VPRED> 1 "register_operand")
4020 (match_operand:SVE_FULL_I 2 "register_operand")
4021 (match_operand:SVE_FULL_I 3 "register_operand"))]
4025 (<max_opp>:SVE_FULL_I
4029 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4033 if (rtx_equal_p (operands[3], operands[4]))
4034 std::swap (operands[2], operands[3]);
4037 ;; Predicated integer absolute difference, merging with the first input.
;; Operands 4 and 5 are inner predicates; the rewrite canonicalizes them
;; to all-ones constants.
4038 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
4039 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4041 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4046 (match_operand:SVE_I 2 "register_operand" "0, w")
4047 (match_operand:SVE_I 3 "register_operand" "w, w"))]
4059 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4060 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4061 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4063 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4065 [(set_attr "movprfx" "*,yes")]
4068 ;; Predicated integer absolute difference, merging with the second input.
4069 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_3"
4070 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4072 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4077 (match_operand:SVE_I 2 "register_operand" "w, w")
4078 (match_operand:SVE_I 3 "register_operand" "0, w"))]
4090 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4091 movprfx\t%0, %3\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4092 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4094 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4096 [(set_attr "movprfx" "*,yes")]
4099 ;; Predicated integer absolute difference, merging with an independent value.
;; The rewrite either canonicalizes the inner predicates (operands 5 and 6)
;; or, after reload, splits out a vcond_mask select when the fallback is a
;; register distinct from the destination.
4100 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
4101 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4103 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4108 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
4109 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))]
4117 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4120 && !rtx_equal_p (operands[2], operands[4])
4121 && !rtx_equal_p (operands[3], operands[4])"
4123 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4124 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4125 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4126 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4130 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
4131 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
4132 else if (reload_completed
4133 && register_operand (operands[4], <MODE>mode)
4134 && !rtx_equal_p (operands[0], operands[4]))
4136 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4137 operands[4], operands[1]));
4138 operands[4] = operands[2] = operands[0];
4143 [(set_attr "movprfx" "yes")]
4146 ;; -------------------------------------------------------------------------
4147 ;; ---- [INT] Saturating addition and subtraction
4148 ;; -------------------------------------------------------------------------
4153 ;; -------------------------------------------------------------------------
4155 ;; Unpredicated saturating signed addition and subtraction.
;; Immediate alternatives use the operation (vsQ) or its reverse with a
;; negated immediate (vsS); MOVPRFX variants allow an untied destination.
4156 (define_insn "@aarch64_sve_<optab><mode>"
4157 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
4158 (SBINQOPS:SVE_FULL_I
4159 (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
4160 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
4163 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4164 <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4165 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4166 movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4167 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4168 [(set_attr "movprfx" "*,*,yes,yes,*")]
4171 ;; Unpredicated saturating unsigned addition and subtraction.
;; Unsigned forms only need the plain immediate alternative (vsa): there is
;; no negated-immediate reverse form as in the signed case above.
4172 (define_insn "@aarch64_sve_<optab><mode>"
4173 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
4174 (UBINQOPS:SVE_FULL_I
4175 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
4176 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
4179 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4180 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4181 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4182 [(set_attr "movprfx" "*,yes,*")]
4185 ;; -------------------------------------------------------------------------
4186 ;; ---- [INT] Highpart multiplication
4187 ;; -------------------------------------------------------------------------
4191 ;; -------------------------------------------------------------------------
4193 ;; Unpredicated highpart multiplication.
;; Lowers to the predicated SMULH/UMULH pattern with an all-true predicate.
4194 (define_expand "<su>mul<mode>3_highpart"
4195 [(set (match_operand:SVE_I 0 "register_operand")
4199 [(match_operand:SVE_I 1 "register_operand")
4200 (match_operand:SVE_I 2 "register_operand")]
4205 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4209 ;; Predicated highpart multiplication.
4210 (define_insn "@aarch64_pred_<optab><mode>"
4211 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4213 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4215 [(match_operand:SVE_I 2 "register_operand" "%0, w")
4216 (match_operand:SVE_I 3 "register_operand" "w, w")]
4221 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4222 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4223 [(set_attr "movprfx" "*,yes")]
4226 ;; Predicated highpart multiplications with merging.
4227 (define_expand "@cond_<optab><mode>"
4228 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4230 [(match_operand:<VPRED> 1 "register_operand")
4232 [(match_operand:SVE_FULL_I 2 "register_operand")
4233 (match_operand:SVE_FULL_I 3 "register_operand")]
4235 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4239 /* Only target code is aware of these operations, so we don't need
4240 to handle the fully-general case.  */
4241 gcc_assert (rtx_equal_p (operands[2], operands[4])
4242 || CONSTANT_P (operands[4]));
4245 ;; Predicated highpart multiplications, merging with the first input.
4246 (define_insn "*cond_<optab><mode>_2"
4247 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4249 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4251 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4252 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4258 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4259 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4260 [(set_attr "movprfx" "*,yes")])
4262 ;; Predicated highpart multiplications, merging with zero.
;; Both alternatives need a zeroing MOVPRFX, hence the earlyclobber
;; destination and the unconditional movprfx attribute.
4263 (define_insn "*cond_<optab><mode>_z"
4264 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
4266 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4268 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4269 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4271 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4275 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4276 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4277 [(set_attr "movprfx" "yes")])
4279 ;; -------------------------------------------------------------------------
4280 ;; ---- [INT] Division
4281 ;; -------------------------------------------------------------------------
4287 ;; -------------------------------------------------------------------------
4289 ;; Unpredicated integer division.
;; SVE division only exists for 32-bit and 64-bit elements (SVE_FULL_SDI);
;; the expand wraps the operation with an all-true predicate.
4290 (define_expand "<optab><mode>3"
4291 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4292 (unspec:SVE_FULL_SDI
4294 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4295 (match_operand:SVE_FULL_SDI 1 "register_operand")
4296 (match_operand:SVE_FULL_SDI 2 "register_operand"))]
4300 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4304 ;; Integer division predicated with a PTRUE.
;; Alternative 2 ties the divisor to the destination and so uses the
;; reversed form (e.g. SDIVR); alternative 3 needs a MOVPRFX.
4305 (define_insn "@aarch64_pred_<optab><mode>"
4306 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
4307 (unspec:SVE_FULL_SDI
4308 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4309 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4310 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
4311 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
4315 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4316 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4317 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4318 [(set_attr "movprfx" "*,*,yes")]
4321 ;; Predicated integer division with merging.
4322 (define_expand "@cond_<optab><mode>"
4323 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4324 (unspec:SVE_FULL_SDI
4325 [(match_operand:<VPRED> 1 "register_operand")
4326 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4327 (match_operand:SVE_FULL_SDI 2 "register_operand")
4328 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4329 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4334 ;; Predicated integer division, merging with the first input.
4335 (define_insn "*cond_<optab><mode>_2"
4336 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4337 (unspec:SVE_FULL_SDI
4338 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4339 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4340 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
4341 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
4346 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4347 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4348 [(set_attr "movprfx" "*,yes")]
4351 ;; Predicated integer division, merging with the second input.
4352 (define_insn "*cond_<optab><mode>_3"
4353 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4354 (unspec:SVE_FULL_SDI
4355 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4356 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4357 (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
4358 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
4363 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4364 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4365 [(set_attr "movprfx" "*,yes")]
4368 ;; Predicated integer division, merging with an independent value.
;; Same structure as *cond_<optab><mode>_any for SVE_INT_BINARY: the
;; post-reload rewrite splits out a vcond_mask when the fallback is a
;; register distinct from the destination.
4369 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4370 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4371 (unspec:SVE_FULL_SDI
4372 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4373 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4374 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
4375 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
4376 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4379 && !rtx_equal_p (operands[2], operands[4])
4380 && !rtx_equal_p (operands[3], operands[4])"
4382 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4383 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4384 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4385 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4387 "&& reload_completed
4388 && register_operand (operands[4], <MODE>mode)
4389 && !rtx_equal_p (operands[0], operands[4])"
4391 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4392 operands[4], operands[1]));
4393 operands[4] = operands[2] = operands[0];
4395 [(set_attr "movprfx" "yes")]
4398 ;; -------------------------------------------------------------------------
4399 ;; ---- [INT] Binary logical operations
4400 ;; -------------------------------------------------------------------------
4405 ;; -------------------------------------------------------------------------
4407 ;; Unpredicated integer binary logical operations.
;; The register-register form always operates on .d elements, since the
;; bitwise result is element-size agnostic; the immediate forms use the
;; actual element size for the bitmask immediate.
4408 (define_insn "<optab><mode>3"
4409 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w")
4411 (match_operand:SVE_I 1 "register_operand" "%0, w, w")
4412 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
4415 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4416 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4417 <logical>\t%0.d, %1.d, %2.d"
4418 [(set_attr "movprfx" "*,yes,*")]
4421 ;; Merging forms are handled through SVE_INT_BINARY.
4423 ;; -------------------------------------------------------------------------
4424 ;; ---- [INT] Binary logical operations (inverted second input)
4425 ;; -------------------------------------------------------------------------
4428 ;; -------------------------------------------------------------------------
4430 ;; Unpredicated BIC.
;; Operand 3 (the predicate on the NOT) is created as an all-ones constant.
4431 (define_expand "@aarch64_bic<mode>"
4432 [(set (match_operand:SVE_I 0 "register_operand")
4436 (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
4438 (match_operand:SVE_I 1 "register_operand")))]
4441 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Matching pattern; like the other .d-sized bitwise ops, the element size
;; is irrelevant to the result.
4446 (define_insn_and_rewrite "*bic<mode>3"
4447 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4452 (match_operand:SVE_I 2 "register_operand" "w"))]
4454 (match_operand:SVE_I 1 "register_operand" "w")))]
4456 "bic\t%0.d, %1.d, %2.d"
4457 "&& !CONSTANT_P (operands[3])"
4459 operands[3] = CONSTM1_RTX (<VPRED>mode);
4463 ;; Predicated BIC with merging.
4464 (define_expand "@cond_bic<mode>"
4465 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4467 [(match_operand:<VPRED> 1 "register_operand")
4469 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4470 (match_operand:SVE_FULL_I 2 "register_operand"))
4471 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4476 ;; Predicated integer BIC, merging with the first input.
4477 (define_insn "*cond_bic<mode>_2"
4478 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4480 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4483 (match_operand:SVE_I 3 "register_operand" "w, w"))
4484 (match_operand:SVE_I 2 "register_operand" "0, w"))
4489 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4490 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4491 [(set_attr "movprfx" "*,yes")]
4494 ;; Predicated integer BIC, merging with an independent value.
;; BIC is not commutative in its inputs, so unlike the generic _any
;; patterns there is no alternative that ties operand 3 to the fallback.
4495 (define_insn_and_rewrite "*cond_bic<mode>_any"
4496 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, ?&w")
4498 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4501 (match_operand:SVE_I 3 "register_operand" "w, w, w, w"))
4502 (match_operand:SVE_I 2 "register_operand" "0, w, w, w"))
4503 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4505 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4507 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4508 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4509 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4511 "&& reload_completed
4512 && register_operand (operands[4], <MODE>mode)
4513 && !rtx_equal_p (operands[0], operands[4])"
4515 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4516 operands[4], operands[1]));
4517 operands[4] = operands[2] = operands[0];
4519 [(set_attr "movprfx" "yes")]
4522 ;; -------------------------------------------------------------------------
4523 ;; ---- [INT] Shifts (rounding towards -Inf)
4524 ;; -------------------------------------------------------------------------
4532 ;; -------------------------------------------------------------------------
4534 ;; Unpredicated shift by a scalar, which expands into one of the vector
;; A constant amount is duplicated into a vector (and forced to a register
;; if it is not a valid shift operand); a variable amount is converted to
;; element width and broadcast.  Either way the vector-shift expander below
;; does the rest.
4536 (define_expand "<ASHIFT:optab><mode>3"
4537 [(set (match_operand:SVE_I 0 "register_operand")
4539 (match_operand:SVE_I 1 "register_operand")
4540 (match_operand:<VEL> 2 "general_operand")))]
4544 if (CONST_INT_P (operands[2]))
4546 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4547 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4548 amount = force_reg (<MODE>mode, amount);
4552 amount = convert_to_mode (<VEL>mode, operands[2], 0);
4553 amount = expand_vector_broadcast (<MODE>mode, amount);
4555 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4560 ;; Unpredicated shift by a vector.
4561 (define_expand "v<optab><mode>3"
4562 [(set (match_operand:SVE_I 0 "register_operand")
4566 (match_operand:SVE_I 1 "register_operand")
4567 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
4571 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4575 ;; Shift by a vector, predicated with a PTRUE.  We don't actually need
4576 ;; the predicate for the first alternative, but using Upa or X isn't
4577 ;; likely to gain much and would make the instruction seem less uniform
4578 ;; to the register allocator.
;; After reload, an immediate shift amount (alternative 1, D<lr>) is split
;; to the unpredicated *post_ra form below.
4579 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4580 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
4582 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4584 (match_operand:SVE_I 2 "register_operand" "w, 0, w, w")
4585 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
4590 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4591 <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4592 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4593 "&& reload_completed
4594 && !register_operand (operands[3], <MODE>mode)"
4595 [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
4597 [(set_attr "movprfx" "*,*,*,yes")]
4600 ;; Unpredicated shift operations by a constant (post-RA only).
4601 ;; These are generated by splitting a predicated instruction whose
4602 ;; predicate is unused.
4603 (define_insn "*post_ra_v<optab><mode>3"
4604 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4606 (match_operand:SVE_I 1 "register_operand" "w")
4607 (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
4608 "TARGET_SVE && reload_completed"
4609 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4612 ;; Predicated integer shift, merging with the first input.
4613 (define_insn "*cond_<optab><mode>_2_const"
4614 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4616 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4618 (match_operand:SVE_I 2 "register_operand" "0, w")
4619 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4624 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4625 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4626 [(set_attr "movprfx" "*,yes")]
4629 ;; Predicated integer shift, merging with an independent value.
;; Fallback operand 4 is zero (Dz), tied to the destination ("0"), or a
;; general register; the post-reload rewrite handles the register case
;; via a separate vcond_mask select.
4630 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4631 [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w")
4633 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4635 (match_operand:SVE_I 2 "register_operand" "w, w, w")
4636 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4637 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4639 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4641 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4642 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4644 "&& reload_completed
4645 && register_operand (operands[4], <MODE>mode)
4646 && !rtx_equal_p (operands[0], operands[4])"
4648 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4649 operands[4], operands[1]));
4650 operands[4] = operands[2] = operands[0];
4652 [(set_attr "movprfx" "yes")]
4655 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
;; "Wide" shift forms: B/H/S elements shifted by .d amounts.
4656 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4657 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4658 (unspec:SVE_FULL_BHSI
4659 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4660 (match_operand:VNx2DI 2 "register_operand" "w")]
4663 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4666 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4667 (define_expand "@cond_<sve_int_op><mode>"
4668 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4669 (unspec:SVE_FULL_BHSI
4670 [(match_operand:<VPRED> 1 "register_operand")
4671 (unspec:SVE_FULL_BHSI
4672 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4673 (match_operand:VNx2DI 3 "register_operand")]
4675 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4680 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with
4682 (define_insn "*cond_<sve_int_op><mode>_m"
4683 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
4684 (unspec:SVE_FULL_BHSI
4685 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4686 (unspec:SVE_FULL_BHSI
4687 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4688 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4694 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4695 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4696 [(set_attr "movprfx" "*, yes")])
4698 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
4699 (define_insn "*cond_<sve_int_op><mode>_z"
4700 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
4701 (unspec:SVE_FULL_BHSI
4702 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4703 (unspec:SVE_FULL_BHSI
4704 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4705 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4707 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4711 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4712 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4713 [(set_attr "movprfx" "yes")])
4715 ;; -------------------------------------------------------------------------
4716 ;; ---- [INT] Shifts (rounding towards 0)
4717 ;; -------------------------------------------------------------------------
4723 ;; -------------------------------------------------------------------------
4725 ;; Unpredicated ASRD.
4726 (define_expand "sdiv_pow2<mode>3"
4727 [(set (match_operand:SVE_I 0 "register_operand")
4731 [(match_operand:SVE_I 1 "register_operand")
4732 (match_operand 2 "aarch64_simd_rshift_imm")]
4737 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4742 (define_insn "*sdiv_pow2<mode>3"
4743 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4745 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4747 [(match_operand:SVE_I 2 "register_operand" "0, w")
4748 (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
4753 asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4754 movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4755 [(set_attr "movprfx" "*,yes")])
4757 ;; Predicated shift with merging.
4758 (define_expand "@cond_<sve_int_op><mode>"
4759 [(set (match_operand:SVE_I 0 "register_operand")
4761 [(match_operand:<VPRED> 1 "register_operand")
4765 [(match_operand:SVE_I 2 "register_operand")
4766 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
4769 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4773 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
4777 ;; Predicated shift, merging with the first input.
;; Predicated immediate shift, merging with the first input (operand 2).
;; The rewrite canonicalizes a non-constant operand 4 to an all-ones mask.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
4778 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
4779 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4781 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4785 [(match_operand:SVE_I 2 "register_operand" "0, w")
4786 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
4793 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4794 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4795 "&& !CONSTANT_P (operands[4])"
4797 operands[4] = CONSTM1_RTX (<VPRED>mode);
4799 [(set_attr "movprfx" "*,yes")])
4801 ;; Predicated shift, merging with an independent value.
;; Predicated immediate shift, merging with a value (operand 4) that is
;; independent of the shifted input; requires operand 2 != operand 4.
;; After reload, a register operand 4 distinct from the destination is
;; handled by emitting a separate vcond_mask select.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
4802 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
4803 [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w")
4805 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4809 [(match_operand:SVE_I 2 "register_operand" "w, w, w")
4810 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
4813 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4815 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4817 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4818 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4820 "&& reload_completed
4821 && register_operand (operands[4], <MODE>mode)
4822 && !rtx_equal_p (operands[0], operands[4])"
4824 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4825 operands[4], operands[1]));
4826 operands[4] = operands[2] = operands[0];
4828 [(set_attr "movprfx" "yes")]
4831 ;; -------------------------------------------------------------------------
4832 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
4833 ;; -------------------------------------------------------------------------
4838 ;; -------------------------------------------------------------------------
4840 ;; Unpredicated floating-point binary operations that take an integer as
4841 ;; their second operand.
;; Unpredicated FP op whose second source is an integer vector of the
;; equivalent mode (SVE_FP_BINARY_INT unspec).
;; NOTE(review): interior lines of this pattern are elided in this extraction.
4842 (define_insn "@aarch64_sve_<optab><mode>"
4843 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4845 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4846 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4847 SVE_FP_BINARY_INT))]
4849 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4852 ;; Predicated floating-point binary operations that take an integer
4853 ;; as their second operand.
;; Predicated FP op with an integer second operand.  Operand 4 records
;; the governing-predicate strictness (relaxed vs strict FP traps).
;; NOTE(review): interior lines of this pattern are elided in this extraction.
4854 (define_insn "@aarch64_pred_<optab><mode>"
4855 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4857 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4858 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4859 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4860 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4861 SVE_COND_FP_BINARY_INT))]
4864 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4865 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4866 [(set_attr "movprfx" "*,yes")]
4869 ;; Predicated floating-point binary operations with merging, taking an
4870 ;; integer as their second operand.
;; Expand a merging FP op (integer second operand); inactive lanes come
;; from operand 4.  Uses SVE_STRICT_GP since merging must not over-execute.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
4871 (define_expand "@cond_<optab><mode>"
4872 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4874 [(match_operand:<VPRED> 1 "register_operand")
4877 (const_int SVE_STRICT_GP)
4878 (match_operand:SVE_FULL_F 2 "register_operand")
4879 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
4880 SVE_COND_FP_BINARY_INT)
4881 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4886 ;; Predicated floating-point binary operations that take an integer as their
4887 ;; second operand, with inactive lanes coming from the first operand.
;; Merging FP op (integer 2nd operand), inactive lanes from operand 2,
;; relaxed governing predicate.  The rewrite re-uses operand 1 for the
;; inner predicate (operand 4) once they are known to match.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
4888 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
4889 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4891 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4894 (const_int SVE_RELAXED_GP)
4895 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4896 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4897 SVE_COND_FP_BINARY_INT)
4902 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4903 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4904 "&& !rtx_equal_p (operands[1], operands[4])"
4906 operands[4] = copy_rtx (operands[1]);
4908 [(set_attr "movprfx" "*,yes")]
;; Strict-GP variant of the pattern above; no rewrite is needed because
;; the predicate must be honoured exactly.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
4911 (define_insn "*cond_<optab><mode>_2_strict"
4912 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4914 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4917 (const_int SVE_STRICT_GP)
4918 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4919 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4920 SVE_COND_FP_BINARY_INT)
4925 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4926 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4927 [(set_attr "movprfx" "*,yes")]
4930 ;; Predicated floating-point binary operations that take an integer as
4931 ;; their second operand, with the values of inactive lanes being distinct
4932 ;; from the other inputs.
;; Merging FP op (integer 2nd operand) with an independent merge value,
;; relaxed GP.  Post-reload, a distinct register operand 4 is folded into
;; a separate vcond_mask; otherwise the inner predicate is unified with
;; operand 1.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
4933 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
4934 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4936 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4939 (const_int SVE_RELAXED_GP)
4940 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4941 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4942 SVE_COND_FP_BINARY_INT)
4943 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4945 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4947 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4948 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4949 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4953 if (reload_completed
4954 && register_operand (operands[4], <MODE>mode)
4955 && !rtx_equal_p (operands[0], operands[4]))
4957 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4958 operands[4], operands[1]));
4959 operands[4] = operands[2] = operands[0];
4961 else if (!rtx_equal_p (operands[1], operands[5]))
4962 operands[5] = copy_rtx (operands[1]);
4966 [(set_attr "movprfx" "yes")]
;; Strict-GP version of the previous pattern; the rewrite only handles
;; the post-reload vcond_mask split (no predicate unification).
;; NOTE(review): interior lines of this pattern are elided in this extraction.
4969 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
4970 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4972 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4975 (const_int SVE_STRICT_GP)
4976 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4977 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4978 SVE_COND_FP_BINARY_INT)
4979 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4981 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4983 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4984 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4985 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4987 "&& reload_completed
4988 && register_operand (operands[4], <MODE>mode)
4989 && !rtx_equal_p (operands[0], operands[4])"
4991 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4992 operands[4], operands[1]));
4993 operands[4] = operands[2] = operands[0];
4995 [(set_attr "movprfx" "yes")]
4998 ;; -------------------------------------------------------------------------
4999 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
5000 ;; -------------------------------------------------------------------------
5001 ;; Includes post-RA forms of:
5005 ;; -------------------------------------------------------------------------
5007 ;; Unpredicated floating-point binary operations (post-RA only).
5008 ;; These are generated by splitting a predicated instruction whose
5009 ;; predicate is unused.
;; Plain rtx-code FP binary op; only valid post-reload (reload_completed),
;; where it is produced by splitting a predicated insn whose PTRUE was
;; unnecessary.
5010 (define_insn "*post_ra_<sve_fp_op><mode>3"
5011 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5012 (SVE_UNPRED_FP_BINARY:SVE_FULL_F
5013 (match_operand:SVE_FULL_F 1 "register_operand" "w")
5014 (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
5015 "TARGET_SVE && reload_completed"
5016 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
5018 ;; -------------------------------------------------------------------------
5019 ;; ---- [FP] General binary arithmetic corresponding to unspecs
5020 ;; -------------------------------------------------------------------------
5021 ;; Includes merging forms of:
5022 ;; - FADD (constant forms handled in the "Addition" section)
5026 ;; - FMAXNM (including #0.0 and #1.0)
5028 ;; - FMINNM (including #0.0 and #1.0)
5029 ;; - FMUL (including #0.5 and #2.0)
5033 ;; - FSUB (constant forms handled in the "Addition" section)
5034 ;; - FSUBR (constant forms handled in the "Subtraction" section)
5035 ;; -------------------------------------------------------------------------
5037 ;; Unpredicated floating-point binary operations.
;; Unpredicated FP binary operation (unspec form).
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5038 (define_insn "@aarch64_sve_<optab><mode>"
5039 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5041 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5042 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5045 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5048 ;; Unpredicated floating-point binary operations that need to be predicated
;; Standard optab entry point: wrap the FP op in a relaxed-GP predicated
;; form, with operand 3 created here as an all-true predicate.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5050 (define_expand "<optab><mode>3"
5051 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5054 (const_int SVE_RELAXED_GP)
5055 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
5056 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
5057 SVE_COND_FP_BINARY_OPTAB))]
5060 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5064 ;; Predicated floating-point binary operations that have no immediate forms.
;; Predicated FP binary op with no immediate form.  Alternative 2 ties the
;; destination to operand 3 and uses the reversed mnemonic; alternative 3
;; needs MOVPRFX.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5065 (define_insn "@aarch64_pred_<optab><mode>"
5066 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
5068 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5069 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5070 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
5071 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
5072 SVE_COND_FP_BINARY_REG))]
5075 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5076 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5077 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5078 [(set_attr "movprfx" "*,*,yes")]
5081 ;; Predicated floating-point operations with merging.
;; Expand a merging predicated FP op; inactive lanes come from operand 4.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5082 (define_expand "@cond_<optab><mode>"
5083 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5085 [(match_operand:<VPRED> 1 "register_operand")
5088 (const_int SVE_STRICT_GP)
5089 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
5090 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
5092 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5097 ;; Predicated floating-point operations, merging with the first input.
;; FP op merging with the first input, relaxed GP; the rewrite unifies the
;; inner predicate (operand 4) with the governing predicate (operand 1).
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5098 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5099 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5101 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5104 (const_int SVE_RELAXED_GP)
5105 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5106 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5112 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5113 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5114 "&& !rtx_equal_p (operands[1], operands[4])"
5116 operands[4] = copy_rtx (operands[1]);
5118 [(set_attr "movprfx" "*,yes")]
;; Strict-GP version of the previous pattern (no predicate rewrite).
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5121 (define_insn "*cond_<optab><mode>_2_strict"
5122 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5124 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5127 (const_int SVE_STRICT_GP)
5128 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5129 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5135 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5136 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5137 [(set_attr "movprfx" "*,yes")]
5140 ;; Same for operations that take a 1-bit constant.
;; As *cond_<optab><mode>_2_relaxed but with an immediate second operand
;; (operations whose immediate is restricted, SVE_COND_FP_BINARY_I1).
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5141 (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
5142 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5144 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5147 (const_int SVE_RELAXED_GP)
5148 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5149 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5150 SVE_COND_FP_BINARY_I1)
5155 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5156 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
5157 "&& !rtx_equal_p (operands[1], operands[4])"
5159 operands[4] = copy_rtx (operands[1]);
5161 [(set_attr "movprfx" "*,yes")]
;; Strict-GP version of the immediate-operand merging pattern above.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5164 (define_insn "*cond_<optab><mode>_2_const_strict"
5165 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5167 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5170 (const_int SVE_STRICT_GP)
5171 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5172 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5173 SVE_COND_FP_BINARY_I1)
5178 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5179 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
5180 [(set_attr "movprfx" "*,yes")]
5183 ;; Predicated floating-point operations, merging with the second input.
;; FP op merging with the SECOND input (operand 3 tied to the destination),
;; hence the reversed mnemonic <sve_fp_op_rev>; relaxed GP with the usual
;; predicate-unifying rewrite.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5184 (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
5185 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5187 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5190 (const_int SVE_RELAXED_GP)
5191 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5192 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5198 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5199 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5200 "&& !rtx_equal_p (operands[1], operands[4])"
5202 operands[4] = copy_rtx (operands[1]);
5204 [(set_attr "movprfx" "*,yes")]
;; Strict-GP version of the second-input-merging pattern above.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5207 (define_insn "*cond_<optab><mode>_3_strict"
5208 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5210 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5213 (const_int SVE_STRICT_GP)
5214 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5215 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5221 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5222 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5223 [(set_attr "movprfx" "*,yes")]
5226 ;; Predicated floating-point operations, merging with an independent value.
;; FP op merging with an independent value (operand 4), relaxed GP.  The
;; insn condition requires operand 4 to differ from both inputs; the rewrite
;; either splits out a vcond_mask post-reload or unifies the inner predicate.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5227 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5228 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5230 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5233 (const_int SVE_RELAXED_GP)
5234 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5235 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5237 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5240 && !rtx_equal_p (operands[2], operands[4])
5241 && !rtx_equal_p (operands[3], operands[4])"
5243 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5244 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5245 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5246 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5250 if (reload_completed
5251 && register_operand (operands[4], <MODE>mode)
5252 && !rtx_equal_p (operands[0], operands[4]))
5254 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5255 operands[4], operands[1]));
5256 operands[4] = operands[2] = operands[0];
5258 else if (!rtx_equal_p (operands[1], operands[5]))
5259 operands[5] = copy_rtx (operands[1]);
5263 [(set_attr "movprfx" "yes")]
;; Strict-GP version of the independent-merge FP pattern; rewrite only
;; performs the post-reload vcond_mask split.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5266 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5267 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5269 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5272 (const_int SVE_STRICT_GP)
5273 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5274 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5276 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5279 && !rtx_equal_p (operands[2], operands[4])
5280 && !rtx_equal_p (operands[3], operands[4])"
5282 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5283 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5284 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5285 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5287 "&& reload_completed
5288 && register_operand (operands[4], <MODE>mode)
5289 && !rtx_equal_p (operands[0], operands[4])"
5291 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5292 operands[4], operands[1]));
5293 operands[4] = operands[2] = operands[0];
5295 [(set_attr "movprfx" "yes")]
5298 ;; Same for operations that take a 1-bit constant.
;; Independent-merge variant for operations with an immediate second
;; operand, relaxed GP.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5299 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
5300 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5302 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5305 (const_int SVE_RELAXED_GP)
5306 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
5307 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5308 SVE_COND_FP_BINARY_I1)
5309 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5311 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5313 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5314 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5318 if (reload_completed
5319 && register_operand (operands[4], <MODE>mode)
5320 && !rtx_equal_p (operands[0], operands[4]))
5322 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5323 operands[4], operands[1]));
5324 operands[4] = operands[2] = operands[0];
5326 else if (!rtx_equal_p (operands[1], operands[5]))
5327 operands[5] = copy_rtx (operands[1]);
5331 [(set_attr "movprfx" "yes")]
;; Strict-GP version of the immediate independent-merge pattern above.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5334 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
5335 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5337 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5340 (const_int SVE_STRICT_GP)
5341 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
5342 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5343 SVE_COND_FP_BINARY_I1)
5344 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5346 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5348 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5349 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5351 "&& reload_completed
5352 && register_operand (operands[4], <MODE>mode)
5353 && !rtx_equal_p (operands[0], operands[4])"
5355 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5356 operands[4], operands[1]));
5357 operands[4] = operands[2] = operands[0];
5359 [(set_attr "movprfx" "yes")]
5362 ;; -------------------------------------------------------------------------
5363 ;; ---- [FP] Addition
5364 ;; -------------------------------------------------------------------------
5368 ;; -------------------------------------------------------------------------
5370 ;; Predicated floating-point addition.
;; Predicated FADD.  Immediate alternatives use FADD/FSUB #imm (FSUB for a
;; negated constant, via %N3); the split drops the predicate entirely when
;; it is relaxed and operand 3 is a register, yielding a plain rtx plus.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5371 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5372 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
5374 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
5375 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
5376 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
5377 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
5381 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5382 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5384 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5385 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5386 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5387 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5388 ; Split the unpredicated form after reload, so that we don't have
5389 ; the unnecessary PTRUE.
5390 "&& reload_completed
5391 && register_operand (operands[3], <MODE>mode)
5392 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5393 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5395 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
5398 ;; Predicated floating-point addition of a constant, merging with the
;; Merging FADD of a constant (first input supplies inactive lanes),
;; relaxed GP; vsN constants are emitted as FSUB of the negated value.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5400 (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
5401 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
5403 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5406 (const_int SVE_RELAXED_GP)
5407 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
5408 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
5414 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5415 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5416 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5417 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
5418 "&& !rtx_equal_p (operands[1], operands[4])"
5420 operands[4] = copy_rtx (operands[1]);
5422 [(set_attr "movprfx" "*,*,yes,yes")]
;; Strict-GP version of the constant-FADD merging pattern above.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5425 (define_insn "*cond_add<mode>_2_const_strict"
5426 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
5428 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5431 (const_int SVE_STRICT_GP)
5432 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
5433 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
5439 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5440 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5441 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5442 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
5443 [(set_attr "movprfx" "*,*,yes,yes")]
5446 ;; Predicated floating-point addition of a constant, merging with an
5447 ;; independent value.
;; Constant FADD merging with an independent value (operand 4), relaxed GP;
;; post-reload, a distinct register merge value becomes a vcond_mask.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5448 (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
5449 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
5451 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5454 (const_int SVE_RELAXED_GP)
5455 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
5456 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
5458 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
5460 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5462 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5463 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5464 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5465 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5470 if (reload_completed
5471 && register_operand (operands[4], <MODE>mode)
5472 && !rtx_equal_p (operands[0], operands[4]))
5474 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5475 operands[4], operands[1]));
5476 operands[4] = operands[2] = operands[0];
5478 else if (!rtx_equal_p (operands[1], operands[5]))
5479 operands[5] = copy_rtx (operands[1]);
5483 [(set_attr "movprfx" "yes")]
;; Strict-GP version of the constant-FADD independent-merge pattern above.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5486 (define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
5487 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
5489 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5492 (const_int SVE_STRICT_GP)
5493 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
5494 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
5496 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
5498 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5500 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5501 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5502 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5503 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5506 "&& reload_completed
5507 && register_operand (operands[4], <MODE>mode)
5508 && !rtx_equal_p (operands[0], operands[4])"
5510 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5511 operands[4], operands[1]));
5512 operands[4] = operands[2] = operands[0];
5514 [(set_attr "movprfx" "yes")]
5517 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5519 ;; -------------------------------------------------------------------------
5520 ;; ---- [FP] Complex addition
5521 ;; -------------------------------------------------------------------------
5524 ;; -------------------------------------------------------------------------
5526 ;; Predicated FCADD.
;; Predicated FCADD (complex addition with rotation #<rot>).
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5527 (define_insn "@aarch64_pred_<optab><mode>"
5528 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5530 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5531 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5532 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5533 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5537 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5538 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5539 [(set_attr "movprfx" "*,yes")]
5542 ;; Predicated FCADD with merging.
;; Expand a merging FCADD; inactive lanes come from operand 4.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5543 (define_expand "@cond_<optab><mode>"
5544 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5546 [(match_operand:<VPRED> 1 "register_operand")
5549 (const_int SVE_STRICT_GP)
5550 (match_operand:SVE_FULL_F 2 "register_operand")
5551 (match_operand:SVE_FULL_F 3 "register_operand")]
5553 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5558 ;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer
;; Auto-vectorizer entry point for complex addition: wraps FCADD in a
;; relaxed-GP form, creating the all-true predicate as operand 3.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5559 (define_expand "@cadd<rot><mode>3"
5560 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5563 (const_int SVE_RELAXED_GP)
5564 (match_operand:SVE_FULL_F 1 "register_operand")
5565 (match_operand:SVE_FULL_F 2 "register_operand")]
5569 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5572 ;; Predicated FCADD, merging with the first input.
;; FCADD merging with the first input, relaxed GP, with the usual
;; predicate-unifying rewrite.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5573 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5574 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5576 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5579 (const_int SVE_RELAXED_GP)
5580 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5581 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5587 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5588 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5589 "&& !rtx_equal_p (operands[1], operands[4])"
5591 operands[4] = copy_rtx (operands[1]);
5593 [(set_attr "movprfx" "*,yes")]
;; Strict-GP version of the first-input-merging FCADD pattern above.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5596 (define_insn "*cond_<optab><mode>_2_strict"
5597 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5599 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5602 (const_int SVE_STRICT_GP)
5603 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5604 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5610 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5611 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5612 [(set_attr "movprfx" "*,yes")]
5615 ;; Predicated FCADD, merging with an independent value.
;; FCADD merging with an independent value (operand 4), relaxed GP; the
;; rewrite splits out a vcond_mask post-reload or unifies the predicate.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5616 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5617 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5619 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5622 (const_int SVE_RELAXED_GP)
5623 (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5624 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5626 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5628 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5630 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5631 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5632 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5636 if (reload_completed
5637 && register_operand (operands[4], <MODE>mode)
5638 && !rtx_equal_p (operands[0], operands[4]))
5640 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5641 operands[4], operands[1]));
5642 operands[4] = operands[2] = operands[0];
5644 else if (!rtx_equal_p (operands[1], operands[5]))
5645 operands[5] = copy_rtx (operands[1]);
5649 [(set_attr "movprfx" "yes")]
;; Strict-GP version of the independent-merge FCADD pattern above.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5652 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5653 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5655 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5658 (const_int SVE_STRICT_GP)
5659 (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5660 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5662 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5664 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5666 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5667 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5668 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5670 "&& reload_completed
5671 && register_operand (operands[4], <MODE>mode)
5672 && !rtx_equal_p (operands[0], operands[4])"
5674 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5675 operands[4], operands[1]));
5676 operands[4] = operands[2] = operands[0];
5678 [(set_attr "movprfx" "yes")]
5681 ;; -------------------------------------------------------------------------
5682 ;; ---- [FP] Subtraction
5683 ;; -------------------------------------------------------------------------
5687 ;; -------------------------------------------------------------------------
5689 ;; Predicated floating-point subtraction.
;; Predicated FSUB.  A constant FIRST operand uses FSUBR #imm; the split
;; drops the predicate when it is relaxed and operand 2 is a register,
;; yielding a plain rtx minus.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5690 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5691 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
5693 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5694 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
5695 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
5696 (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
5700 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5702 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5703 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5704 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5705 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5706 ; Split the unpredicated form after reload, so that we don't have
5707 ; the unnecessary PTRUE.
5708 "&& reload_completed
5709 && register_operand (operands[2], <MODE>mode)
5710 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5711 [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5713 [(set_attr "movprfx" "*,*,*,*,yes,yes")]
5716 ;; Predicated floating-point subtraction from a constant, merging with the
;; Constant-minus-vector (FSUBR #imm), merging with the second input,
;; relaxed GP, with the usual predicate-unifying rewrite.
;; NOTE(review): interior lines of this pattern are elided in this extraction.
5718 (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
5719 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5721 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5724 (const_int SVE_RELAXED_GP)
5725 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5726 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5732 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5733 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5734 "&& !rtx_equal_p (operands[1], operands[4])"
5736 operands[4] = copy_rtx (operands[1]);
5738 [(set_attr "movprfx" "*,yes")]
5741 (define_insn "*cond_sub<mode>_3_const_strict"
5742 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5744 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5747 (const_int SVE_STRICT_GP)
5748 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5749 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5755 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5756 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5757 [(set_attr "movprfx" "*,yes")]
5760 ;; Predicated floating-point subtraction from a constant, merging with an
5761 ;; independent value.
;; NOTE(review): interior lines are missing from this extraction; the merge
;; (vcond-mask style) wrapper around the subtraction is not visible here.
;; Operand 4 is the fallback value for inactive lanes: zero (Dz), tied to
;; the destination (0), or an independent register (w).
5762 (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
5763 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5765 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5768 (const_int SVE_RELAXED_GP)
5769 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5770 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5772 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5774 "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
5776 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5777 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
;; Rewrite: once the fallback is a register distinct from the destination,
;; emit an explicit select of op4 into op0 first, then make the subtraction
;; operate on op0 in place; otherwise just re-canonicalise the inner GP.
5781 if (reload_completed
5782 && register_operand (operands[4], <MODE>mode)
5783 && !rtx_equal_p (operands[0], operands[4]))
5785 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5786 operands[4], operands[1]));
5787 operands[4] = operands[3] = operands[0];
5789 else if (!rtx_equal_p (operands[1], operands[5]))
5790 operands[5] = copy_rtx (operands[1]);
5794 [(set_attr "movprfx" "yes")]
;; Strict-GP variant of the above: same templates and rewrite for the
;; independent-register case, but no GP re-canonicalisation.
5797 (define_insn_and_rewrite "*cond_sub<mode>_const_strict"
5798 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5800 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5803 (const_int SVE_STRICT_GP)
5804 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5805 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5807 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5809 "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
5811 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5812 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5814 "&& reload_completed
5815 && register_operand (operands[4], <MODE>mode)
5816 && !rtx_equal_p (operands[0], operands[4])"
5818 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5819 operands[4], operands[1]));
5820 operands[4] = operands[3] = operands[0];
5822 [(set_attr "movprfx" "yes")]
5824 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5826 ;; -------------------------------------------------------------------------
5827 ;; ---- [FP] Absolute difference
5828 ;; -------------------------------------------------------------------------
5831 ;; -------------------------------------------------------------------------
5833 ;; Predicated floating-point absolute difference.
;; NOTE(review): interior lines (unspec wrappers, conditions) are missing
;; from this extraction throughout this section.
;; Expander: FABD is modelled as abs (sub ...); the inner operations
;; presumably reuse operand 4's strictness — confirm against the full file.
5834 (define_expand "@aarch64_pred_abd<mode>"
5835 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5837 [(match_operand:<VPRED> 1 "register_operand")
5838 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5842 (match_operand:SVE_FULL_F 2 "register_operand")
5843 (match_operand:SVE_FULL_F 3 "register_operand")]
5849 ;; Predicated floating-point absolute difference.
;; Relaxed-GP matcher: the "%0" constraint marks ops 2/3 as commutative.
5850 (define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
5851 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5853 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5854 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5857 (const_int SVE_RELAXED_GP)
5858 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5859 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5864 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5865 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5866 "&& !rtx_equal_p (operands[1], operands[5])"
5868 operands[5] = copy_rtx (operands[1]);
5870 [(set_attr "movprfx" "*,yes")]
;; Strict-GP matcher: identical FABD templates, no GP rewrite.
5873 (define_insn "*aarch64_pred_abd<mode>_strict"
5874 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5876 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5877 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5880 (const_int SVE_STRICT_GP)
5881 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5882 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5887 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5888 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5889 [(set_attr "movprfx" "*,yes")]
;; Conditional (merging) expander.  |a-b| == |b-a|, so if the fallback
;; value equals op3 we can swap the multiplicands to reach the "_3" form.
5892 (define_expand "@aarch64_cond_abd<mode>"
5893 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5895 [(match_operand:<VPRED> 1 "register_operand")
5898 (const_int SVE_STRICT_GP)
5901 (const_int SVE_STRICT_GP)
5902 (match_operand:SVE_FULL_F 2 "register_operand")
5903 (match_operand:SVE_FULL_F 3 "register_operand")]
5906 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5910 if (rtx_equal_p (operands[3], operands[4]))
5911 std::swap (operands[2], operands[3]);
5914 ;; Predicated floating-point absolute difference, merging with the first
;; input (operand 2): inactive lanes keep op2's value, so a tied FABD works.
;; NOTE(review): interior lines are missing from this extraction.
5916 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
5917 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5919 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5922 (const_int SVE_RELAXED_GP)
5925 (const_int SVE_RELAXED_GP)
5926 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5927 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5934 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5935 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
;; Both inner GPs (ops 4 and 5) are re-canonicalised to operand 1.
5936 "&& (!rtx_equal_p (operands[1], operands[4])
5937 || !rtx_equal_p (operands[1], operands[5]))"
5939 operands[4] = copy_rtx (operands[1]);
5940 operands[5] = copy_rtx (operands[1]);
5942 [(set_attr "movprfx" "*,yes")]
;; Strict-GP version of the merge-with-first-input form.
5945 (define_insn "*aarch64_cond_abd<mode>_2_strict"
5946 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5948 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5951 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5954 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5955 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5956 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5963 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5964 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5965 [(set_attr "movprfx" "*,yes")]
5968 ;; Predicated floating-point absolute difference, merging with the second
;; input (operand 3): FABD is commutative in its sources, so tie op3 instead.
5970 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
5971 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5973 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5976 (const_int SVE_RELAXED_GP)
5979 (const_int SVE_RELAXED_GP)
5980 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5981 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5988 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5989 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5990 "&& (!rtx_equal_p (operands[1], operands[4])
5991 || !rtx_equal_p (operands[1], operands[5]))"
5993 operands[4] = copy_rtx (operands[1]);
5994 operands[5] = copy_rtx (operands[1]);
5996 [(set_attr "movprfx" "*,yes")]
;; Strict-GP version of the merge-with-second-input form.
5999 (define_insn "*aarch64_cond_abd<mode>_3_strict"
6000 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6002 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6005 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6008 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6009 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6010 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
6017 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6018 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
6019 [(set_attr "movprfx" "*,yes")]
6022 ;; Predicated floating-point absolute difference, merging with an
6023 ;; independent value.
;; NOTE(review): interior lines are missing from this extraction.  Operand 4
;; is the fallback for inactive lanes (zero / tied / independent register);
;; the insn condition requires it to differ from both FABD inputs so the
;; MOVPRFX sequences below are valid.
6024 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
6025 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
6027 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
6030 (const_int SVE_RELAXED_GP)
6033 (const_int SVE_RELAXED_GP)
6034 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
6035 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
6038 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
6041 && !rtx_equal_p (operands[2], operands[4])
6042 && !rtx_equal_p (operands[3], operands[4])"
6044 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6045 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6046 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6047 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
;; Rewrite: for an independent register fallback, select op4 into op0 first
;; and run FABD in place; otherwise just re-canonicalise the inner GPs.
6051 if (reload_completed
6052 && register_operand (operands[4], <MODE>mode)
6053 && !rtx_equal_p (operands[0], operands[4]))
6055 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6056 operands[4], operands[1]));
6057 operands[4] = operands[3] = operands[0];
6059 else if (!rtx_equal_p (operands[1], operands[5])
6060 || !rtx_equal_p (operands[1], operands[6]))
6062 operands[5] = copy_rtx (operands[1]);
6063 operands[6] = copy_rtx (operands[1]);
6068 [(set_attr "movprfx" "yes")]
;; Strict-GP version: same alternatives, rewrite only handles the
;; independent-register fallback (no GP re-canonicalisation).
6071 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
6072 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
6074 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
6077 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6080 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6081 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
6082 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
6085 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
6088 && !rtx_equal_p (operands[2], operands[4])
6089 && !rtx_equal_p (operands[3], operands[4])"
6091 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6092 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6093 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6094 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6096 "&& reload_completed
6097 && register_operand (operands[4], <MODE>mode)
6098 && !rtx_equal_p (operands[0], operands[4])"
6100 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6101 operands[4], operands[1]));
6102 operands[4] = operands[3] = operands[0];
6104 [(set_attr "movprfx" "yes")]
6107 ;; -------------------------------------------------------------------------
6108 ;; ---- [FP] Multiplication
6109 ;; -------------------------------------------------------------------------
6112 ;; -------------------------------------------------------------------------
6114 ;; Predicated floating-point multiplication.
;; NOTE(review): interior lines are missing from this extraction.
;; Alternatives: immediate FMUL (vsM), tied/untied register FMUL, and
;; MOVPRFX-prefixed copies for the ?&w destinations.  Ops 2/3 note: op2
;; carries the "%" commutativity marker here, matching FMUL's source order.
6115 (define_insn_and_split "@aarch64_pred_<optab><mode>"
6116 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
6118 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
6119 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
6120 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
6121 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
6125 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6127 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6128 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6129 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
6130 ; Split the unpredicated form after reload, so that we don't have
6131 ; the unnecessary PTRUE.
;; As for subtraction, the relaxed register-register case becomes a plain
;; (mult ...), which maps to the unpredicated FMUL.
6132 "&& reload_completed
6133 && register_operand (operands[3], <MODE>mode)
6134 && INTVAL (operands[4]) == SVE_RELAXED_GP"
6135 [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
6137 [(set_attr "movprfx" "*,*,*,yes,yes")]
6140 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6141 ;; SVE_COND_FP_BINARY_I1.
6143 ;; Unpredicated multiplication by selected lanes.
;; FMUL (indexed): op3 selects the lane of op2 to broadcast.
6144 (define_insn "@aarch64_mul_lane_<mode>"
6145 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6148 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
6149 (match_operand:SI 3 "const_int_operand")]
6150 UNSPEC_SVE_LANE_SELECT)
6151 (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
6153 "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6156 ;; -------------------------------------------------------------------------
6157 ;; ---- [FP] Division
6158 ;; -------------------------------------------------------------------------
6159 ;; The patterns in this section are synthetic.
6160 ;; -------------------------------------------------------------------------
;; div expander: first tries the Newton-Raphson approximation sequence
;; (aarch64_emit_approx_div); otherwise forces op1 into a register and
;; falls through to the predicated FDIV pattern with an all-true GP.
6162 (define_expand "div<mode>3"
6163 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6166 (const_int SVE_RELAXED_GP)
6167 (match_operand:SVE_FULL_F 1 "nonmemory_operand")
6168 (match_operand:SVE_FULL_F 2 "register_operand")]
6172 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
6175 operands[1] = force_reg (<MODE>mode, operands[1]);
6176 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
;; Reciprocal estimate (FRECPE) — building block for the approx sequence.
6180 (define_expand "@aarch64_frecpe<mode>"
6181 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6183 [(match_operand:SVE_FULL_F 1 "register_operand")]
;; Reciprocal step (FRECPS) — Newton-Raphson refinement step.
6188 (define_expand "@aarch64_frecps<mode>"
6189 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6191 [(match_operand:SVE_FULL_F 1 "register_operand")
6192 (match_operand:SVE_FULL_F 2 "register_operand")]
6197 ;; -------------------------------------------------------------------------
6198 ;; ---- [FP] Binary logical operations
6199 ;; -------------------------------------------------------------------------
6204 ;; -------------------------------------------------------------------------
6206 ;; Binary logical operations on floating-point modes.  We avoid subregs
6207 ;; by providing this, but we need to use UNSPECs since rtx logical ops
6208 ;; aren't defined for floating-point modes.
;; The .d element size is arbitrary for a bitwise op — any size would do.
6209 (define_insn "*<optab><mode>3"
6210 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6212 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
6213 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
6216 "<logicalf_op>\t%0.d, %1.d, %2.d"
6219 ;; -------------------------------------------------------------------------
6220 ;; ---- [FP] Sign copying
6221 ;; -------------------------------------------------------------------------
6222 ;; The patterns in this section are synthetic.
6223 ;; -------------------------------------------------------------------------
;; copysign(x, y): result = (y & SIGN_MASK) | (x & ~SIGN_MASK), computed in
;; the equal-width integer mode via lowpart subregs.  "bits" is the shift
;; amount of the sign bit (element width - 1).
6225 (define_expand "copysign<mode>3"
6226 [(match_operand:SVE_FULL_F 0 "register_operand")
6227 (match_operand:SVE_FULL_F 1 "register_operand")
6228 (match_operand:SVE_FULL_F 2 "register_operand")]
6231 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6232 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6233 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6234 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6236 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6237 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6239 emit_insn (gen_and<v_int_equiv>3
6241 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6244 emit_insn (gen_and<v_int_equiv>3
6246 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6249 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
6250 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
;; xorsign(x, y): result = x ^ (y & SIGN_MASK) — flips x's sign when y is
;; negative.  Same integer-mode technique as copysign, one AND + one XOR.
6255 (define_expand "xorsign<mode>3"
6256 [(match_operand:SVE_FULL_F 0 "register_operand")
6257 (match_operand:SVE_FULL_F 1 "register_operand")
6258 (match_operand:SVE_FULL_F 2 "register_operand")]
6261 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6262 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6263 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6265 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6266 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6268 emit_insn (gen_and<v_int_equiv>3
6270 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6273 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
6274 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6279 ;; -------------------------------------------------------------------------
6280 ;; ---- [FP] Maximum and minimum
6281 ;; -------------------------------------------------------------------------
6287 ;; -------------------------------------------------------------------------
6289 ;; Unpredicated fmax/fmin (the libm functions).  The optabs for the
6290 ;; smax/smin rtx codes are handled in the generic section above.
;; Expander supplies an all-true GP (operand 3) so the predicated FMAX/FMIN
;; patterns can be used for the unpredicated optab.
6291 (define_expand "<fmaxmin><mode>3"
6292 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6295 (const_int SVE_RELAXED_GP)
6296 (match_operand:SVE_FULL_F 1 "register_operand")
6297 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
6298 SVE_COND_FP_MAXMIN_PUBLIC))]
6301 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
6305 ;; Predicated fmax/fmin (the libm functions).  The optabs for the
6306 ;; smax/smin rtx codes are handled in the generic section above.
;; Conditional form: operand 4 supplies the value for inactive lanes.
6307 (define_expand "cond_<fmaxmin><mode>"
6308 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6310 [(match_operand:<VPRED> 1 "register_operand")
6313 (const_int SVE_RELAXED_GP)
6314 (match_operand:SVE_FULL_F 2 "register_operand")
6315 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
6316 SVE_COND_FP_MAXMIN_PUBLIC)
6317 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6322 ;; Predicated floating-point maximum/minimum.
;; Alternatives: immediate (vsB) or register source, tied destination, plus
;; MOVPRFX-prefixed copies for the untied ?&w alternatives.
6323 (define_insn "@aarch64_pred_<optab><mode>"
6324 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
6326 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
6327 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6328 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
6329 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
6330 SVE_COND_FP_MAXMIN))]
6333 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6334 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6335 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6336 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
6337 [(set_attr "movprfx" "*,*,yes,yes")]
6340 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6341 ;; SVE_COND_FP_BINARY_I1.
6343 ;; -------------------------------------------------------------------------
6344 ;; ---- [PRED] Binary logical operations
6345 ;; -------------------------------------------------------------------------
6353 ;; -------------------------------------------------------------------------
6355 ;; Predicate AND.  We can reuse one of the inputs as the GP.
6356 ;; Doubling the second operand is the preferred implementation
6357 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
6358 (define_insn "and<mode>3"
6359 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6360 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
6361 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
6363 "and\t%0.b, %1/z, %2.b, %2.b"
6366 ;; Unpredicated predicate EOR and ORR.
;; Expander: supplies an all-true GP (operand 3) so that the zeroing
;; predicated form below can implement the plain rtx logical op.
6367 (define_expand "<optab><mode>3"
6368 [(set (match_operand:PRED_ALL 0 "register_operand")
6370 (LOGICAL_OR:PRED_ALL
6371 (match_operand:PRED_ALL 1 "register_operand")
6372 (match_operand:PRED_ALL 2 "register_operand"))
6376 operands[3] = aarch64_ptrue_reg (<MODE>mode);
6380 ;; Predicated predicate AND, EOR and ORR.
;; Zeroing form: (op2 LOGICAL op3) AND gp — lanes outside the GP are 0.
6381 (define_insn "@aarch64_pred_<optab><mode>_z"
6382 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6385 (match_operand:PRED_ALL 2 "register_operand" "Upa")
6386 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6387 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
6389 "<logical>\t%0.b, %1/z, %2.b, %3.b"
6392 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
6393 ;; the GP.  Store the result in operand 0 and set the flags in the same
6394 ;; way as for PTEST.
6395 (define_insn "*<optab><mode>3_cc"
6396 [(set (reg:CC_NZC CC_REGNUM)
6398 [(match_operand:VNx16BI 1 "register_operand" "Upa")
6400 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6403 (match_operand:PRED_ALL 2 "register_operand" "Upa")
6404 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6407 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6408 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
6411 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
6414 ;; Same with just the flags result.
;; The predicate result is dead, so operand 0 is only a scratch.
6415 (define_insn "*<optab><mode>3_ptest"
6416 [(set (reg:CC_NZC CC_REGNUM)
6418 [(match_operand:VNx16BI 1 "register_operand" "Upa")
6420 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6423 (match_operand:PRED_ALL 2 "register_operand" "Upa")
6424 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6427 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
6429 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
6432 ;; -------------------------------------------------------------------------
6433 ;; ---- [PRED] Binary logical operations (inverted second input)
6434 ;; -------------------------------------------------------------------------
6438 ;; -------------------------------------------------------------------------
6440 ;; Predicated predicate BIC and ORN.
;; Zeroing form of (op2 NLOGICAL ~op3) AND gp.
6441 (define_insn "aarch64_pred_<nlogical><mode>_z"
6442 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6445 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6446 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6447 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
6449 "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
6452 ;; Same, but set the flags as a side-effect.
;; BICS/ORNS: predicate result plus PTEST-style NZC flags.
6453 (define_insn "*<nlogical><mode>3_cc"
6454 [(set (reg:CC_NZC CC_REGNUM)
6456 [(match_operand:VNx16BI 1 "register_operand" "Upa")
6458 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6462 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6463 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6466 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6467 (and:PRED_ALL (NLOGICAL:PRED_ALL
6468 (not:PRED_ALL (match_dup 3))
6472 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
6475 ;; Same with just the flags result.
;; Predicate result dead — operand 0 is only a scratch.
6476 (define_insn "*<nlogical><mode>3_ptest"
6477 [(set (reg:CC_NZC CC_REGNUM)
6479 [(match_operand:VNx16BI 1 "register_operand" "Upa")
6481 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6485 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6486 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6489 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
6491 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
6494 ;; -------------------------------------------------------------------------
6495 ;; ---- [PRED] Binary logical operations (inverted result)
6496 ;; -------------------------------------------------------------------------
6500 ;; -------------------------------------------------------------------------
6502 ;; Predicated predicate NAND and NOR.
;; Zeroing form of (~op2 NLOGICAL ~op3) AND gp — De Morgan duals of the
;; AND/ORR forms above.
6503 (define_insn "aarch64_pred_<logical_nn><mode>_z"
6504 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6507 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6508 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
6509 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
6511 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
6514 ;; Same, but set the flags as a side-effect.
;; NANDS/NORS: predicate result plus PTEST-style NZC flags.
6515 (define_insn "*<logical_nn><mode>3_cc"
6516 [(set (reg:CC_NZC CC_REGNUM)
6518 [(match_operand:VNx16BI 1 "register_operand" "Upa")
6520 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6524 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6526 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
6529 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6530 (and:PRED_ALL (NLOGICAL:PRED_ALL
6531 (not:PRED_ALL (match_dup 2))
6532 (not:PRED_ALL (match_dup 3)))
6535 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
6538 ;; Same with just the flags result.
;; Predicate result dead — operand 0 is only a scratch.
6539 (define_insn "*<logical_nn><mode>3_ptest"
6540 [(set (reg:CC_NZC CC_REGNUM)
6542 [(match_operand:VNx16BI 1 "register_operand" "Upa")
6544 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6548 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6550 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
6553 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
6555 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
6558 ;; =========================================================================
6559 ;; == Ternary arithmetic
6560 ;; =========================================================================
6562 ;; -------------------------------------------------------------------------
6563 ;; ---- [INT] MLA and MAD
6564 ;; -------------------------------------------------------------------------
6568 ;; -------------------------------------------------------------------------
6570 ;; Unpredicated integer addition of product.
;; NOTE(review): interior lines are missing from this extraction throughout
;; this section (unspec wrappers and insn conditions are not visible).
;; Expander: op0 = op1 * op2 + op3.  aarch64_prepare_sve_int_fma handles
;; constant multiplicands (e.g. shift-and-add forms); otherwise fall through
;; with an all-true GP in operand 4.
6571 (define_expand "fma<mode>4"
6572 [(set (match_operand:SVE_I 0 "register_operand")
6577 (match_operand:SVE_I 1 "register_operand")
6578 (match_operand:SVE_I 2 "nonmemory_operand"))]
6580 (match_operand:SVE_I 3 "register_operand")))]
6583 if (aarch64_prepare_sve_int_fma (operands, PLUS))
6585 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6589 ;; Predicated integer addition of product.
;; MAD when the destination is tied to a multiplicand, MLA when tied to the
;; addend, MOVPRFX + MLA otherwise.  "%0" marks ops 2/3 commutative.
6590 (define_insn "@aarch64_pred_fma<mode>"
6591 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
6594 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6596 (match_operand:SVE_I 2 "register_operand" "%0, w, w")
6597 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
6599 (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
6602 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6603 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6604 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6605 [(set_attr "movprfx" "*,*,yes")]
6608 ;; Predicated integer addition of product with merging.
;; Operand 5 is the fallback value for inactive lanes.
6609 (define_expand "cond_fma<mode>"
6610 [(set (match_operand:SVE_I 0 "register_operand")
6612 [(match_operand:<VPRED> 1 "register_operand")
6615 (match_operand:SVE_I 2 "register_operand")
6616 (match_operand:SVE_I 3 "general_operand"))
6617 (match_operand:SVE_I 4 "register_operand"))
6618 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
6622 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
6624 /* Swap the multiplication operands if the fallback value is the
6625 second of the two.  */
6626 if (rtx_equal_p (operands[3], operands[5]))
6627 std::swap (operands[2], operands[3]);
6631 ;; Predicated integer addition of product, merging with the first input.
;; Inactive lanes keep op2 (a multiplicand) — maps directly to MAD.
6632 (define_insn "*cond_fma<mode>_2"
6633 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
6635 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6638 (match_operand:SVE_I 2 "register_operand" "0, w")
6639 (match_operand:SVE_I 3 "register_operand" "w, w"))
6640 (match_operand:SVE_I 4 "register_operand" "w, w"))
6645 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6646 movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6647 [(set_attr "movprfx" "*,yes")]
6650 ;; Predicated integer addition of product, merging with the third input.
;; Inactive lanes keep op4 (the addend) — maps directly to MLA.
6651 (define_insn "*cond_fma<mode>_4"
6652 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
6654 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6657 (match_operand:SVE_I 2 "register_operand" "w, w")
6658 (match_operand:SVE_I 3 "register_operand" "w, w"))
6659 (match_operand:SVE_I 4 "register_operand" "0, w"))
6664 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6665 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6666 [(set_attr "movprfx" "*,yes")]
6669 ;; Predicated integer addition of product, merging with an independent value.
;; The condition requires the fallback (op5) to differ from all FMA inputs
;; so the MOVPRFX sequences are valid; the rewrite folds a register fallback
;; into an explicit select followed by an in-place MLA.
6670 (define_insn_and_rewrite "*cond_fma<mode>_any"
6671 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6673 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6676 (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
6677 (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))
6678 (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w"))
6679 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6682 && !rtx_equal_p (operands[2], operands[5])
6683 && !rtx_equal_p (operands[3], operands[5])
6684 && !rtx_equal_p (operands[4], operands[5])"
6686 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6687 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6688 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6689 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6690 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6692 "&& reload_completed
6693 && register_operand (operands[5], <MODE>mode)
6694 && !rtx_equal_p (operands[0], operands[5])"
6696 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6697 operands[5], operands[1]));
6698 operands[5] = operands[4] = operands[0];
6700 [(set_attr "movprfx" "yes")]
6703 ;; -------------------------------------------------------------------------
6704 ;; ---- [INT] MLS and MSB
6705 ;; -------------------------------------------------------------------------
6709 ;; -------------------------------------------------------------------------
6711 ;; Unpredicated integer subtraction of product.
6712 (define_expand "fnma<mode>4"
6713 [(set (match_operand:SVE_I 0 "register_operand")
6715 (match_operand:SVE_I 3 "register_operand")
6719 (match_operand:SVE_I 1 "register_operand")
6720 (match_operand:SVE_I 2 "general_operand"))]
6724 if (aarch64_prepare_sve_int_fma (operands, MINUS))
6726 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6730 ;; Predicated integer subtraction of product.
6731 (define_insn "@aarch64_pred_fnma<mode>"
6732 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
6734 (match_operand:SVE_I 4 "register_operand" "w, 0, w")
6736 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6738 (match_operand:SVE_I 2 "register_operand" "%0, w, w")
6739 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
6743 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6744 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6745 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6746 [(set_attr "movprfx" "*,*,yes")]
6749 ;; Predicated integer subtraction of product with merging.
6750 (define_expand "cond_fnma<mode>"
6751 [(set (match_operand:SVE_I 0 "register_operand")
6753 [(match_operand:<VPRED> 1 "register_operand")
6755 (match_operand:SVE_I 4 "register_operand")
6757 (match_operand:SVE_I 2 "register_operand")
6758 (match_operand:SVE_I 3 "general_operand")))
6759 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
6763 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
6765 /* Swap the multiplication operands if the fallback value is the
6766 second of the two. */
6767 if (rtx_equal_p (operands[3], operands[5]))
6768 std::swap (operands[2], operands[3]);
6772 ;; Predicated integer subtraction of product, merging with the first input.
;; "First input" is operand 2 (a multiplicand): inactive lanes keep op2.
;; Alt 0 ties op2 to op0 and uses MSB in place; alt 1 MOVPRFXes op2 into
;; the (earlyclobber-ish "?&w") destination first.
6773 (define_insn "*cond_fnma<mode>_2"
6774 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
6776 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6778 (match_operand:SVE_I 4 "register_operand" "w, w")
6780 (match_operand:SVE_I 2 "register_operand" "0, w")
6781 (match_operand:SVE_I 3 "register_operand" "w, w")))
6786 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6787 movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6788 [(set_attr "movprfx" "*,yes")]
6791 ;; Predicated integer subtraction of product, merging with the third input.
;; "Third input" is operand 4 (the value the product is subtracted from):
;; inactive lanes keep op4.  Alt 0 ties op4 to op0 and uses MLS in place;
;; alt 1 MOVPRFXes op4 into op0 first.
6792 (define_insn "*cond_fnma<mode>_4"
6793 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
6795 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6797 (match_operand:SVE_I 4 "register_operand" "0, w")
6799 (match_operand:SVE_I 2 "register_operand" "w, w")
6800 (match_operand:SVE_I 3 "register_operand" "w, w")))
6805 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6806 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6807 [(set_attr "movprfx" "*,yes")]
6810 ;; Predicated integer subtraction of product, merging with an
6811 ;; independent value.
;; The insn condition requires the merge value (op5) to differ from every
;; other input.  Alternatives cover a zero fallback (Dz), a fallback tied
;; to op0, and a general register fallback; every alternative needs a
;; MOVPRFX, hence the attribute is unconditionally "yes".
6812 (define_insn_and_rewrite "*cond_fnma<mode>_any"
6813 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6815 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6817 (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")
6819 (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
6820 (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")))
6821 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6824 && !rtx_equal_p (operands[2], operands[5])
6825 && !rtx_equal_p (operands[3], operands[5])
6826 && !rtx_equal_p (operands[4], operands[5])"
6828 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6829 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6830 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6831 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6832 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6834 "&& reload_completed
6835 && register_operand (operands[5], <MODE>mode)
6836 && !rtx_equal_p (operands[0], operands[5])"
/* After reload, if the fallback is a register distinct from the
   destination, first SEL the accumulator and fallback into the
   destination, then perform the multiply-subtract in place.  */
6838 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6839 operands[5], operands[1]));
6840 operands[5] = operands[4] = operands[0];
6842 [(set_attr "movprfx" "yes")]
6845 ;; -------------------------------------------------------------------------
6846 ;; ---- [INT] Dot product
6847 ;; -------------------------------------------------------------------------
6853 ;; -------------------------------------------------------------------------
6855 ;; Four-element integer dot-product with accumulation.
;; op0 = op3 + dot(op1, op2); operands 1 and 2 use quarter-width elements
;; (<Vetype_fourth>) relative to the SDI result.  The <sur> iterator prefix
;; is substituted directly into the DOT mnemonic.  Alt 1 MOVPRFXes the
;; accumulator into the destination first.
6856 (define_insn "<sur>dot_prod<vsi2qi>"
6857 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6859 (unspec:SVE_FULL_SDI
6860 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6861 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6863 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
6866 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
6867 movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
6868 [(set_attr "movprfx" "*,yes")]
6871 ;; Four-element integer dot-product by selected lanes with accumulation.
;; Operand 2 is lane-selected: UNSPEC_SVE_LANE_SELECT picks lane %3
;; (a const_int) of op2, which is restricted to the registers usable in
;; indexed multiply forms (<sve_lane_con>).  Accumulator is operand 4.
6872 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6873 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6875 (unspec:SVE_FULL_SDI
6876 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6878 [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6879 (match_operand:SI 3 "const_int_operand")]
6880 UNSPEC_SVE_LANE_SELECT)]
6882 (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
6885 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
6886 movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
6887 [(set_attr "movprfx" "*,yes")]
;; Dot product with accumulation producing a fixed VNx4SI (.s) result from
;; .b inputs: op0 = op3 + <sur>dot(op1, op2).
;; NOTE(review): the unspec code selecting the exact operation is not
;; visible in this extract.
6890 (define_insn "@<sur>dot_prod<vsi2qi>"
6891 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6894 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6895 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6897 (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))]
6900 <sur>dot\\t%0.s, %1.b, %2.b
6901 movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b"
6902 [(set_attr "movprfx" "*,yes")]
;; Lane-indexed variant of the VNx4SI-only dot product: operand 2 is
;; lane-selected (constraint "y") with the lane number in operand 3;
;; operand 4 is the accumulator.
6905 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6906 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6909 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6911 [(match_operand:<VSI2QI> 2 "register_operand" "y, y")
6912 (match_operand:SI 3 "const_int_operand")]
6913 UNSPEC_SVE_LANE_SELECT)]
6915 (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))]
6918 <sur>dot\\t%0.s, %1.b, %2.b[%3]
6919 movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]"
6920 [(set_attr "movprfx" "*,yes")]
6923 ;; -------------------------------------------------------------------------
6924 ;; ---- [INT] Sum of absolute differences
6925 ;; -------------------------------------------------------------------------
6926 ;; The patterns in this section are synthetic.
6927 ;; -------------------------------------------------------------------------
6929 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
6930 ;; operands 1 and 2. The sequence also has to perform a widening reduction of
6931 ;; the difference into a vector and accumulate that into operand 3 before
6932 ;; copying that into the result operand 0.
6933 ;; Perform that with a sequence of:
6935 ;; [SU]ABD diff.b, p0/m, op1.b, op2.b
6936 ;; MOVPRFX op0, op3 // If necessary
6937 ;; UDOT op0.s, diff.b, ones.b
;; Sum of absolute differences; see the comment block above for the
;; emitted sequence ([SU]ABD followed by a dot product with an all-ones
;; vector to widen and accumulate into operand 3 / operand 0).
6938 (define_expand "<sur>sad<vsi2qi>"
6939 [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
6940 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
6941 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
6942 (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
/* The absolute differences are non-negative, so the unsigned dot product
   (UDOT) is used for both the signed and the unsigned expander.  */
6945 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
6946 rtx diff = gen_reg_rtx (<VSI2QI>mode);
6947 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
6948 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
6953 ;; -------------------------------------------------------------------------
6954 ;; ---- [INT] Matrix multiply-accumulate
6955 ;; -------------------------------------------------------------------------
6960 ;; -------------------------------------------------------------------------
;; Integer matrix multiply-accumulate: op0 = op1 + <sur>mmla(op2, op3),
;; with a .s result and .b inputs.  Alt 1 MOVPRFXes the accumulator
;; (operand 1) into the destination first.
6962 (define_insn "@aarch64_sve_add_<optab><vsi2qi>"
6963 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6966 [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
6967 (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
6969 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
6972 <sur>mmla\\t%0.s, %2.b, %3.b
6973 movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
6974 [(set_attr "movprfx" "*,yes")]
6977 ;; -------------------------------------------------------------------------
6978 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
6979 ;; -------------------------------------------------------------------------
6980 ;; Includes merging patterns for:
6989 ;; -------------------------------------------------------------------------
6991 ;; Unpredicated floating-point ternary operations.
;; Wraps the operation in a conditional unspec with a relaxed governing
;; predicate (SVE_RELAXED_GP); operand 4 receives an all-true predicate.
6992 (define_expand "<optab><mode>4"
6993 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6996 (const_int SVE_RELAXED_GP)
6997 (match_operand:SVE_FULL_F 1 "register_operand")
6998 (match_operand:SVE_FULL_F 2 "register_operand")
6999 (match_operand:SVE_FULL_F 3 "register_operand")]
7000 SVE_COND_FP_TERNARY))]
7003 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7007 ;; Predicated floating-point ternary operations.
;; Operand 5 records the required predicate strictness.  Alternatives:
;;   0: accumulator op4 tied to op0, op2 commutable ("%") -> FMLA-style op;
;;   1: op2 tied to op0                                   -> FMAD-style op;
;;   2: nothing tied -> MOVPRFX from op4, then FMLA-style op.
7008 (define_insn "@aarch64_pred_<optab><mode>"
7009 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
7011 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
7012 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7013 (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
7014 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
7015 (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
7016 SVE_COND_FP_TERNARY))]
7019 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7020 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7021 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
7022 [(set_attr "movprfx" "*,*,yes")]
7025 ;; Predicated floating-point ternary operations with merging.
;; op0 = op1 ? ternary(op2, op3, op4) : op5, with strict GP semantics;
;; operand 5 is the fallback for inactive lanes.
7026 (define_expand "@cond_<optab><mode>"
7027 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7029 [(match_operand:<VPRED> 1 "register_operand")
7032 (const_int SVE_STRICT_GP)
7033 (match_operand:SVE_FULL_F 2 "register_operand")
7034 (match_operand:SVE_FULL_F 3 "register_operand")
7035 (match_operand:SVE_FULL_F 4 "register_operand")]
7036 SVE_COND_FP_TERNARY)
7037 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7041 /* Swap the multiplication operands if the fallback value is the
7042 second of the two. */
/* Multiplication is commutative, so the swap preserves the result while
   letting a later pattern tie operand 2 to the fallback value.  */
7043 if (rtx_equal_p (operands[3], operands[5]))
7044 std::swap (operands[2], operands[3]);
7047 ;; Predicated floating-point ternary operations, merging with the
;; first input (operand 2): inactive lanes keep op2.  Alt 0 ties op2 to
;; op0 (FMAD form); alt 1 MOVPRFXes op2 into op0 first.
;; NOTE(review): operand 5 is the unspec's own governing predicate (its
;; match_operand is on elided lines); once it provably differs from
;; operand 1, the rewrite replaces it with a copy of operand 1.
7049 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
7050 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7052 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7055 (const_int SVE_RELAXED_GP)
7056 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
7057 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7058 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
7059 SVE_COND_FP_TERNARY)
7064 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7065 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
7066 "&& !rtx_equal_p (operands[1], operands[5])"
7068 operands[5] = copy_rtx (operands[1]);
7070 [(set_attr "movprfx" "*,yes")]
;; Strict-GP variant of the merge-with-first-input pattern: the governing
;; predicate must be honored exactly, so there is no predicate rewrite.
7073 (define_insn "*cond_<optab><mode>_2_strict"
7074 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7076 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7079 (const_int SVE_STRICT_GP)
7080 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
7081 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7082 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
7083 SVE_COND_FP_TERNARY)
7088 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7089 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
7090 [(set_attr "movprfx" "*,yes")]
7093 ;; Predicated floating-point ternary operations, merging with the
;; third input (operand 4, the accumulator): inactive lanes keep op4.
;; Alt 0 ties op4 to op0 (FMLA form); alt 1 MOVPRFXes op4 into op0.
;; The rewrite keeps the unspec's own governing predicate (operand 5,
;; matched on elided lines) in sync with operand 1.
7095 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7096 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7098 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7101 (const_int SVE_RELAXED_GP)
7102 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7103 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7104 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7105 SVE_COND_FP_TERNARY)
7110 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7111 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
7112 "&& !rtx_equal_p (operands[1], operands[5])"
7114 operands[5] = copy_rtx (operands[1]);
7116 [(set_attr "movprfx" "*,yes")]
;; Strict-GP variant of the merge-with-third-input pattern: no predicate
;; rewrite is allowed, the stated governing predicate must be used.
7119 (define_insn "*cond_<optab><mode>_4_strict"
7120 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7122 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7125 (const_int SVE_STRICT_GP)
7126 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7127 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7128 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7129 SVE_COND_FP_TERNARY)
7134 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7135 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
7136 [(set_attr "movprfx" "*,yes")]
7139 ;; Predicated floating-point ternary operations, merging with an
7140 ;; independent value.
;; The insn condition requires the merge value (op5) to differ from every
;; other input; alternatives cover a zero fallback (Dz), a fallback tied
;; to an input, and a general register fallback.  All need a MOVPRFX.
7141 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7142 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
7144 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
7147 (const_int SVE_RELAXED_GP)
7148 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
7149 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
7150 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
7151 SVE_COND_FP_TERNARY)
7152 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
7155 && !rtx_equal_p (operands[2], operands[5])
7156 && !rtx_equal_p (operands[3], operands[5])
7157 && !rtx_equal_p (operands[4], operands[5])"
7159 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7160 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7161 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7162 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7163 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
/* Two rewrites: (1) after reload, fold a register fallback into the
   destination with a SEL and make the operation in-place; (2) otherwise
   keep the unspec's governing predicate (operand 6, matched on elided
   lines) in sync with operand 1.  */
7167 if (reload_completed
7168 && register_operand (operands[5], <MODE>mode)
7169 && !rtx_equal_p (operands[0], operands[5]))
7171 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7172 operands[5], operands[1]));
7173 operands[5] = operands[4] = operands[0];
7175 else if (!rtx_equal_p (operands[1], operands[6]))
7176 operands[6] = copy_rtx (operands[1]);
7180 [(set_attr "movprfx" "yes")]
;; Strict-GP variant of the merge-with-independent-value pattern.  The
;; only rewrite here fires after reload, when the fallback is a register
;; distinct from the destination: SEL the accumulator/fallback into the
;; destination first, then perform the operation in place.
7183 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7184 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
7186 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
7189 (const_int SVE_STRICT_GP)
7190 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
7191 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
7192 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
7193 SVE_COND_FP_TERNARY)
7194 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
7197 && !rtx_equal_p (operands[2], operands[5])
7198 && !rtx_equal_p (operands[3], operands[5])
7199 && !rtx_equal_p (operands[4], operands[5])"
7201 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7202 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7203 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7204 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7205 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7207 "&& reload_completed
7208 && register_operand (operands[5], <MODE>mode)
7209 && !rtx_equal_p (operands[0], operands[5])"
7211 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7212 operands[5], operands[1]));
7213 operands[5] = operands[4] = operands[0];
7215 [(set_attr "movprfx" "yes")]
7218 ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
7219 ;; (fma ...) since target-independent code won't understand the indexing.
;; op0 = ternary(op1, lane %3 of op2, op4); op2 is restricted to the
;; registers usable in indexed multiply forms (<sve_lane_con>).  Alt 1
;; MOVPRFXes the accumulator (operand 4) into the destination first.
7220 (define_insn "@aarch64_<optab>_lane_<mode>"
7221 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7223 [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
7225 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
7226 (match_operand:SI 3 "const_int_operand")]
7227 UNSPEC_SVE_LANE_SELECT)
7228 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7229 SVE_FP_TERNARY_LANE))]
7232 <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7233 movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
7234 [(set_attr "movprfx" "*,yes")]
7237 ;; -------------------------------------------------------------------------
7238 ;; ---- [FP] Complex multiply-add
7239 ;; -------------------------------------------------------------------------
7240 ;; Includes merging patterns for:
7242 ;; -------------------------------------------------------------------------
7244 ;; Predicated FCMLA.
;; Complex multiply-accumulate with rotation #<rot>; operand 4 is the
;; accumulator (tied to op0 in alt 0, MOVPRFXed in alt 1) and operand 5
;; is the predicate-strictness marker.
7245 (define_insn "@aarch64_pred_<optab><mode>"
7246 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7248 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7249 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7250 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7251 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7252 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7256 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7257 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
7258 [(set_attr "movprfx" "*,yes")]
7261 ;; unpredicated optab pattern for auto-vectorizer
7262 ;; The complex mla/mls operations always need to expand to two instructions.
7263 ;; The first operation does half the computation and the second does the
7264 ;; remainder. Because of this, expand early.
;; The multiplicands passed to the predicated FCMLA are operands 3 and 2
;; and the addend is operand 1; the first rotation accumulates into a
;; temporary, the second finishes into operand 0.  An all-true predicate
;; (operand 4) and a relaxed-GP marker (operand 5) are created here.
7265 (define_expand "cml<fcmac1><conj_op><mode>4"
7266 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7270 (match_operand:SVE_FULL_F 1 "register_operand")
7271 (match_operand:SVE_FULL_F 2 "register_operand")
7272 (match_operand:SVE_FULL_F 3 "register_operand")]
7276 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7277 operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode);
7278 rtx tmp = gen_reg_rtx (<MODE>mode);
7280 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4],
7281 operands[3], operands[2],
7282 operands[1], operands[5]));
7284 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4],
7285 operands[3], operands[2],
7290 ;; unpredicated optab pattern for auto-vectorizer
7291 ;; The complex mul operations always need to expand to two instructions.
7292 ;; The first operation does half the computation and the second does the
7293 ;; remainder. Because of this, expand early.
;; Complex multiply is complex multiply-accumulate with a zero
;; accumulator: ACCUM is forced to zero and the two FCMLA rotations are
;; chained through TMP into operand 0.
7294 (define_expand "cmul<conj_op><mode>3"
7295 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7297 [(match_operand:SVE_FULL_F 1 "register_operand")
7298 (match_operand:SVE_FULL_F 2 "register_operand")]
7302 rtx pred_reg = aarch64_ptrue_reg (<VPRED>mode);
7303 rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode);
7304 rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
7305 rtx tmp = gen_reg_rtx (<MODE>mode);
7307 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg,
7308 operands[2], operands[1],
7311 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg,
7312 operands[2], operands[1],
7317 ;; Predicated FCMLA with merging.
;; op0 = op1 ? fcmla(op4, op2, op3) : op5, with strict GP semantics;
;; operand 5 is the fallback value for inactive lanes.
7318 (define_expand "@cond_<optab><mode>"
7319 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7321 [(match_operand:<VPRED> 1 "register_operand")
7324 (const_int SVE_STRICT_GP)
7325 (match_operand:SVE_FULL_F 2 "register_operand")
7326 (match_operand:SVE_FULL_F 3 "register_operand")
7327 (match_operand:SVE_FULL_F 4 "register_operand")]
7329 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7334 ;; Predicated FCMLA, merging with the third input.
;; Inactive lanes keep operand 4 (the accumulator).  The rewrite keeps
;; the unspec's own governing predicate (operand 5, matched on elided
;; lines) in sync with operand 1.
7335 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7336 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7338 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7341 (const_int SVE_RELAXED_GP)
7342 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7343 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7344 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7350 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7351 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
7352 "&& !rtx_equal_p (operands[1], operands[5])"
7354 operands[5] = copy_rtx (operands[1]);
7356 [(set_attr "movprfx" "*,yes")]
;; Strict-GP FCMLA merging with the accumulator input: no predicate
;; rewrite; alt 1 MOVPRFXes operand 4 into the destination first.
7359 (define_insn "*cond_<optab><mode>_4_strict"
7360 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7362 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7365 (const_int SVE_STRICT_GP)
7366 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7367 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7368 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7374 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7375 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
7376 [(set_attr "movprfx" "*,yes")]
7379 ;; Predicated FCMLA, merging with an independent value.
;; Requires the fallback (op5) to differ from the accumulator (op4).
;; Alternatives: zero fallback with free/tied accumulator, fallback tied
;; to op0, and a general register fallback; all need a MOVPRFX.
7380 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7381 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
7383 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
7386 (const_int SVE_RELAXED_GP)
7387 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
7388 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
7389 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
7391 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
7393 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7395 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7396 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7397 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
/* Two rewrites: (1) after reload, fold a register fallback into the
   destination with a SEL and make the FCMLA in-place; (2) otherwise keep
   the unspec's governing predicate (operand 6, matched on elided lines)
   in sync with operand 1.  */
7401 if (reload_completed
7402 && register_operand (operands[5], <MODE>mode)
7403 && !rtx_equal_p (operands[0], operands[5]))
7405 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7406 operands[5], operands[1]));
7407 operands[5] = operands[4] = operands[0];
7409 else if (!rtx_equal_p (operands[1], operands[6]))
7410 operands[6] = copy_rtx (operands[1]);
7414 [(set_attr "movprfx" "yes")]
;; Strict-GP FCMLA merging with an independent value.  The rewrite fires
;; after reload when the fallback is a register distinct from the
;; destination: SEL the accumulator/fallback into the destination first,
;; then perform the FCMLA in place.
7417 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7418 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
7420 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
7423 (const_int SVE_STRICT_GP)
7424 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
7425 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
7426 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
7428 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
7430 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7432 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7433 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7434 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7436 "&& reload_completed
7437 && register_operand (operands[5], <MODE>mode)
7438 && !rtx_equal_p (operands[0], operands[5])"
7440 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7441 operands[5], operands[1]));
7442 operands[5] = operands[4] = operands[0];
7444 [(set_attr "movprfx" "yes")]
7447 ;; Unpredicated FCMLA with indexing.
;; Lane-indexed FCMLA on 16/32-bit element modes (SVE_FULL_HSF): op2
;; supplies the selected complex pair (constraint <sve_lane_pair_con>,
;; lane number in op3) and op4 is the accumulator.
7448 (define_insn "@aarch64_<optab>_lane_<mode>"
7449 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
7450 (unspec:SVE_FULL_HSF
7451 [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
7452 (unspec:SVE_FULL_HSF
7453 [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
7454 (match_operand:SI 3 "const_int_operand")]
7455 UNSPEC_SVE_LANE_SELECT)
7456 (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
7460 fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7461 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
7462 [(set_attr "movprfx" "*,yes")]
7465 ;; -------------------------------------------------------------------------
7466 ;; ---- [FP] Trigonometric multiply-add
7467 ;; -------------------------------------------------------------------------
7470 ;; -------------------------------------------------------------------------
;; FTMAD (trigonometric multiply-add): op0 = ftmad(op1, op2, #op3), where
;; operand 3 is a constant immediate.  Operand 1 is tied to the
;; destination (alt 0) or MOVPRFXed into it (alt 1).
7472 (define_insn "@aarch64_sve_tmad<mode>"
7473 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7475 [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
7476 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7477 (match_operand:DI 3 "const_int_operand")]
7481 ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7482 movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
7483 [(set_attr "movprfx" "*,yes")]
7486 ;; -------------------------------------------------------------------------
7487 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
7488 ;; -------------------------------------------------------------------------
7494 ;; -------------------------------------------------------------------------
;; BFloat16 long ternary operations: single-precision (.s) accumulator in
;; operand 1, BF16 (.h) inputs in operands 2 and 3.  Alt 1 MOVPRFXes the
;; accumulator into the destination first.
7496 (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
7497 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
7499 [(match_operand:VNx4SF 1 "register_operand" "0, w")
7500 (match_operand:VNx8BF 2 "register_operand" "w, w")
7501 (match_operand:VNx8BF 3 "register_operand" "w, w")]
7502 SVE_BFLOAT_TERNARY_LONG))]
7505 <sve_fp_op>\t%0.s, %2.h, %3.h
7506 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
7507 [(set_attr "movprfx" "*,yes")]
7510 ;; The immediate range is enforced before generating the instruction.
;; Lane-indexed BF16 long ternary operation: operand 3 is lane-selected
;; ("y" constraint) with the lane number in operand 4; operand 1 is the
;; accumulator.
7511 (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
7512 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
7514 [(match_operand:VNx4SF 1 "register_operand" "0, w")
7515 (match_operand:VNx8BF 2 "register_operand" "w, w")
7516 (match_operand:VNx8BF 3 "register_operand" "y, y")
7517 (match_operand:SI 4 "const_int_operand")]
7518 SVE_BFLOAT_TERNARY_LONG_LANE))]
7521 <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7522 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
7523 [(set_attr "movprfx" "*,yes")]
7526 ;; -------------------------------------------------------------------------
7527 ;; ---- [FP] Matrix multiply-accumulate
7528 ;; -------------------------------------------------------------------------
7530 ;; - FMMLA (F32MM,F64MM)
7531 ;; -------------------------------------------------------------------------
7533 ;; The mode iterator enforces the target requirements.
;; Floating-point matrix multiply-accumulate (FMMLA): accumulator in
;; operand 1 (tied to the destination or MOVPRFXed into it), matrix
;; inputs in operands 2 and 3.
7534 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
7535 [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w")
7537 [(match_operand:SVE_MATMULF 2 "register_operand" "w, w")
7538 (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
7539 (match_operand:SVE_MATMULF 1 "register_operand" "0, w")]
7543 <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
7544 movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
7545 [(set_attr "movprfx" "*,yes")]
7548 ;; =========================================================================
7549 ;; == Comparisons and selects
7550 ;; =========================================================================
7552 ;; -------------------------------------------------------------------------
7553 ;; ---- [INT,FP] Select based on predicates
7554 ;; -------------------------------------------------------------------------
7555 ;; Includes merging patterns for:
7559 ;; -------------------------------------------------------------------------
7561 ;; vcond_mask operand order: true, false, mask
7562 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
7563 ;; SEL operand order: mask, true, false
;; op0 = op3 ? op1 : op2, with op1 allowed to be a duplicated immediate
;; and op2 a register or zero.
7564 (define_expand "@vcond_mask_<mode><vpred>"
7565 [(set (match_operand:SVE_ALL 0 "register_operand")
7567 [(match_operand:<VPRED> 3 "register_operand")
7568 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
7569 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
/* At most one of the data inputs may be non-register: if the "true"
   input is already a register, force the "false" input into one too.  */
7573 if (register_operand (operands[1], <MODE>mode))
7574 operands[2] = force_reg (<MODE>mode, operands[2]);
7580 ;; - a duplicated immediate and a register
7581 ;; - a duplicated immediate and zero
7583 ;; For unpacked vectors, it doesn't really matter whether SEL uses the
7584 ;; container size or the element size. If SEL used the container size,
7585 ;; it would ignore undefined bits of the predicate but would copy the
7586 ;; upper (undefined) bits of each container along with the defined bits.
7587 ;; If SEL used the element size, it would use undefined bits of the predicate
7588 ;; to select between undefined elements in each input vector. Thus the only
7589 ;; difference is whether the undefined bits in a container always come from
7590 ;; the same input as the defined bits, or whether the choice can vary
7591 ;; independently of the defined bits.
7593 ;; For the other instructions, using the element size is more natural,
7594 ;; so we do that for SEL as well.
;; Predicated select.  Alternatives (true input / false input):
;;   0: register / register            -> SEL
;;   1: dup immediate / tied register  -> MOV /m, #imm
;;   2: dup immediate / zero           -> MOV /z, #imm
;;   3: FP dup immediate / tied reg    -> FMOV /m, #imm
;;   4: FP dup immediate / zero        -> MOVPRFX /z + FMOV /m
;;   5,6: dup or FP immediate / untied register -> MOVPRFX + MOV or FMOV
;; The insn condition mirrors the expander: not both inputs may be
;; non-register operands at once.
7595 (define_insn "*vcond_mask_<mode><vpred>"
7596 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
7598 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
7599 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
7600 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
7603 && (!register_operand (operands[1], <MODE>mode)
7604 || register_operand (operands[2], <MODE>mode))"
7606 sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
7607 mov\t%0.<Vetype>, %3/m, #%I1
7608 mov\t%0.<Vetype>, %3/z, #%I1
7609 fmov\t%0.<Vetype>, %3/m, #%1
7610 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
7611 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
7612 movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
7613 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
7616 ;; Optimize selects between a duplicated scalar variable and another vector,
7617 ;; the latter of which can be a zero constant or a variable. Treat duplicates
7618 ;; of GPRs as being more expensive than duplicates of FPRs, since they
7619 ;; involve a cross-file move.
;; Alternatives pair a GPR ("r", printed via %<vwcore>1) or FPR ("w",
;; printed via %<Vetype>1) scalar with a tied, zero, or general register
;; "false" input; the GPR forms carry "?"/"??" cost penalties.
7620 (define_insn "@aarch64_sel_dup<mode>"
7621 [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
7623 [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
7624 (vec_duplicate:SVE_ALL
7625 (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
7626 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
7630 mov\t%0.<Vetype>, %3/m, %<vwcore>1
7631 mov\t%0.<Vetype>, %3/m, %<Vetype>1
7632 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7633 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
7634 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7635 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
7636 [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
7639 ;; -------------------------------------------------------------------------
7640 ;; ---- [INT,FP] Compare and select
7641 ;; -------------------------------------------------------------------------
7642 ;; The patterns in this section are synthetic.
7643 ;; -------------------------------------------------------------------------
7645 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
7646 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Data and comparison modes may differ but must agree on container size.
7647 (define_expand "vcond<SVE_ALL:mode><SVE_I:mode>"
7648 [(set (match_operand:SVE_ALL 0 "register_operand")
7649 (if_then_else:SVE_ALL
7650 (match_operator 3 "comparison_operator"
7651 [(match_operand:SVE_I 4 "register_operand")
7652 (match_operand:SVE_I 5 "nonmemory_operand")])
7653 (match_operand:SVE_ALL 1 "nonmemory_operand")
7654 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
7655 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
7657 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
7662 ;; Integer vcondu. Don't enforce an immediate range here, since it
7663 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Unsigned counterpart of the signed expander; same container-size rule.
7664 (define_expand "vcondu<SVE_ALL:mode><SVE_I:mode>"
7665 [(set (match_operand:SVE_ALL 0 "register_operand")
7666 (if_then_else:SVE_ALL
7667 (match_operator 3 "comparison_operator"
7668 [(match_operand:SVE_I 4 "register_operand")
7669 (match_operand:SVE_I 5 "nonmemory_operand")])
7670 (match_operand:SVE_ALL 1 "nonmemory_operand")
7671 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
7672 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
7674 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
7679 ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
7680 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
7681 (define_expand "vcond<mode><v_fp_equiv>"
7682 [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
7683 (if_then_else:SVE_FULL_HSD
7684 (match_operator 3 "comparison_operator"
7685 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
7686 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
7687 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
7688 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
7691 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
7696 ;; -------------------------------------------------------------------------
7697 ;; ---- [INT] Comparisons
7698 ;; -------------------------------------------------------------------------
7710 ;; -------------------------------------------------------------------------
7712 ;; Signed integer comparisons. Don't enforce an immediate range here, since
7713 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
7715 (define_expand "vec_cmp<mode><vpred>"
7717 [(set (match_operand:<VPRED> 0 "register_operand")
7718 (match_operator:<VPRED> 1 "comparison_operator"
7719 [(match_operand:SVE_I 2 "register_operand")
7720 (match_operand:SVE_I 3 "nonmemory_operand")]))
7721 (clobber (reg:CC_NZC CC_REGNUM))])]
7724 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
7725 operands[2], operands[3]);
7730 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
7731 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
7733 (define_expand "vec_cmpu<mode><vpred>"
7735 [(set (match_operand:<VPRED> 0 "register_operand")
7736 (match_operator:<VPRED> 1 "comparison_operator"
7737 [(match_operand:SVE_I 2 "register_operand")
7738 (match_operand:SVE_I 3 "nonmemory_operand")]))
7739 (clobber (reg:CC_NZC CC_REGNUM))])]
7742 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
7743 operands[2], operands[3]);
7748 ;; Predicated integer comparisons.
7750 ;; For unpacked vectors, only the lowpart element in each input container
7751 ;; has a defined value, and only the predicate bits associated with
7752 ;; those elements are defined. For example, when comparing two VNx2SIs:
7754 ;; - The VNx2SIs can be seen as VNx2DIs in which the low halves of each
7755 ;; DI container store an SI element. The upper bits of each DI container
7758 ;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the
7759 ;; even elements are defined and the odd elements are undefined.
7761 ;; - The associated predicate mode is VNx2BI. This means that only the
7762 ;; low bit in each predicate byte is defined (on input and on output).
7764 ;; - We use a .s comparison to compare VNx2SIs, under the control of a
7765 ;; VNx2BI governing predicate, to produce a VNx2BI result. If we view
7766 ;; the .s operation as operating on VNx4SIs then for odd lanes:
7768 ;; - the input governing predicate bit is undefined
7769 ;; - the SI elements being compared are undefined
7770 ;; - the predicate result bit is therefore undefined, but
7771 ;; - the predicate result bit is in the undefined part of a VNx2BI,
7772 ;; so its value doesn't matter anyway.
7773 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
7774 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7776 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7777 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7778 (SVE_INT_CMP:<VPRED>
7779 (match_operand:SVE_I 3 "register_operand" "w, w")
7780 (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
7782 (clobber (reg:CC_NZC CC_REGNUM))]
7785 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
7786 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
7789 ;; Predicated integer comparisons in which both the flag and predicate
7790 ;; results are interesting.
7791 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
7792 [(set (reg:CC_NZC CC_REGNUM)
7794 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
7796 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7799 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7800 (SVE_INT_CMP:<VPRED>
7801 (match_operand:SVE_I 2 "register_operand" "w, w")
7802 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
7805 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7809 (SVE_INT_CMP:<VPRED>
7814 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7816 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
7817 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
7818 "&& !rtx_equal_p (operands[4], operands[6])"
7820 operands[6] = copy_rtx (operands[4]);
7821 operands[7] = operands[5];
7825 ;; Predicated integer comparisons in which only the flags result is
7827 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
7828 [(set (reg:CC_NZC CC_REGNUM)
7830 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
7832 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7835 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7836 (SVE_INT_CMP:<VPRED>
7837 (match_operand:SVE_I 2 "register_operand" "w, w")
7838 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
7841 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
7843 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7845 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
7846 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
7847 "&& !rtx_equal_p (operands[4], operands[6])"
7849 operands[6] = copy_rtx (operands[4]);
7850 operands[7] = operands[5];
7854 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
7855 ;; comparison with an AND. Split the instruction into its preferred form
7856 ;; at the earliest opportunity, in order to get rid of the redundant
7858 (define_insn_and_split "*cmp<cmp_op><mode>_and"
7859 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7863 (const_int SVE_KNOWN_PTRUE)
7864 (SVE_INT_CMP:<VPRED>
7865 (match_operand:SVE_I 2 "register_operand" "w, w")
7866 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
7868 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
7869 (clobber (reg:CC_NZC CC_REGNUM))]
7877 (const_int SVE_MAYBE_NOT_PTRUE)
7878 (SVE_INT_CMP:<VPRED>
7882 (clobber (reg:CC_NZC CC_REGNUM))])]
7885 ;; Predicated integer wide comparisons.
7886 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
7887 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7889 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7890 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7892 [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
7893 (match_operand:VNx2DI 4 "register_operand" "w")]
7894 SVE_COND_INT_CMP_WIDE)]
7896 (clobber (reg:CC_NZC CC_REGNUM))]
7898 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
7901 ;; Predicated integer wide comparisons in which both the flag and
7902 ;; predicate results are interesting.
7903 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
7904 [(set (reg:CC_NZC CC_REGNUM)
7906 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7908 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7910 [(match_operand:VNx16BI 6 "register_operand" "Upl")
7911 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7913 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7914 (match_operand:VNx2DI 3 "register_operand" "w")]
7915 SVE_COND_INT_CMP_WIDE)]
7918 (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7925 SVE_COND_INT_CMP_WIDE)]
7928 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7929 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7932 ;; Predicated integer wide comparisons in which only the flags result
7934 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
7935 [(set (reg:CC_NZC CC_REGNUM)
7937 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7939 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7941 [(match_operand:VNx16BI 6 "register_operand" "Upl")
7942 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7944 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7945 (match_operand:VNx2DI 3 "register_operand" "w")]
7946 SVE_COND_INT_CMP_WIDE)]
7949 (clobber (match_scratch:<VPRED> 0 "=Upa"))]
7951 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7952 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7955 ;; -------------------------------------------------------------------------
7956 ;; ---- [INT] While tests
7957 ;; -------------------------------------------------------------------------
7969 ;; -------------------------------------------------------------------------
7971 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
7972 ;; true for all J in [0, I].
7973 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
7974 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7975 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7976 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7978 (clobber (reg:CC_NZC CC_REGNUM))]
7980 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7983 ;; The WHILE instructions set the flags in the same way as a PTEST with
7984 ;; a PTRUE GP. Handle the case in which both results are useful. The GP
7985 ;; operands to the PTEST aren't needed, so we allow them to be anything.
7986 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
7987 [(set (reg:CC_NZC CC_REGNUM)
7991 (const_int SVE_KNOWN_PTRUE)
7993 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7994 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7997 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7998 (unspec:PRED_ALL [(match_dup 1)
8002 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8003 ;; Force the compiler to drop the unused predicate operand, so that we
8004 ;; don't have an unnecessary PTRUE.
8005 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8007 operands[3] = CONSTM1_RTX (VNx16BImode);
8008 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8012 ;; Same, but handle the case in which only the flags result is useful.
8013 (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
8014 [(set (reg:CC_NZC CC_REGNUM)
8018 (const_int SVE_KNOWN_PTRUE)
8020 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8021 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8024 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8026 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8027 ;; Force the compiler to drop the unused predicate operand, so that we
8028 ;; don't have an unnecessary PTRUE.
8029 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8031 operands[3] = CONSTM1_RTX (VNx16BImode);
8032 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8036 ;; -------------------------------------------------------------------------
8037 ;; ---- [FP] Direct comparisons
8038 ;; -------------------------------------------------------------------------
8047 ;; -------------------------------------------------------------------------
8049 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
8050 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
8052 (define_expand "vec_cmp<mode><vpred>"
8053 [(set (match_operand:<VPRED> 0 "register_operand")
8054 (match_operator:<VPRED> 1 "comparison_operator"
8055 [(match_operand:SVE_FULL_F 2 "register_operand")
8056 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
8059 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
8060 operands[2], operands[3], false);
8065 ;; Predicated floating-point comparisons.
8066 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
8067 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8069 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
8070 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8071 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
8072 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
8073 SVE_COND_FP_CMP_I0))]
8076 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
8077 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
8080 ;; Same for unordered comparisons.
8081 (define_insn "@aarch64_pred_fcmuo<mode>"
8082 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8084 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8085 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8086 (match_operand:SVE_FULL_F 3 "register_operand" "w")
8087 (match_operand:SVE_FULL_F 4 "register_operand" "w")]
8088 UNSPEC_COND_FCMUO))]
8090 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
8093 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
8094 ;; with another predicate P. This does not have the same trapping behavior
8095 ;; as predicating the comparison itself on P, but it's a legitimate fold,
8096 ;; since we can drop any potentially-trapping operations whose results
8099 ;; Split the instruction into its preferred form (below) at the earliest
8100 ;; opportunity, in order to get rid of the redundant operand 1.
8101 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
8102 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8105 [(match_operand:<VPRED> 1)
8106 (const_int SVE_KNOWN_PTRUE)
8107 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
8108 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
8110 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
8117 (const_int SVE_MAYBE_NOT_PTRUE)
8120 SVE_COND_FP_CMP_I0))]
8123 ;; Same for unordered comparisons.
8124 (define_insn_and_split "*fcmuo<mode>_and_combine"
8125 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8128 [(match_operand:<VPRED> 1)
8129 (const_int SVE_KNOWN_PTRUE)
8130 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8131 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8133 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
8140 (const_int SVE_MAYBE_NOT_PTRUE)
8143 UNSPEC_COND_FCMUO))]
8146 ;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
8147 ;; In this case, we still need a separate NOT/BIC operation, but predicating
8148 ;; the comparison on the BIC operand removes the need for a PTRUE.
8149 (define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
8150 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8155 [(match_operand:<VPRED> 1)
8156 (const_int SVE_KNOWN_PTRUE)
8157 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8158 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8159 SVE_COND_FP_CMP_I0))
8160 (match_operand:<VPRED> 4 "register_operand" "Upa"))
8161 (match_dup:<VPRED> 1)))
8162 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8169 (const_int SVE_MAYBE_NOT_PTRUE)
8172 SVE_COND_FP_CMP_I0))
8179 if (can_create_pseudo_p ())
8180 operands[5] = gen_reg_rtx (<VPRED>mode);
8184 ;; Make sure that we expand to a nor when the operand 4 of
8185 ;; *fcm<cmp_op><mode>_bic_combine is a not.
8186 (define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
8187 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8192 [(match_operand:<VPRED> 1)
8193 (const_int SVE_KNOWN_PTRUE)
8194 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8195 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8196 SVE_COND_FP_CMP_I0))
8198 (match_operand:<VPRED> 4 "register_operand" "Upa")))
8199 (match_dup:<VPRED> 1)))
8200 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8207 (const_int SVE_KNOWN_PTRUE)
8210 SVE_COND_FP_CMP_I0))
8220 if (can_create_pseudo_p ())
8221 operands[5] = gen_reg_rtx (<VPRED>mode);
8225 (define_insn_and_split "*fcmuo<mode>_bic_combine"
8226 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8231 [(match_operand:<VPRED> 1)
8232 (const_int SVE_KNOWN_PTRUE)
8233 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8234 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8236 (match_operand:<VPRED> 4 "register_operand" "Upa"))
8237 (match_dup:<VPRED> 1)))
8238 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8245 (const_int SVE_MAYBE_NOT_PTRUE)
8255 if (can_create_pseudo_p ())
8256 operands[5] = gen_reg_rtx (<VPRED>mode);
8260 ;; Same for unordered comparisons.
8261 (define_insn_and_split "*fcmuo<mode>_nor_combine"
8262 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8267 [(match_operand:<VPRED> 1)
8268 (const_int SVE_KNOWN_PTRUE)
8269 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8270 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8273 (match_operand:<VPRED> 4 "register_operand" "Upa")))
8274 (match_dup:<VPRED> 1)))
8275 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8282 (const_int SVE_KNOWN_PTRUE)
8295 if (can_create_pseudo_p ())
8296 operands[5] = gen_reg_rtx (<VPRED>mode);
8300 ;; -------------------------------------------------------------------------
8301 ;; ---- [FP] Absolute comparisons
8302 ;; -------------------------------------------------------------------------
8308 ;; -------------------------------------------------------------------------
8310 ;; Predicated floating-point absolute comparisons.
8311 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
8312 [(set (match_operand:<VPRED> 0 "register_operand")
8314 [(match_operand:<VPRED> 1 "register_operand")
8315 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8319 (match_operand:SVE_FULL_F 3 "register_operand")]
8324 (match_operand:SVE_FULL_F 4 "register_operand")]
8326 SVE_COND_FP_ABS_CMP))]
8330 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
8331 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8333 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8334 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8337 (const_int SVE_RELAXED_GP)
8338 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8342 (const_int SVE_RELAXED_GP)
8343 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8345 SVE_COND_FP_ABS_CMP))]
8347 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8348 "&& (!rtx_equal_p (operands[1], operands[5])
8349 || !rtx_equal_p (operands[1], operands[6]))"
8351 operands[5] = copy_rtx (operands[1]);
8352 operands[6] = copy_rtx (operands[1]);
8356 (define_insn "*aarch64_pred_fac<cmp_op><mode>_strict"
8357 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8359 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8360 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8363 (match_operand:SI 5 "aarch64_sve_gp_strictness")
8364 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8368 (match_operand:SI 6 "aarch64_sve_gp_strictness")
8369 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8371 SVE_COND_FP_ABS_CMP))]
8373 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8376 ;; -------------------------------------------------------------------------
8377 ;; ---- [PRED] Select
8378 ;; -------------------------------------------------------------------------
8381 ;; -------------------------------------------------------------------------
8383 (define_insn "@vcond_mask_<mode><mode>"
8384 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8387 (match_operand:PRED_ALL 3 "register_operand" "Upa")
8388 (match_operand:PRED_ALL 1 "register_operand" "Upa"))
8391 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
8393 "sel\t%0.b, %3, %1.b, %2.b"
8396 ;; -------------------------------------------------------------------------
8397 ;; ---- [PRED] Test bits
8398 ;; -------------------------------------------------------------------------
8401 ;; -------------------------------------------------------------------------
8403 ;; Branch based on predicate equality or inequality.
8404 (define_expand "cbranch<mode>4"
8407 (match_operator 0 "aarch64_equality_operator"
8408 [(match_operand:PRED_ALL 1 "register_operand")
8409 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
8410 (label_ref (match_operand 3 ""))
8414 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
8415 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
8416 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
8418 if (operands[2] == CONST0_RTX (<MODE>mode))
8422 pred = gen_reg_rtx (<MODE>mode);
8423 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
8426 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
8427 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
8428 operands[2] = const0_rtx;
8432 ;; See "Description of UNSPEC_PTEST" above for details.
8433 (define_insn "aarch64_ptest<mode>"
8434 [(set (reg:CC_NZC CC_REGNUM)
8435 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
8437 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8438 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
8444 ;; =========================================================================
8446 ;; =========================================================================
8448 ;; -------------------------------------------------------------------------
8449 ;; ---- [INT,FP] Conditional reductions
8450 ;; -------------------------------------------------------------------------
8454 ;; -------------------------------------------------------------------------
8456 ;; Set operand 0 to the last active element in operand 3, or to tied
8457 ;; operand 1 if no elements are active.
8458 (define_insn "@fold_extract_<last_op>_<mode>"
8459 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
8461 [(match_operand:<VEL> 1 "register_operand" "0, 0")
8462 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
8463 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
8467 clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
8468 clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
8471 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
8472 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
8474 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
8475 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
8476 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
8480 clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
8481 movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
8484 ;; -------------------------------------------------------------------------
8485 ;; ---- [INT] Tree reductions
8486 ;; -------------------------------------------------------------------------
8497 ;; -------------------------------------------------------------------------
8499 ;; Unpredicated integer add reduction.
8500 (define_expand "reduc_plus_scal_<mode>"
8501 [(match_operand:<VEL> 0 "register_operand")
8502 (match_operand:SVE_FULL_I 1 "register_operand")]
8505 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
8506 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
8507 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
8508 if (tmp != operands[0])
8509 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
8514 ;; Predicated integer add reduction. The result is always 64-bits.
8515 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8516 [(set (match_operand:DI 0 "register_operand" "=w")
8517 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
8518 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8520 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8521 "<su>addv\t%d0, %1, %2.<Vetype>"
8524 ;; Unpredicated integer reductions.
8525 (define_expand "reduc_<optab>_scal_<mode>"
8526 [(set (match_operand:<VEL> 0 "register_operand")
8527 (unspec:<VEL> [(match_dup 2)
8528 (match_operand:SVE_FULL_I 1 "register_operand")]
8529 SVE_INT_REDUCTION))]
8532 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8536 ;; Predicated integer reductions.
8537 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8538 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8539 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
8540 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8541 SVE_INT_REDUCTION))]
8543 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
8546 ;; -------------------------------------------------------------------------
8547 ;; ---- [FP] Tree reductions
8548 ;; -------------------------------------------------------------------------
8555 ;; -------------------------------------------------------------------------
8557 ;; Unpredicated floating-point tree reductions.
8558 (define_expand "reduc_<optab>_scal_<mode>"
8559 [(set (match_operand:<VEL> 0 "register_operand")
8560 (unspec:<VEL> [(match_dup 2)
8561 (match_operand:SVE_FULL_F 1 "register_operand")]
8565 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8569 (define_expand "reduc_<fmaxmin>_scal_<mode>"
8570 [(match_operand:<VEL> 0 "register_operand")
8571 (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
8575 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
8580 ;; Predicated floating-point tree reductions.
8581 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8582 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8583 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
8584 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8587 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
8590 ;; -------------------------------------------------------------------------
8591 ;; ---- [FP] Left-to-right reductions
8592 ;; -------------------------------------------------------------------------
8595 ;; -------------------------------------------------------------------------
8597 ;; Unpredicated in-order FP reductions.
8598 (define_expand "fold_left_plus_<mode>"
8599 [(set (match_operand:<VEL> 0 "register_operand")
8600 (unspec:<VEL> [(match_dup 3)
8601 (match_operand:<VEL> 1 "register_operand")
8602 (match_operand:SVE_FULL_F 2 "register_operand")]
8606 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
8610 ;; Predicated in-order FP reductions.
8611 (define_insn "mask_fold_left_plus_<mode>"
8612 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8613 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
8614 (match_operand:<VEL> 1 "register_operand" "0")
8615 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8618 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
8621 ;; =========================================================================
8623 ;; =========================================================================
8625 ;; -------------------------------------------------------------------------
8626 ;; ---- [INT,FP] General permutes
8627 ;; -------------------------------------------------------------------------
8630 ;; -------------------------------------------------------------------------
8632 (define_expand "vec_perm<mode>"
8633 [(match_operand:SVE_FULL 0 "register_operand")
8634 (match_operand:SVE_FULL 1 "register_operand")
8635 (match_operand:SVE_FULL 2 "register_operand")
8636 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
8637 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
8639 aarch64_expand_sve_vec_perm (operands[0], operands[1],
8640 operands[2], operands[3]);
8645 (define_insn "@aarch64_sve_tbl<mode>"
8646 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
8648 [(match_operand:SVE_FULL 1 "register_operand" "w")
8649 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
8652 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8655 ;; -------------------------------------------------------------------------
8656 ;; ---- [INT,FP] Special-purpose unary permutes
8657 ;; -------------------------------------------------------------------------
8662 ;; -------------------------------------------------------------------------
8664 ;; Compact active elements and pad with zeros.
8665 (define_insn "@aarch64_sve_compact<mode>"
8666 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
8668 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8669 (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
8670 UNSPEC_SVE_COMPACT))]
8672 "compact\t%0.<Vetype>, %1, %2.<Vetype>"
8675 ;; Duplicate one element of a vector.
8676 (define_insn "@aarch64_sve_dup_lane<mode>"
8677 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
8678 (vec_duplicate:SVE_ALL
8680 (match_operand:SVE_ALL 1 "register_operand" "w")
8681 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
8683 && IN_RANGE (INTVAL (operands[2]) * <container_bits> / 8, 0, 63)"
8684 "dup\t%0.<Vctype>, %1.<Vctype>[%2]"
8687 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
8689 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
8690 ;; number op2 + N of op1. (We don't need to distinguish between memory
8691 ;; and architectural register lane numbering for op1 or op0, since the
8692 ;; two numbering schemes are the same for SVE.)
8694 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
8695 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
8696 ;; of op0. We therefore get the correct result for both endiannesses.
8698 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
8699 ;; is in the opposite order to architectural register lane numbering.
8700 ;; Thus if we were to do this operation via a V128 temporary register,
8701 ;; the vec_select and vec_duplicate would both involve a reverse operation
8702 ;; for big-endian targets. In this fused pattern the two reverses cancel
8704 (define_insn "@aarch64_sve_dupq_lane<mode>"
8705 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
8706 (vec_duplicate:SVE_FULL
8708 (match_operand:SVE_FULL 1 "register_operand" "w")
8709 (match_operand 2 "ascending_int_parallel"))))]
8711 && (INTVAL (XVECEXP (operands[2], 0, 0))
8712 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
8713 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
8714 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
8716 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
8717 * GET_MODE_SIZE (<VEL>mode));
8718 operands[2] = gen_int_mode (byte / 16, DImode);
8719 return "dup\t%0.q, %1.q[%2]";
8723 ;; Reverse the order of elements within a full vector.
8724 (define_insn "@aarch64_sve_rev<mode>"
8725 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
8727 [(match_operand:SVE_ALL 1 "register_operand" "w")]
8730 "rev\t%0.<Vctype>, %1.<Vctype>")
8732 ;; -------------------------------------------------------------------------
8733 ;; ---- [INT,FP] Special-purpose binary permutes
8734 ;; -------------------------------------------------------------------------
8744 ;; -------------------------------------------------------------------------
8746 ;; Like EXT, but start at the first active element.
8747 (define_insn "@aarch64_sve_splice<mode>"
8748 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
8750 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
8751 (match_operand:SVE_FULL 2 "register_operand" "0, w")
8752 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
8753 UNSPEC_SVE_SPLICE))]
8756 splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
8757 movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
8758 [(set_attr "movprfx" "*, yes")]
8761 ;; Permutes that take half the elements from one vector and half the
8762 ;; elements from the other.
;; Element-granule binary permute; operands use the container-size
;; suffix <Vctype>.
8763 (define_insn "@aarch64_sve_<perm_insn><mode>"
8764 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
8766 [(match_operand:SVE_ALL 1 "register_operand" "w")
8767 (match_operand:SVE_ALL 2 "register_operand" "w")]
8770 "<perm_insn>\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
8773 ;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns
8774 ;; doesn't depend on the mode.
;; The .q suffix below selects 128-bit (quadword) granules regardless of
;; the element size.
8775 (define_insn "@aarch64_sve_<optab><mode>"
8776 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
8778 [(match_operand:SVE_FULL 1 "register_operand" "w")
8779 (match_operand:SVE_FULL 2 "register_operand" "w")]
8782 "<perm_insn>\t%0.q, %1.q, %2.q"
8785 ;; Concatenate two vectors and extract a subvector. Note that the
8786 ;; immediate (third) operand is the lane index not the byte index.
;; EXT takes a byte immediate, so the lane index (operand 3) is rescaled
;; to a byte offset; the condition checks that it fits EXT's 8-bit
;; immediate range (0-255).
8787 (define_insn "@aarch64_sve_ext<mode>"
8788 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
8790 [(match_operand:SVE_ALL 1 "register_operand" "0, w")
8791 (match_operand:SVE_ALL 2 "register_operand" "w, w")
8792 (match_operand:SI 3 "const_int_operand")]
8795 && IN_RANGE (INTVAL (operands[3]) * <container_bits> / 8, 0, 255)"
;; Rescale the lane index in place, then pick the tied or MOVPRFX form.
8797 operands[3] = GEN_INT (INTVAL (operands[3]) * <container_bits> / 8);
8798 return (which_alternative == 0
8799 ? "ext\\t%0.b, %0.b, %2.b, #%3"
8800 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
8802 [(set_attr "movprfx" "*,yes")]
8805 ;; -------------------------------------------------------------------------
8806 ;; ---- [PRED] Special-purpose unary permutes
8807 ;; -------------------------------------------------------------------------
8810 ;; -------------------------------------------------------------------------
;; Predicate REV; operands use the element-size suffix <Vetype>.
8812 (define_insn "@aarch64_sve_rev<mode>"
8813 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8814 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
8817 "rev\t%0.<Vetype>, %1.<Vetype>")
8819 ;; -------------------------------------------------------------------------
8820 ;; ---- [PRED] Special-purpose binary permutes
8821 ;; -------------------------------------------------------------------------
8829 ;; -------------------------------------------------------------------------
8831 ;; Permutes that take half the elements from one vector and half the
8832 ;; elements from the other.
;; Predicate form of the half-and-half binary permutes.
8833 (define_insn "@aarch64_sve_<perm_insn><mode>"
8834 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8835 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
8836 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8839 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8842 ;; Special purpose permute used by the predicate generation instructions.
8843 ;; Unlike the normal permute patterns, these instructions operate on VNx16BI
8844 ;; regardless of the element size, so that all input and output bits are
8845 ;; well-defined. Operand 3 then indicates the size of the permute.
;; The zero immediate in operand 3 contributes only its PRED_ALL mode,
;; which supplies the <Vetype> suffix used in the output template.
8846 (define_insn "@aarch64_sve_trn1_conv<mode>"
8847 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8848 (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
8849 (match_operand:VNx16BI 2 "register_operand" "Upa")
8850 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
8853 "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
8856 ;; =========================================================================
8858 ;; =========================================================================
8860 ;; -------------------------------------------------------------------------
8861 ;; ---- [INT<-INT] Packs
8862 ;; -------------------------------------------------------------------------
8865 ;; -------------------------------------------------------------------------
8867 ;; Integer pack. Use UZP1 on the narrower type, which discards
8868 ;; the high part of each wide element.
;; UZP1 on the narrow mode discards the high part of each wide element.
8869 (define_insn "vec_pack_trunc_<Vwide>"
8870 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
8871 (unspec:SVE_FULL_BHSI
8872 [(match_operand:<VWIDE> 1 "register_operand" "w")
8873 (match_operand:<VWIDE> 2 "register_operand" "w")]
8876 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8879 ;; -------------------------------------------------------------------------
8880 ;; ---- [INT<-INT] Unpacks
8881 ;; -------------------------------------------------------------------------
8887 ;; -------------------------------------------------------------------------
8889 ;; Unpack the low or high half of a vector, where "high" refers to
8890 ;; the low-numbered lanes for big-endian and the high-numbered lanes
8891 ;; for little-endian.
8892 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
8893 [(match_operand:<VWIDE> 0 "register_operand")
8895 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
;; <hi_lanes_optab> picks UNPKHI or UNPKLO so that "high" follows the
;; endianness convention described above.
8898 emit_insn ((<hi_lanes_optab>
8899 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
8900 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
8901 (operands[0], operands[1]));
8906 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
8907 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
8909 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
8912 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
8915 ;; -------------------------------------------------------------------------
8916 ;; ---- [INT<-FP] Conversions
8917 ;; -------------------------------------------------------------------------
8921 ;; -------------------------------------------------------------------------
8923 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
8924 ;; SF to SI or DF to DI).
;; The expander supplies an all-true governing predicate (operand 2) and
;; SVE_RELAXED_GP strictness so the predicated insns below can match.
8925 (define_expand "<optab><mode><v_int_equiv>2"
8926 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
8927 (unspec:<V_INT_EQUIV>
8929 (const_int SVE_RELAXED_GP)
8930 (match_operand:SVE_FULL_F 1 "register_operand")]
8934 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8938 ;; Predicated float-to-integer conversion, either to the same width or wider.
8939 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
8940 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
8941 (unspec:SVE_FULL_HSDI
8942 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
8943 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8944 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
8946 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8948 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
8949 movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
8950 [(set_attr "movprfx" "*,yes")]
8953 ;; Predicated narrowing float-to-integer conversion.
8954 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
8955 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
8957 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
8958 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8959 (match_operand:VNx2DF_ONLY 2 "register_operand" "0, w")]
8963 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
8964 movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
8965 [(set_attr "movprfx" "*,yes")]
8968 ;; Predicated float-to-integer conversion with merging, either to the same width or wider.
;; Merging forms: operand 3 supplies the fallback value ("0" = convert
;; in place, Dz = zeroing, w = general register, handled via MOVPRFX).
8970 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
8971 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
8972 (unspec:SVE_FULL_HSDI
8973 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
8974 (unspec:SVE_FULL_HSDI
8976 (const_int SVE_STRICT_GP)
8977 (match_operand:SVE_FULL_F 2 "register_operand")]
8979 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
8981 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8984 ;; The first alternative doesn't need the earlyclobber, but the only case
8985 ;; it would help is the uninteresting one in which operands 2 and 3 are
8986 ;; the same register (despite having different modes). Making all the
8987 ;; alternatives earlyclobber makes things more consistent for the
8988 ;; register allocator.
8989 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed"
8990 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
8991 (unspec:SVE_FULL_HSDI
8992 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8993 (unspec:SVE_FULL_HSDI
8995 (const_int SVE_RELAXED_GP)
8996 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
8998 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9000 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9002 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9003 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9004 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
;; Rewrite step: replace operand 4 with a copy of the governing
;; predicate (operand 1) once they are not already equal RTL.
9005 "&& !rtx_equal_p (operands[1], operands[4])"
9007 operands[4] = copy_rtx (operands[1]);
9009 [(set_attr "movprfx" "*,yes,yes")]
9012 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
9013 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
9014 (unspec:SVE_FULL_HSDI
9015 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9016 (unspec:SVE_FULL_HSDI
9018 (const_int SVE_STRICT_GP)
9019 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
9021 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9023 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9025 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9026 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9027 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
9028 [(set_attr "movprfx" "*,yes,yes")]
9031 ;; Predicated narrowing float-to-integer conversion with merging.
9032 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9033 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9035 [(match_operand:VNx2BI 1 "register_operand")
9038 (const_int SVE_STRICT_GP)
9039 (match_operand:VNx2DF_ONLY 2 "register_operand")]
9041 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
9046 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9047 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
9049 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
9052 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9053 (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
9055 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9059 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9060 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9061 movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
9062 [(set_attr "movprfx" "*,yes,yes")]
9065 ;; -------------------------------------------------------------------------
9066 ;; ---- [INT<-FP] Packs
9067 ;; -------------------------------------------------------------------------
9068 ;; The patterns in this section are synthetic.
9069 ;; -------------------------------------------------------------------------
9071 ;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Convert each DF input under a VNx2BI ptrue (relaxed GP) into a fresh
;; VNx4SI temporary, then combine the two temporaries with UZP1.
9072 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
9076 (const_int SVE_RELAXED_GP)
9077 (match_operand:VNx2DF 1 "register_operand")]
9082 (const_int SVE_RELAXED_GP)
9083 (match_operand:VNx2DF 2 "register_operand")]
9085 (set (match_operand:VNx4SI 0 "register_operand")
9086 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9089 operands[3] = aarch64_ptrue_reg (VNx2BImode);
9090 operands[4] = gen_reg_rtx (VNx4SImode);
9091 operands[5] = gen_reg_rtx (VNx4SImode);
9095 ;; -------------------------------------------------------------------------
9096 ;; ---- [INT<-FP] Unpacks
9097 ;; -------------------------------------------------------------------------
9098 ;; No patterns here yet!
9099 ;; -------------------------------------------------------------------------
9101 ;; -------------------------------------------------------------------------
9102 ;; ---- [FP<-INT] Conversions
9103 ;; -------------------------------------------------------------------------
9107 ;; -------------------------------------------------------------------------
9109 ;; Unpredicated conversion of integers to floats of the same size
9110 ;; (HI to HF, SI to SF or DI to DF).
;; The expander supplies an all-true governing predicate (operand 2) and
;; SVE_RELAXED_GP strictness so the predicated insns below can match.
9111 (define_expand "<optab><v_int_equiv><mode>2"
9112 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9115 (const_int SVE_RELAXED_GP)
9116 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
9120 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
9124 ;; Predicated integer-to-float conversion, either to the same width or narrower.
;; <su>cvtf with the input tied to the destination ("0") or copied via
;; MOVPRFX; the condition requires the integer source to be at least as
;; wide as the FP result.
9126 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9127 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
9129 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
9130 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9131 (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")]
9133 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9135 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9136 movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
9137 [(set_attr "movprfx" "*,yes")]
9140 ;; Predicated widening integer-to-float conversion.
9141 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9142 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w")
9144 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
9145 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9146 (match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
9150 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9151 movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
9152 [(set_attr "movprfx" "*,yes")]
9155 ;; Predicated integer-to-float conversion with merging, either to the same
9156 ;; width or narrower.
;; Merging forms: operand 3 supplies the fallback value ("0" = convert
;; in place, Dz = zeroing, w = general register, handled via MOVPRFX).
9157 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9158 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9160 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9163 (const_int SVE_STRICT_GP)
9164 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9166 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9168 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9171 ;; The first alternative doesn't need the earlyclobber, but the only case
9172 ;; it would help is the uninteresting one in which operands 2 and 3 are
9173 ;; the same register (despite having different modes). Making all the
9174 ;; alternatives earlyclobber makes things more consistent for the
9175 ;; register allocator.
9176 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed"
9177 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
9179 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9182 (const_int SVE_RELAXED_GP)
9183 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
9185 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9187 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9189 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9190 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9191 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
;; Rewrite step: replace operand 4 with a copy of the governing
;; predicate (operand 1) once they are not already equal RTL.
9192 "&& !rtx_equal_p (operands[1], operands[4])"
9194 operands[4] = copy_rtx (operands[1]);
9196 [(set_attr "movprfx" "*,yes,yes")]
9199 (define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
9200 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
9202 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9205 (const_int SVE_STRICT_GP)
9206 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
9208 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9210 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9212 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9213 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9214 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
9215 [(set_attr "movprfx" "*,yes,yes")]
9218 ;; Predicated widening integer-to-float conversion with merging.
9219 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9220 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9222 [(match_operand:VNx2BI 1 "register_operand")
9225 (const_int SVE_STRICT_GP)
9226 (match_operand:VNx4SI_ONLY 2 "register_operand")]
9228 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
9233 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9234 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
9236 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
9239 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9240 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
9242 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9246 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9247 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9248 movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
9249 [(set_attr "movprfx" "*,yes,yes")]
9252 ;; -------------------------------------------------------------------------
9253 ;; ---- [FP<-INT] Packs
9254 ;; -------------------------------------------------------------------------
9255 ;; No patterns here yet!
9256 ;; -------------------------------------------------------------------------
9258 ;; -------------------------------------------------------------------------
9259 ;; ---- [FP<-INT] Unpacks
9260 ;; -------------------------------------------------------------------------
9261 ;; The patterns in this section are synthetic.
9262 ;; -------------------------------------------------------------------------
9264 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
9265 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
9266 ;; unpacked VNx4SI to VNx2DF.
;; Step 1: ZIP the chosen half of operand 1 with itself into a VNx4SI
;; temporary; step 2: widening <su>cvtf under a ptrue with relaxed GP.
9267 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
9268 [(match_operand:VNx2DF 0 "register_operand")
9270 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
9274 /* Use ZIP to do the unpack, since we don't care about the upper halves
9275 and since it has the nice property of not needing any subregs.
9276 If using UUNPK* turns out to be preferable, we could model it as
9277 a ZIP whose first operand is zero. */
9278 rtx temp = gen_reg_rtx (VNx4SImode);
9279 emit_insn ((<hi_lanes_optab>
9280 ? gen_aarch64_sve_zip2vnx4si
9281 : gen_aarch64_sve_zip1vnx4si)
9282 (temp, operands[1], operands[1]));
9283 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
9284 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9285 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
9286 (operands[0], ptrue, temp, strictness));
9291 ;; -------------------------------------------------------------------------
9292 ;; ---- [FP<-FP] Packs
9293 ;; -------------------------------------------------------------------------
9296 ;; -------------------------------------------------------------------------
9298 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
9299 ;; the results into a single vector.
;; Truncate each wide input under a ptrue (relaxed GP) into a fresh
;; narrow temporary, then combine the two temporaries with UZP1.
9300 (define_expand "vec_pack_trunc_<Vwide>"
9302 (unspec:SVE_FULL_HSF
9304 (const_int SVE_RELAXED_GP)
9305 (match_operand:<VWIDE> 1 "register_operand")]
9308 (unspec:SVE_FULL_HSF
9310 (const_int SVE_RELAXED_GP)
9311 (match_operand:<VWIDE> 2 "register_operand")]
9313 (set (match_operand:SVE_FULL_HSF 0 "register_operand")
9314 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9317 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9318 operands[4] = gen_reg_rtx (<MODE>mode);
9319 operands[5] = gen_reg_rtx (<MODE>mode);
9323 ;; Predicated float-to-float truncation.
;; Narrowing FCVT; the condition requires the source elements to be
;; strictly wider than the destination elements.
9324 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9325 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
9326 (unspec:SVE_FULL_HSF
9327 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
9328 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9329 (match_operand:SVE_FULL_SDF 2 "register_operand" "0, w")]
9331 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9333 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9334 movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
9335 [(set_attr "movprfx" "*,yes")]
9338 ;; Predicated float-to-float truncation with merging.
9339 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9340 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9341 (unspec:SVE_FULL_HSF
9342 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9343 (unspec:SVE_FULL_HSF
9345 (const_int SVE_STRICT_GP)
9346 (match_operand:SVE_FULL_SDF 2 "register_operand")]
9348 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
9350 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
;; Merging insn: operand 3 is the fallback ("0" in place, Dz zeroing,
;; w general register via MOVPRFX).
9353 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9354 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
9355 (unspec:SVE_FULL_HSF
9356 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9357 (unspec:SVE_FULL_HSF
9359 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9360 (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
9362 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9364 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9366 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9367 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9368 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
9369 [(set_attr "movprfx" "*,yes,yes")]
9372 ;; -------------------------------------------------------------------------
9373 ;; ---- [FP<-FP] Packs (bfloat16)
9374 ;; -------------------------------------------------------------------------
9378 ;; -------------------------------------------------------------------------
9380 ;; Predicated BFCVT.
;; BFCVT converts single-precision elements (.s) to bfloat16 (.h) under
;; a VNx4BI governing predicate.
9381 (define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9382 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w")
9384 [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl")
9385 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9386 (match_operand:VNx4SF_ONLY 2 "register_operand" "0, w")]
9390 bfcvt\t%0.h, %1/m, %2.s
9391 movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s"
9392 [(set_attr "movprfx" "*,yes")]
9395 ;; Predicated BFCVT with merging.
9396 (define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9397 [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9399 [(match_operand:VNx4BI 1 "register_operand")
9402 (const_int SVE_STRICT_GP)
9403 (match_operand:VNx4SF_ONLY 2 "register_operand")]
9405 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
;; Merging insn: operand 3 is the fallback ("0" in place, Dz zeroing,
;; w general register via MOVPRFX).
9410 (define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9411 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
9413 [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
9416 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9417 (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
9419 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9423 bfcvt\t%0.h, %1/m, %2.s
9424 movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
9425 movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
9426 [(set_attr "movprfx" "*,yes,yes")]
9429 ;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
9430 ;; pair because the even elements always have to be supplied for active
9431 ;; elements, even if the inactive elements don't matter.
9433 ;; This instruction does not take MOVPRFX.
;; Operand 1 is tied to the destination (constraint "0"), so the
;; elements BFCVTNT does not write are taken from it.
9434 (define_insn "@aarch64_sve_cvtnt<mode>"
9435 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
9437 [(match_operand:VNx4BI 2 "register_operand" "Upl")
9438 (const_int SVE_STRICT_GP)
9439 (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
9440 (match_operand:VNx4SF 3 "register_operand" "w")]
9441 UNSPEC_COND_FCVTNT))]
9443 "bfcvtnt\t%0.h, %2/m, %3.s"
9446 ;; -------------------------------------------------------------------------
9447 ;; ---- [FP<-FP] Unpacks
9448 ;; -------------------------------------------------------------------------
9451 ;; -------------------------------------------------------------------------
9453 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
9454 ;; First unpack the source without conversion, then float-convert the unpacked result.
;; Step 1: ZIP the chosen half of operand 1 with itself into a narrow
;; temporary; step 2: widening FCVT under a ptrue with relaxed GP.
9456 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
9457 [(match_operand:<VWIDE> 0 "register_operand")
9458 (unspec:SVE_FULL_HSF
9459 [(match_operand:SVE_FULL_HSF 1 "register_operand")]
9463 /* Use ZIP to do the unpack, since we don't care about the upper halves
9464 and since it has the nice property of not needing any subregs.
9465 If using UUNPK* turns out to be preferable, we could model it as
9466 a ZIP whose first operand is zero. */
9467 rtx temp = gen_reg_rtx (<MODE>mode);
9468 emit_insn ((<hi_lanes_optab>
9469 ? gen_aarch64_sve_zip2<mode>
9470 : gen_aarch64_sve_zip1<mode>)
9471 (temp, operands[1], operands[1]));
9472 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9473 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9474 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
9475 (operands[0], ptrue, temp, strictness));
9480 ;; Predicated float-to-float extension.
;; Widening FCVT; the condition requires the destination elements to be
;; strictly wider than the source elements.
9481 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9482 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w")
9483 (unspec:SVE_FULL_SDF
9484 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
9485 (match_operand:SI 3 "aarch64_sve_gp_strictness")
9486 (match_operand:SVE_FULL_HSF 2 "register_operand" "0, w")]
9488 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9490 fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9491 movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
9492 [(set_attr "movprfx" "*,yes")]
9495 ;; Predicated float-to-float extension with merging.
9496 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9497 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9498 (unspec:SVE_FULL_SDF
9499 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9500 (unspec:SVE_FULL_SDF
9502 (const_int SVE_STRICT_GP)
9503 (match_operand:SVE_FULL_HSF 2 "register_operand")]
9505 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
9507 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
;; Merging insn: operand 3 is the fallback ("0" in place, Dz zeroing,
;; w general register via MOVPRFX).
9510 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9511 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
9512 (unspec:SVE_FULL_SDF
9513 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9514 (unspec:SVE_FULL_SDF
9516 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9517 (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
9519 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9521 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9523 fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9524 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9525 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
9526 [(set_attr "movprfx" "*,yes,yes")]
9529 ;; -------------------------------------------------------------------------
9530 ;; ---- [PRED<-PRED] Packs
9531 ;; -------------------------------------------------------------------------
9534 ;; -------------------------------------------------------------------------
9536 ;; Predicate pack. Use UZP1 on the narrower type, which discards
9537 ;; the high part of each wide element.
;; Predicate pack via UZP1 on the narrower mode, discarding the high
;; part of each wide element.
9538 (define_insn "vec_pack_trunc_<Vwide>"
9539 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
9541 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
9542 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
9545 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9548 ;; -------------------------------------------------------------------------
9549 ;; ---- [PRED<-PRED] Unpacks
9550 ;; -------------------------------------------------------------------------
9554 ;; -------------------------------------------------------------------------
9556 ;; Unpack the low or high half of a predicate, where "high" refers to
9557 ;; the low-numbered lanes for big-endian and the high-numbered lanes
9558 ;; for little-endian.
;; <hi_lanes_optab> selects PUNPKHI or PUNPKLO for the requested half.
9559 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
9560 [(match_operand:<VWIDE> 0 "register_operand")
9561 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
9565 emit_insn ((<hi_lanes_optab>
9566 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
9567 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
9568 (operands[0], operands[1]));
;; PUNPK always prints the .h/.b forms regardless of the element size.
9573 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
9574 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
9575 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
9578 "punpk<perm_hilo>\t%0.h, %1.b"
9581 ;; =========================================================================
9582 ;; == Vector partitioning
9583 ;; =========================================================================
9585 ;; -------------------------------------------------------------------------
9586 ;; ---- [PRED] Unary partitioning
9587 ;; -------------------------------------------------------------------------
9593 ;; -------------------------------------------------------------------------
9595 ;; Note that unlike most other instructions that have both merging and
9596 ;; zeroing forms, these instructions don't operate elementwise and so
9597 ;; don't fit the IFN_COND model.
;; Unary BRKs.  Alternative 1 (operand 3 = Dz) is the zeroing form
;; (%1/z); alternative 2 ties operand 3 to the destination ("0") for
;; the merging form (%1/m).
9598 (define_insn "@aarch64_brk<brk_op>"
9599 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
9601 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
9602 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
9603 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
9607 brk<brk_op>\t%0.b, %1/z, %2.b
9608 brk<brk_op>\t%0.b, %1/m, %2.b"
9611 ;; Same, but also producing a flags result.
9612 (define_insn "*aarch64_brk<brk_op>_cc"
9613 [(set (reg:CC_NZC CC_REGNUM)
9615 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
9617 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9620 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
9621 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
9624 (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
9632 brk<brk_op>s\t%0.b, %1/z, %2.b
9633 brk<brk_op>s\t%0.b, %1/m, %2.b"
9636 ;; Same, but with only the flags result being interesting.
;; The _ptest variant keeps only the flags; the predicate result is a
;; clobbered scratch register.
9637 (define_insn "*aarch64_brk<brk_op>_ptest"
9638 [(set (reg:CC_NZC CC_REGNUM)
9640 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
9642 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9645 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
9646 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
9649 (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
9652 brk<brk_op>s\t%0.b, %1/z, %2.b
9653 brk<brk_op>s\t%0.b, %1/m, %2.b"
9656 ;; -------------------------------------------------------------------------
9657 ;; ---- [PRED] Binary partitioning
9658 ;; -------------------------------------------------------------------------
9666 ;; -------------------------------------------------------------------------
9668 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
;; Binary BRKs (BRKN, BRKPA, BRKPB); <brk_reg_con> and <brk_reg_opno>
;; supply the per-instruction constraint and printed operand number.
9669 (define_insn "@aarch64_brk<brk_op>"
9670 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
9672 [(match_operand:VNx16BI 1 "register_operand" "Upa")
9673 (match_operand:VNx16BI 2 "register_operand" "Upa")
9674 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
9677 "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
9680 ;; Same, but also producing a flags result.
9681 (define_insn "*aarch64_brk<brk_op>_cc"
9682 [(set (reg:CC_NZC CC_REGNUM)
9684 [(match_operand:VNx16BI 1 "register_operand" "Upa")
9686 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9689 (match_operand:VNx16BI 2 "register_operand" "Upa")
9690 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
9693 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
9700 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
9703 ;; Same, but with only the flags result being interesting.
;; The _ptest variant keeps only the flags; the predicate result is a
;; clobbered scratch register.
9704 (define_insn "*aarch64_brk<brk_op>_ptest"
9705 [(set (reg:CC_NZC CC_REGNUM)
9707 [(match_operand:VNx16BI 1 "register_operand" "Upa")
9709 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9712 (match_operand:VNx16BI 2 "register_operand" "Upa")
9713 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
9716 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
9718 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
9721 ;; -------------------------------------------------------------------------
9722 ;; ---- [PRED] Scalarization
9723 ;; -------------------------------------------------------------------------
9727 ;; -------------------------------------------------------------------------
;;
;; Predicate scalarization operations (<sve_pred_op> iterator; presumably
;; PFIRST/PNEXT-style instructions — confirm against the iterator
;; definition).  Operand 1 is the governing predicate, operand 2 its ptrue
;; flag, and operand 3 (tied to the output) is the predicate being
;; advanced.  The "<max_elem_bits> >= <elem_bits>" condition restricts
;; which element widths each operation supports.  All forms clobber or set
;; the condition flags.
9729 (define_insn "@aarch64_sve_<sve_pred_op><mode>"
9730 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9732 [(match_operand:PRED_ALL 1 "register_operand" "Upa")
9733 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
9734 (match_operand:PRED_ALL 3 "register_operand" "0")]
9736 (clobber (reg:CC_NZC CC_REGNUM))]
9737 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
9738 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
9741 ;; Same, but also producing a flags result.
;; Operands 4/5 repeat the governing predicate and ptrue flag inside the
;; flags-setting part; aarch64_sve_same_pred_for_ptest_p checks they agree
;; with operands 2/3, and the rewrite step below makes them literally
;; identical RTL once that holds.
9742 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
9743 [(set (reg:CC_NZC CC_REGNUM)
9745 [(match_operand:VNx16BI 1 "register_operand" "Upa")
9747 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
9750 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
9751 (match_operand:PRED_ALL 6 "register_operand" "0")]
9754 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9761 && <max_elem_bits> >= <elem_bits>
9762 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
9763 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
9764 "&& !rtx_equal_p (operands[2], operands[4])"
9766 operands[4] = operands[2];
9767 operands[5] = operands[3];
9771 ;; Same, but with only the flags result being interesting.
;; Identical to the _cc form except that the predicate result is only a
;; clobbered scratch; the same operand-canonicalizing rewrite applies.
9772 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
9773 [(set (reg:CC_NZC CC_REGNUM)
9775 [(match_operand:VNx16BI 1 "register_operand" "Upa")
9777 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
9780 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
9781 (match_operand:PRED_ALL 6 "register_operand" "0")]
9784 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
9786 && <max_elem_bits> >= <elem_bits>
9787 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
9788 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
9789 "&& !rtx_equal_p (operands[2], operands[4])"
9791 operands[4] = operands[2];
9792 operands[5] = operands[3];
9796 ;; =========================================================================
9797 ;; == Counting elements
9798 ;; =========================================================================
9800 ;; -------------------------------------------------------------------------
9801 ;; ---- [INT] Count elements in a pattern (scalar)
9802 ;; -------------------------------------------------------------------------
9808 ;; -------------------------------------------------------------------------
9810 ;; Count the number of elements in an svpattern. Operand 1 is the pattern,
9811 ;; operand 2 is the number of elements that fit in a 128-bit block, and
9812 ;; operand 3 is a multiplier in the range [1, 16].
9814 ;; Note that this pattern isn't used for SV_ALL (but would work for that too).
;;
;; All three operands are compile-time constants;
;; aarch64_output_sve_cnt_pat_immediate folds them into the immediate
;; fields of the CNT instruction it prints.
9815 (define_insn "aarch64_sve_cnt_pat"
9816 [(set (match_operand:DI 0 "register_operand" "=r")
9818 (unspec:SI [(match_operand:DI 1 "const_int_operand")
9819 (match_operand:DI 2 "const_int_operand")
9820 (match_operand:DI 3 "const_int_operand")]
9821 UNSPEC_SVE_CNT_PAT)))]
9824 return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
9828 ;; -------------------------------------------------------------------------
9829 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
9830 ;; -------------------------------------------------------------------------
9835 ;; -------------------------------------------------------------------------
9837 ;; Increment a DImode register by the number of elements in an svpattern.
9838 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Operands 2-4 mirror operands 1-3 of aarch64_sve_cnt_pat (pattern,
;; elements per 128-bit block, multiplier); hence the "operands + 2"
;; passed to the output routine below.
9839 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9840 [(set (match_operand:DI 0 "register_operand" "=r")
9841 (ANY_PLUS:DI (zero_extend:DI
9842 (unspec:SI [(match_operand:DI 2 "const_int_operand")
9843 (match_operand:DI 3 "const_int_operand")
9844 (match_operand:DI 4 "const_int_operand")]
9845 UNSPEC_SVE_CNT_PAT))
9846 (match_operand:DI_ONLY 1 "register_operand" "0")))]
9849 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
9854 ;; Increment an SImode register by the number of elements in an svpattern
9855 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
;; behavior.  Unnamed ("*"-prefixed) pattern: matched by combine rather
;; than generated directly.
9857 (define_insn "*aarch64_sve_incsi_pat"
9858 [(set (match_operand:SI 0 "register_operand" "=r")
9859 (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
9860 (match_operand:DI 3 "const_int_operand")
9861 (match_operand:DI 4 "const_int_operand")]
9863 (match_operand:SI 1 "register_operand" "0")))]
9866 return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
9870 ;; Increment an SImode register by the number of elements in an svpattern
9871 ;; using saturating arithmetic, extending the result to 64 bits.
9873 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; The C body below chooses the register template: the signed-saturating
;; form (SS_PLUS) prints both "%x0, %w0", the other form just "%w0".
9874 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9875 [(set (match_operand:DI 0 "register_operand" "=r")
9878 (unspec:SI [(match_operand:DI 2 "const_int_operand")
9879 (match_operand:DI 3 "const_int_operand")
9880 (match_operand:DI 4 "const_int_operand")]
9882 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
9885 const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
9886 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
9891 ;; -------------------------------------------------------------------------
9892 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
9893 ;; -------------------------------------------------------------------------
9898 ;; -------------------------------------------------------------------------
9900 ;; Increment a vector of DIs by the number of elements in an svpattern.
9901 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Two alternatives: operand 1 tied to the output ("0"), or a separate
;; register reached via a leading MOVPRFX (the "?&w"/movprfx alternative).
9902 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9903 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
9905 (vec_duplicate:VNx2DI
9907 (unspec:SI [(match_operand:DI 2 "const_int_operand")
9908 (match_operand:DI 3 "const_int_operand")
9909 (match_operand:DI 4 "const_int_operand")]
9910 UNSPEC_SVE_CNT_PAT)))
9911 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
9914 if (which_alternative == 1)
9915 output_asm_insn ("movprfx\t%0, %1", operands);
9916 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
9919 [(set_attr "movprfx" "*,yes")]
9922 ;; Increment a vector of SIs by the number of elements in an svpattern.
9923 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Same structure as the VNx2DI pattern above, for 32-bit elements.
9924 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9925 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
9927 (vec_duplicate:VNx4SI
9928 (unspec:SI [(match_operand:DI 2 "const_int_operand")
9929 (match_operand:DI 3 "const_int_operand")
9930 (match_operand:DI 4 "const_int_operand")]
9931 UNSPEC_SVE_CNT_PAT))
9932 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
9935 if (which_alternative == 1)
9936 output_asm_insn ("movprfx\t%0, %1", operands);
9937 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
9940 [(set_attr "movprfx" "*,yes")]
9943 ;; Increment a vector of HIs by the number of elements in an svpattern.
9944 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; The count is computed in SImode, so the expand form introduces the
;; narrowing to HI, and the "*"-prefixed insn below matches that shape via
;; a lowpart subreg (subreg_lowpart_operator, operand 5).
9945 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
9946 [(set (match_operand:VNx8HI 0 "register_operand")
9948 (vec_duplicate:VNx8HI
9950 (unspec:SI [(match_operand:DI 2 "const_int_operand")
9951 (match_operand:DI 3 "const_int_operand")
9952 (match_operand:DI 4 "const_int_operand")]
9953 UNSPEC_SVE_CNT_PAT)))
9954 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
9958 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
9959 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
9961 (vec_duplicate:VNx8HI
9962 (match_operator:HI 5 "subreg_lowpart_operator"
9963 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
9964 (match_operand:DI 3 "const_int_operand")
9965 (match_operand:DI 4 "const_int_operand")]
9966 UNSPEC_SVE_CNT_PAT)]))
9967 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
9970 if (which_alternative == 1)
9971 output_asm_insn ("movprfx\t%0, %1", operands);
9972 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
9975 [(set_attr "movprfx" "*,yes")]
9978 ;; -------------------------------------------------------------------------
9979 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
9980 ;; -------------------------------------------------------------------------
9985 ;; -------------------------------------------------------------------------
9987 ;; Decrement a DImode register by the number of elements in an svpattern.
9988 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Mirror image of the increment pattern: operand 1 (tied to the output)
;; is the minuend and the counted value is the subtrahend.  Operands 2-4
;; mirror operands 1-3 of aarch64_sve_cnt_pat.
9989 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9990 [(set (match_operand:DI 0 "register_operand" "=r")
9991 (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
9993 (unspec:SI [(match_operand:DI 2 "const_int_operand")
9994 (match_operand:DI 3 "const_int_operand")
9995 (match_operand:DI 4 "const_int_operand")]
9996 UNSPEC_SVE_CNT_PAT))))]
9999 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
10004 ;; Decrement an SImode register by the number of elements in an svpattern
10005 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
;; behavior.  Unnamed ("*"-prefixed) pattern: matched by combine rather
;; than generated directly.
10007 (define_insn "*aarch64_sve_decsi_pat"
10008 [(set (match_operand:SI 0 "register_operand" "=r")
10009 (minus:SI (match_operand:SI 1 "register_operand" "0")
10010 (unspec:SI [(match_operand:DI 2 "const_int_operand")
10011 (match_operand:DI 3 "const_int_operand")
10012 (match_operand:DI 4 "const_int_operand")]
10013 UNSPEC_SVE_CNT_PAT)))]
10016 return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
10020 ;; Decrement an SImode register by the number of elements in an svpattern
10021 ;; using saturating arithmetic, extending the result to 64 bits.
10023 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; The C body below chooses the register template: the signed-saturating
;; form (SS_MINUS) prints both "%x0, %w0", the other form just "%w0".
10024 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10025 [(set (match_operand:DI 0 "register_operand" "=r")
10026 (<paired_extend>:DI
10028 (match_operand:SI_ONLY 1 "register_operand" "0")
10029 (unspec:SI [(match_operand:DI 2 "const_int_operand")
10030 (match_operand:DI 3 "const_int_operand")
10031 (match_operand:DI 4 "const_int_operand")]
10032 UNSPEC_SVE_CNT_PAT))))]
10035 const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
10036 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
10041 ;; -------------------------------------------------------------------------
10042 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
10043 ;; -------------------------------------------------------------------------
10048 ;; -------------------------------------------------------------------------
10050 ;; Decrement a vector of DIs by the number of elements in an svpattern.
10051 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Two alternatives: operand 1 tied to the output ("0"), or a separate
;; register reached via a leading MOVPRFX (the "?&w"/movprfx alternative).
10052 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10053 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10055 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
10056 (vec_duplicate:VNx2DI
10058 (unspec:SI [(match_operand:DI 2 "const_int_operand")
10059 (match_operand:DI 3 "const_int_operand")
10060 (match_operand:DI 4 "const_int_operand")]
10061 UNSPEC_SVE_CNT_PAT)))))]
10064 if (which_alternative == 1)
10065 output_asm_insn ("movprfx\t%0, %1", operands);
10066 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10069 [(set_attr "movprfx" "*,yes")]
10072 ;; Decrement a vector of SIs by the number of elements in an svpattern.
10073 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Same structure as the VNx2DI pattern above, for 32-bit elements.
10074 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10075 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10077 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
10078 (vec_duplicate:VNx4SI
10079 (unspec:SI [(match_operand:DI 2 "const_int_operand")
10080 (match_operand:DI 3 "const_int_operand")
10081 (match_operand:DI 4 "const_int_operand")]
10082 UNSPEC_SVE_CNT_PAT))))]
10085 if (which_alternative == 1)
10086 output_asm_insn ("movprfx\t%0, %1", operands);
10087 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10090 [(set_attr "movprfx" "*,yes")]
10093 ;; Decrement a vector of HIs by the number of elements in an svpattern.
10094 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; The count is computed in SImode, so the expand form introduces the
;; narrowing to HI, and the "*"-prefixed insn below matches that shape via
;; a lowpart subreg (subreg_lowpart_operator, operand 5).
10095 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
10096 [(set (match_operand:VNx8HI 0 "register_operand")
10098 (match_operand:VNx8HI_ONLY 1 "register_operand")
10099 (vec_duplicate:VNx8HI
10101 (unspec:SI [(match_operand:DI 2 "const_int_operand")
10102 (match_operand:DI 3 "const_int_operand")
10103 (match_operand:DI 4 "const_int_operand")]
10104 UNSPEC_SVE_CNT_PAT)))))]
10108 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
10109 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10111 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
10112 (vec_duplicate:VNx8HI
10113 (match_operator:HI 5 "subreg_lowpart_operator"
10114 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
10115 (match_operand:DI 3 "const_int_operand")
10116 (match_operand:DI 4 "const_int_operand")]
10117 UNSPEC_SVE_CNT_PAT)]))))]
10120 if (which_alternative == 1)
10121 output_asm_insn ("movprfx\t%0, %1", operands);
10122 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10125 [(set_attr "movprfx" "*,yes")]
10128 ;; -------------------------------------------------------------------------
10129 ;; ---- [INT] Count elements in a predicate (scalar)
10130 ;; -------------------------------------------------------------------------
10133 ;; -------------------------------------------------------------------------
10135 ;; Count the number of set bits in predicate operand 3 under governing
10136 ;; predicate operand 1.  Operand 2 is the aarch64_sve_ptrue_flag that is
;; true if operand 1 is known to be all-true.  (NOTE(review): the previous
;; comment attributed the flag to operand 3, but operand 3 is the counted
;; predicate register and the flag is operand 2.)
10137 (define_insn "@aarch64_pred_cntp<mode>"
10138 [(set (match_operand:DI 0 "register_operand" "=r")
10140 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
10141 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
10142 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
10145 "cntp\t%x0, %1, %3.<Vetype>")
10147 ;; -------------------------------------------------------------------------
10148 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
10149 ;; -------------------------------------------------------------------------
10154 ;; -------------------------------------------------------------------------
10156 ;; Increment a DImode register by the number of set bits in a predicate.
10157 ;; See aarch64_sve_cntp for a description of the operands.
;; The expand emits the count with an all-true governing predicate
;; (operand 3 = CONSTM1) and SVE_KNOWN_PTRUE.  The "*"-prefixed insn that
;; follows leaves operand 3 as an unconstrained match_operand so combine
;; can match first; its rewrite step then forces operand 3 back to the
;; all-ones constant once matched.
10158 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10159 [(set (match_operand:DI 0 "register_operand")
10162 (unspec:SI [(match_dup 3)
10163 (const_int SVE_KNOWN_PTRUE)
10164 (match_operand:PRED_ALL 2 "register_operand")]
10166 (match_operand:DI_ONLY 1 "register_operand")))]
10169 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10173 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10174 [(set (match_operand:DI 0 "register_operand" "=r")
10177 (unspec:SI [(match_operand 3)
10178 (const_int SVE_KNOWN_PTRUE)
10179 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10181 (match_operand:DI_ONLY 1 "register_operand" "0")))]
10183 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
10184 "&& !CONSTANT_P (operands[3])"
10186 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10190 ;; Increment an SImode register by the number of set bits in a predicate
10191 ;; using modular arithmetic. See aarch64_sve_cntp for a description of
;; the operands.  Unnamed pattern matched by combine; same operand-3
;; rewrite as above.
10193 (define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
10194 [(set (match_operand:SI 0 "register_operand" "=r")
10196 (unspec:SI [(match_operand 3)
10197 (const_int SVE_KNOWN_PTRUE)
10198 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10200 (match_operand:SI 1 "register_operand" "0")))]
10202 "incp\t%x0, %2.<Vetype>"
10203 "&& !CONSTANT_P (operands[3])"
10205 operands[3] = CONSTM1_RTX (<MODE>mode);
10209 ;; Increment an SImode register by the number of set bits in a predicate
10210 ;; using saturating arithmetic, extending the result to 64 bits.
10212 ;; See aarch64_sve_cntp for a description of the operands.
;; In the insn form below, the signed-saturating variant (SS_PLUS) prints
;; both "%x0, %w0" while the other variant prints just "%w0".
10213 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10214 [(set (match_operand:DI 0 "register_operand")
10215 (<paired_extend>:DI
10217 (unspec:SI [(match_dup 3)
10218 (const_int SVE_KNOWN_PTRUE)
10219 (match_operand:PRED_ALL 2 "register_operand")]
10221 (match_operand:SI_ONLY 1 "register_operand"))))]
10224 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10228 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10229 [(set (match_operand:DI 0 "register_operand" "=r")
10230 (<paired_extend>:DI
10232 (unspec:SI [(match_operand 3)
10233 (const_int SVE_KNOWN_PTRUE)
10234 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10236 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
10239 if (<CODE> == SS_PLUS)
10240 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
10242 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
10244 "&& !CONSTANT_P (operands[3])"
10246 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10250 ;; -------------------------------------------------------------------------
10251 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
10252 ;; -------------------------------------------------------------------------
10257 ;; -------------------------------------------------------------------------
10259 ;; Increment a vector of DIs by the number of set bits in a predicate.
10260 ;; See aarch64_sve_cntp for a description of the operands.
;; The expand uses an all-true governing predicate (operand 3 = CONSTM1,
;; SVE_KNOWN_PTRUE); the insn form leaves it unconstrained for combine and
;; rewrites it back to the constant.  Alternative 1 uses MOVPRFX for a
;; non-tied destination.
10261 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10262 [(set (match_operand:VNx2DI 0 "register_operand")
10264 (vec_duplicate:VNx2DI
10268 (const_int SVE_KNOWN_PTRUE)
10269 (match_operand:<VPRED> 2 "register_operand")]
10271 (match_operand:VNx2DI_ONLY 1 "register_operand")))]
10274 operands[3] = CONSTM1_RTX (<VPRED>mode);
10278 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10279 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10281 (vec_duplicate:VNx2DI
10285 (const_int SVE_KNOWN_PTRUE)
10286 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10288 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
10291 <inc_dec>p\t%0.d, %2
10292 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
10293 "&& !CONSTANT_P (operands[3])"
10295 operands[3] = CONSTM1_RTX (<VPRED>mode);
10297 [(set_attr "movprfx" "*,yes")]
10300 ;; Increment a vector of SIs by the number of set bits in a predicate.
10301 ;; See aarch64_sve_cntp for a description of the operands.
;; Same structure as the VNx2DI pair above, for 32-bit elements.
10302 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10303 [(set (match_operand:VNx4SI 0 "register_operand")
10305 (vec_duplicate:VNx4SI
10308 (const_int SVE_KNOWN_PTRUE)
10309 (match_operand:<VPRED> 2 "register_operand")]
10311 (match_operand:VNx4SI_ONLY 1 "register_operand")))]
10314 operands[3] = CONSTM1_RTX (<VPRED>mode);
10318 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10319 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10321 (vec_duplicate:VNx4SI
10324 (const_int SVE_KNOWN_PTRUE)
10325 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10327 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
10330 <inc_dec>p\t%0.s, %2
10331 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
10332 "&& !CONSTANT_P (operands[3])"
10334 operands[3] = CONSTM1_RTX (<VPRED>mode);
10336 [(set_attr "movprfx" "*,yes")]
10339 ;; Increment a vector of HIs by the number of set bits in a predicate.
10340 ;; See aarch64_sve_cntp for a description of the operands.
;; The count is SImode, so the insn form matches a lowpart subreg to HI
;; (subreg_lowpart_operator, operand 3); the governing predicate is
;; therefore operand 4 here, and the rewrite targets operands[4].
10341 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10342 [(set (match_operand:VNx8HI 0 "register_operand")
10344 (vec_duplicate:VNx8HI
10348 (const_int SVE_KNOWN_PTRUE)
10349 (match_operand:<VPRED> 2 "register_operand")]
10351 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
10354 operands[3] = CONSTM1_RTX (<VPRED>mode);
10358 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10359 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10361 (vec_duplicate:VNx8HI
10362 (match_operator:HI 3 "subreg_lowpart_operator"
10365 (const_int SVE_KNOWN_PTRUE)
10366 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10368 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
10371 <inc_dec>p\t%0.h, %2
10372 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
10373 "&& !CONSTANT_P (operands[4])"
10375 operands[4] = CONSTM1_RTX (<VPRED>mode);
10377 [(set_attr "movprfx" "*,yes")]
10380 ;; -------------------------------------------------------------------------
10381 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
10382 ;; -------------------------------------------------------------------------
10387 ;; -------------------------------------------------------------------------
10389 ;; Decrement a DImode register by the number of set bits in a predicate.
10390 ;; See aarch64_sve_cntp for a description of the operands.
;; Mirror image of the scalar increment-by-cntp patterns: operand 1 (tied
;; to the output in the insn form) is the minuend.  The expand sets
;; operand 3 to an all-true predicate; the insn form leaves it
;; unconstrained for combine and rewrites it back to the constant.
10391 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10392 [(set (match_operand:DI 0 "register_operand")
10394 (match_operand:DI_ONLY 1 "register_operand")
10396 (unspec:SI [(match_dup 3)
10397 (const_int SVE_KNOWN_PTRUE)
10398 (match_operand:PRED_ALL 2 "register_operand")]
10402 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10406 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10407 [(set (match_operand:DI 0 "register_operand" "=r")
10409 (match_operand:DI_ONLY 1 "register_operand" "0")
10411 (unspec:SI [(match_operand 3)
10412 (const_int SVE_KNOWN_PTRUE)
10413 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10416 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
10417 "&& !CONSTANT_P (operands[3])"
10419 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10423 ;; Decrement an SImode register by the number of set bits in a predicate
10424 ;; using modular arithmetic. See aarch64_sve_cntp for a description of the
;; operands.  Unnamed pattern matched by combine; same operand-3 rewrite
;; as above.
10426 (define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
10427 [(set (match_operand:SI 0 "register_operand" "=r")
10429 (match_operand:SI 1 "register_operand" "0")
10430 (unspec:SI [(match_operand 3)
10431 (const_int SVE_KNOWN_PTRUE)
10432 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10435 "decp\t%x0, %2.<Vetype>"
10436 "&& !CONSTANT_P (operands[3])"
10438 operands[3] = CONSTM1_RTX (<MODE>mode);
10442 ;; Decrement an SImode register by the number of set bits in a predicate
10443 ;; using saturating arithmetic, extending the result to 64 bits.
10445 ;; See aarch64_sve_cntp for a description of the operands.
;; In the insn form below, the signed-saturating variant (SS_MINUS) prints
;; both "%x0, %w0" while the other variant prints just "%w0".
10446 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10447 [(set (match_operand:DI 0 "register_operand")
10448 (<paired_extend>:DI
10450 (match_operand:SI_ONLY 1 "register_operand")
10451 (unspec:SI [(match_dup 3)
10452 (const_int SVE_KNOWN_PTRUE)
10453 (match_operand:PRED_ALL 2 "register_operand")]
10457 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10461 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10462 [(set (match_operand:DI 0 "register_operand" "=r")
10463 (<paired_extend>:DI
10465 (match_operand:SI_ONLY 1 "register_operand" "0")
10466 (unspec:SI [(match_operand 3)
10467 (const_int SVE_KNOWN_PTRUE)
10468 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10472 if (<CODE> == SS_MINUS)
10473 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
10475 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
10477 "&& !CONSTANT_P (operands[3])"
10479 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10483 ;; -------------------------------------------------------------------------
10484 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
10485 ;; -------------------------------------------------------------------------
10490 ;; -------------------------------------------------------------------------
10492 ;; Decrement a vector of DIs by the number of set bits in a predicate.
10493 ;; See aarch64_sve_cntp for a description of the operands.
;; Mirror image of the vector increment-by-cntp patterns: operand 1 is the
;; minuend.  The expand sets operand 3 to an all-true predicate; the insn
;; form leaves it unconstrained for combine and rewrites it back to the
;; constant.  Alternative 1 uses MOVPRFX for a non-tied destination.
10494 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10495 [(set (match_operand:VNx2DI 0 "register_operand")
10497 (match_operand:VNx2DI_ONLY 1 "register_operand")
10498 (vec_duplicate:VNx2DI
10502 (const_int SVE_KNOWN_PTRUE)
10503 (match_operand:<VPRED> 2 "register_operand")]
10507 operands[3] = CONSTM1_RTX (<VPRED>mode);
10511 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10512 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10514 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
10515 (vec_duplicate:VNx2DI
10519 (const_int SVE_KNOWN_PTRUE)
10520 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10524 <inc_dec>p\t%0.d, %2
10525 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
10526 "&& !CONSTANT_P (operands[3])"
10528 operands[3] = CONSTM1_RTX (<VPRED>mode);
10530 [(set_attr "movprfx" "*,yes")]
10533 ;; Decrement a vector of SIs by the number of set bits in a predicate.
10534 ;; See aarch64_sve_cntp for a description of the operands.
;; Same structure as the VNx2DI pair above, for 32-bit elements.
10535 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10536 [(set (match_operand:VNx4SI 0 "register_operand")
10538 (match_operand:VNx4SI_ONLY 1 "register_operand")
10539 (vec_duplicate:VNx4SI
10542 (const_int SVE_KNOWN_PTRUE)
10543 (match_operand:<VPRED> 2 "register_operand")]
10547 operands[3] = CONSTM1_RTX (<VPRED>mode);
10551 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10552 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10554 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
10555 (vec_duplicate:VNx4SI
10558 (const_int SVE_KNOWN_PTRUE)
10559 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10563 <inc_dec>p\t%0.s, %2
10564 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
10565 "&& !CONSTANT_P (operands[3])"
10567 operands[3] = CONSTM1_RTX (<VPRED>mode);
10569 [(set_attr "movprfx" "*,yes")]
10572 ;; Decrement a vector of HIs by the number of set bits in a predicate.
10573 ;; See aarch64_sve_cntp for a description of the operands.
;; The count is SImode, so the insn form matches a lowpart subreg to HI
;; (subreg_lowpart_operator, operand 3); the governing predicate is
;; therefore operand 4 here, and the rewrite targets operands[4].
10574 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10575 [(set (match_operand:VNx8HI 0 "register_operand")
10577 (match_operand:VNx8HI_ONLY 1 "register_operand")
10578 (vec_duplicate:VNx8HI
10582 (const_int SVE_KNOWN_PTRUE)
10583 (match_operand:<VPRED> 2 "register_operand")]
10587 operands[3] = CONSTM1_RTX (<VPRED>mode);
10591 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10592 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10594 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
10595 (vec_duplicate:VNx8HI
10596 (match_operator:HI 3 "subreg_lowpart_operator"
10599 (const_int SVE_KNOWN_PTRUE)
10600 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10604 <inc_dec>p\t%0.h, %2
10605 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
10606 "&& !CONSTANT_P (operands[4])"
10608 operands[4] = CONSTM1_RTX (<VPRED>mode);
10610 [(set_attr "movprfx" "*,yes")]