]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/config/aarch64/aarch64-sve.md
Update copyright years.
[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-sve.md
CommitLineData
43cacb12 1;; Machine description for AArch64 SVE.
7adcbafe 2;; Copyright (C) 2009-2022 Free Software Foundation, Inc.
43cacb12
RS
3;; Contributed by ARM Ltd.
4;;
5;; This file is part of GCC.
6;;
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published by
9;; the Free Software Foundation; either version 3, or (at your option)
10;; any later version.
11;;
12;; GCC is distributed in the hope that it will be useful, but
13;; WITHOUT ANY WARRANTY; without even the implied warranty of
14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15;; General Public License for more details.
16;;
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3. If not see
19;; <http://www.gnu.org/licenses/>.
20
915d28fe
RS
21;; The file is organised into the following sections (search for the full
22;; line):
23;;
24;; == General notes
25;; ---- Note on the handling of big-endian SVE
34467289 26;; ---- Description of UNSPEC_PTEST
00fa90d9 27;; ---- Description of UNSPEC_PRED_Z
06308276 28;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
c9c5a809 29;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
624d0f07 30;; ---- Note on FFR handling
915d28fe
RS
31;;
32;; == Moves
33;; ---- Moves of single vectors
34;; ---- Moves of multiple vectors
35;; ---- Moves of predicates
624d0f07 36;; ---- Moves relating to the FFR
915d28fe
RS
37;;
38;; == Loads
39;; ---- Normal contiguous loads
624d0f07
RS
40;; ---- Extending contiguous loads
41;; ---- First-faulting contiguous loads
42;; ---- First-faulting extending contiguous loads
43;; ---- Non-temporal contiguous loads
915d28fe 44;; ---- Normal gather loads
624d0f07
RS
45;; ---- Extending gather loads
46;; ---- First-faulting gather loads
47;; ---- First-faulting extending gather loads
48;;
49;; == Prefetches
50;; ---- Contiguous prefetches
51;; ---- Gather prefetches
915d28fe
RS
52;;
53;; == Stores
54;; ---- Normal contiguous stores
624d0f07
RS
55;; ---- Truncating contiguous stores
56;; ---- Non-temporal contiguous stores
915d28fe 57;; ---- Normal scatter stores
624d0f07 58;; ---- Truncating scatter stores
915d28fe
RS
59;;
60;; == Vector creation
61;; ---- [INT,FP] Duplicate element
62;; ---- [INT,FP] Initialize from individual elements
63;; ---- [INT] Linear series
64;; ---- [PRED] Duplicate element
65;;
66;; == Vector decomposition
67;; ---- [INT,FP] Extract index
68;; ---- [INT,FP] Extract active element
69;; ---- [PRED] Extract index
70;;
71;; == Unary arithmetic
72;; ---- [INT] General unary arithmetic corresponding to rtx codes
d7a09c44 73;; ---- [INT] General unary arithmetic corresponding to unspecs
e58703e2 74;; ---- [INT] Sign and zero extension
2d56600c 75;; ---- [INT] Truncation
e0a0be93 76;; ---- [INT] Logical inverse
624d0f07 77;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
d45b20a5 78;; ---- [FP] General unary arithmetic corresponding to unspecs
a0ee8352
RS
79;; ---- [FP] Square root
80;; ---- [FP] Reciprocal square root
915d28fe
RS
81;; ---- [PRED] Inverse
82
83;; == Binary arithmetic
84;; ---- [INT] General binary arithmetic corresponding to rtx codes
85;; ---- [INT] Addition
86;; ---- [INT] Subtraction
a229966c 87;; ---- [INT] Take address
915d28fe 88;; ---- [INT] Absolute difference
624d0f07 89;; ---- [INT] Saturating addition and subtraction
915d28fe
RS
90;; ---- [INT] Highpart multiplication
91;; ---- [INT] Division
92;; ---- [INT] Binary logical operations
93;; ---- [INT] Binary logical operations (inverted second input)
624d0f07 94;; ---- [INT] Shifts (rounding towards -Inf)
c0c2f013 95;; ---- [INT] Shifts (rounding towards 0)
624d0f07 96;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
915d28fe
RS
97;; ---- [FP] General binary arithmetic corresponding to rtx codes
98;; ---- [FP] General binary arithmetic corresponding to unspecs
99;; ---- [FP] Addition
624d0f07 100;; ---- [FP] Complex addition
915d28fe
RS
101;; ---- [FP] Subtraction
102;; ---- [FP] Absolute difference
103;; ---- [FP] Multiplication
04f307cb 104;; ---- [FP] Division
915d28fe
RS
105;; ---- [FP] Binary logical operations
106;; ---- [FP] Sign copying
107;; ---- [FP] Maximum and minimum
108;; ---- [PRED] Binary logical operations
109;; ---- [PRED] Binary logical operations (inverted second input)
110;; ---- [PRED] Binary logical operations (inverted result)
111;;
112;; == Ternary arithmetic
113;; ---- [INT] MLA and MAD
114;; ---- [INT] MLS and MSB
115;; ---- [INT] Dot product
116;; ---- [INT] Sum of absolute differences
36696774 117;; ---- [INT] Matrix multiply-accumulate
915d28fe 118;; ---- [FP] General ternary arithmetic corresponding to unspecs
624d0f07
RS
119;; ---- [FP] Complex multiply-add
120;; ---- [FP] Trigonometric multiply-add
896dff99 121;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
36696774 122;; ---- [FP] Matrix multiply-accumulate
915d28fe
RS
123;;
124;; == Comparisons and selects
125;; ---- [INT,FP] Select based on predicates
126;; ---- [INT,FP] Compare and select
127;; ---- [INT] Comparisons
128;; ---- [INT] While tests
42b4e87d
RS
129;; ---- [FP] Direct comparisons
130;; ---- [FP] Absolute comparisons
624d0f07 131;; ---- [PRED] Select
915d28fe
RS
132;; ---- [PRED] Test bits
133;;
134;; == Reductions
135;; ---- [INT,FP] Conditional reductions
136;; ---- [INT] Tree reductions
137;; ---- [FP] Tree reductions
138;; ---- [FP] Left-to-right reductions
139;;
140;; == Permutes
141;; ---- [INT,FP] General permutes
142;; ---- [INT,FP] Special-purpose unary permutes
143;; ---- [INT,FP] Special-purpose binary permutes
28350fd1 144;; ---- [PRED] Special-purpose unary permutes
915d28fe
RS
145;; ---- [PRED] Special-purpose binary permutes
146;;
147;; == Conversions
148;; ---- [INT<-INT] Packs
149;; ---- [INT<-INT] Unpacks
150;; ---- [INT<-FP] Conversions
151;; ---- [INT<-FP] Packs
152;; ---- [INT<-FP] Unpacks
153;; ---- [FP<-INT] Conversions
154;; ---- [FP<-INT] Packs
155;; ---- [FP<-INT] Unpacks
156;; ---- [FP<-FP] Packs
896dff99 157;; ---- [FP<-FP] Packs (bfloat16)
915d28fe
RS
158;; ---- [FP<-FP] Unpacks
159;; ---- [PRED<-PRED] Packs
160;; ---- [PRED<-PRED] Unpacks
624d0f07
RS
161;;
162;; == Vector partitioning
163;; ---- [PRED] Unary partitioning
164;; ---- [PRED] Binary partitioning
165;; ---- [PRED] Scalarization
166;;
167;; == Counting elements
168;; ---- [INT] Count elements in a pattern (scalar)
169;; ---- [INT] Increment by the number of elements in a pattern (scalar)
170;; ---- [INT] Increment by the number of elements in a pattern (vector)
171;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
172;; ---- [INT] Decrement by the number of elements in a pattern (vector)
173;; ---- [INT] Count elements in a predicate (scalar)
174;; ---- [INT] Increment by the number of elements in a predicate (scalar)
175;; ---- [INT] Increment by the number of elements in a predicate (vector)
176;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
177;; ---- [INT] Decrement by the number of elements in a predicate (vector)
915d28fe
RS
178
179;; =========================================================================
180;; == General notes
181;; =========================================================================
182;;
183;; -------------------------------------------------------------------------
184;; ---- Note on the handling of big-endian SVE
185;; -------------------------------------------------------------------------
43cacb12
RS
186;;
187;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
188;; same way as movdi or movti would: the first byte of memory goes
189;; into the most significant byte of the register and the last byte
190;; of memory goes into the least significant byte of the register.
191;; This is the most natural ordering for Advanced SIMD and matches
192;; the ABI layout for 64-bit and 128-bit vector types.
193;;
194;; As a result, the order of bytes within the register is what GCC
195;; expects for a big-endian target, and subreg offsets therefore work
196;; as expected, with the first element in memory having subreg offset 0
197;; and the last element in memory having the subreg offset associated
198;; with a big-endian lowpart. However, this ordering also means that
199;; GCC's lane numbering does not match the architecture's numbering:
200;; GCC always treats the element at the lowest address in memory
201;; (subreg offset 0) as element 0, while the architecture treats
202;; the least significant end of the register as element 0.
203;;
204;; The situation for SVE is different. We want the layout of the
205;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
206;; logically, a mov<mode> load must be indistinguishable from a
207;; maskload<mode> whose mask is all true. We therefore need the
208;; register layout to match LD1 rather than LDR. The ABI layout of
209;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
210;;
211;; As a result, the architecture lane numbering matches GCC's lane
212;; numbering, with element 0 always being the first in memory.
213;; However:
214;;
215;; - Applying a subreg offset to a register does not give the element
216;; that GCC expects: the first element in memory has the subreg offset
217;; associated with a big-endian lowpart while the last element in memory
218;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
219;;
220;; - We cannot use LDR and STR for spill slots that might be accessed
221;; via subregs, since although the elements have the order GCC expects,
222;; the order of the bytes within the elements is different. We instead
223;; access spill slots via LD1 and ST1, using secondary reloads to
224;; reserve a predicate register.
34467289
RS
225;;
226;; -------------------------------------------------------------------------
227;; ---- Description of UNSPEC_PTEST
228;; -------------------------------------------------------------------------
229;;
230;; SVE provides a PTEST instruction for testing the active lanes of a
231;; predicate and setting the flags based on the result. The associated
232;; condition code tests are:
233;;
234;; - any (= ne): at least one active bit is set
235;; - none (= eq): all active bits are clear (*)
236;; - first (= mi): the first active bit is set
237;; - nfrst (= pl): the first active bit is clear (*)
238;; - last (= cc): the last active bit is set
239;; - nlast (= cs): the last active bit is clear (*)
240;;
241;; where the conditions marked (*) are also true when there are no active
242;; lanes (i.e. when the governing predicate is a PFALSE). The flags results
243;; of a PTEST use the condition code mode CC_NZC.
244;;
245;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
246;; This means that for other predicate modes, we need a governing predicate
247;; in which all bits are defined.
248;;
249;; For example, most predicated .H operations ignore the odd bits of the
250;; governing predicate, so that an active lane is represented by the
251;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
252;; any value. To test a .H predicate, we instead need "10" and "00"
253;; respectively, so that the condition only tests the even bits of the
254;; predicate.
255;;
256;; Several instructions set the flags as a side-effect, in the same way
257;; that a separate PTEST would. It's important for code quality that we
258;; use these flags results as often as possible, particularly in the case
259;; of WHILE* and RDFFR.
260;;
261;; Also, some of the instructions that set the flags are unpredicated
262;; and instead implicitly test all .B, .H, .S or .D elements, as though
263;; they were predicated on a PTRUE of that size. For example, a .S
264;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
265;; would.
266;;
267;; We therefore need to represent PTEST operations in a way that
268;; makes it easy to combine them with both predicated and unpredicated
269;; operations, while using a VNx16BI governing predicate for all
270;; predicate modes. We do this using:
271;;
272;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
273;;
274;; where:
275;;
276;; - GP is the real VNx16BI governing predicate
277;;
278;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting
279;; GP to CAST_GP are guaranteed to be clear in GP.
280;;
281;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
282;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
283;; SVE_MAYBE_NOT_PTRUE otherwise.
284;;
285;; - OP is the predicate we want to test, of the same mode as CAST_GP.
c9c5a809
RS
286;;
287;; -------------------------------------------------------------------------
00fa90d9
RS
288;; ---- Description of UNSPEC_PRED_Z
289;; -------------------------------------------------------------------------
290;;
291;; SVE integer comparisons are predicated and return zero for inactive
292;; lanes. Sometimes we use them with predicates that are all-true and
293;; sometimes we use them with general predicates.
294;;
295;; The integer comparisons also set the flags and so build in the effect
296;; of a PTEST. We therefore want to be able to combine integer comparison
297;; patterns with PTESTs of the result. One difficulty with doing this is
298;; that (as noted above) the PTEST is always a .B operation and so can place
299;; stronger requirements on the governing predicate than the comparison does.
300;;
301;; For example, when applying a separate PTEST to the result of a full-vector
302;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
303;; .B PTRUE. In contrast, the comparison might be predicated on either
304;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
305;; bits don't matter for .H operations.
306;;
307;; We therefore can't rely on a full-vector comparison using the same
308;; predicate register as a following PTEST. We instead need to remember
309;; whether a comparison is known to be a full-vector comparison and use
310;; this information in addition to a check for equal predicate registers.
311;; At the same time, it's useful to have a common representation for all
312;; integer comparisons, so that they can be handled by a single set of
313;; patterns.
314;;
315;; We therefore take a similar approach to UNSPEC_PTEST above and use:
316;;
317;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
318;;
319;; where:
320;;
321;; - GP is the governing predicate, of mode <M:VPRED>
322;;
323;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
324;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
325;; otherwise
326;;
327;; - CODE is the comparison code
328;;
329;; - OP0 and OP1 are the values being compared, of mode M
330;;
331;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
332;;
333;; -------------------------------------------------------------------------
06308276
RS
334;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
335;; -------------------------------------------------------------------------
336;;
337;; Many SVE integer operations are predicated. We can generate them
338;; from four sources:
339;;
340;; (1) Using normal unpredicated optabs. In this case we need to create
341;; an all-true predicate register to act as the governing predicate
342;; for the SVE instruction. There are no inactive lanes, and thus
343;; the values of inactive lanes don't matter.
344;;
345;; (2) Using _x ACLE functions. In this case the function provides a
346;; specific predicate and some lanes might be inactive. However,
347;; as for (1), the values of the inactive lanes don't matter.
348;; We can make extra lanes active without changing the behavior
349;; (although for code-quality reasons we should avoid doing so
350;; needlessly).
351;;
352;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
353;; These optabs have a predicate operand that specifies which lanes are
354;; active and another operand that provides the values of inactive lanes.
355;;
356;; (4) Using _m and _z ACLE functions. These functions map to the same
357;; patterns as (3), with the _z functions setting inactive lanes to zero
358;; and the _m functions setting the inactive lanes to one of the function
359;; arguments.
360;;
361;; For (1) and (2) we need a way of attaching the predicate to a normal
362;; unpredicated integer operation. We do this using:
363;;
364;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
365;;
366;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
367;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE;
368;; it always is for (1), but might not be for (2).
369;;
370;; The unspec as a whole has the same value as (code:M ...) when PRED is
371;; all-true. It is always semantically valid to replace PRED with a PTRUE,
372;; but as noted above, we should only do so if there's a specific benefit.
373;;
374;; (The "_X" in the unspec is named after the ACLE functions in (2).)
375;;
376;; For (3) and (4) we can simply use the SVE port's normal representation
377;; of a predicate-based select:
378;;
379;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
380;;
381;; where INACTIVE specifies the values of inactive lanes.
382;;
383;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
384;; than inserting the integer operation directly. This is mostly useful
385;; if we want the combine pass to merge an integer operation with an explicit
386;; vcond_mask (in other words, with a following SEL instruction). However,
387;; it's generally better to merge such operations at the gimple level
388;; using (3).
389;;
390;; -------------------------------------------------------------------------
c9c5a809
RS
391;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
392;; -------------------------------------------------------------------------
393;;
394;; Most SVE floating-point operations are predicated. We can generate
395;; them from four sources:
396;;
397;; (1) Using normal unpredicated optabs. In this case we need to create
398;; an all-true predicate register to act as the governing predicate
399;; for the SVE instruction. There are no inactive lanes, and thus
400;; the values of inactive lanes don't matter.
401;;
402;; (2) Using _x ACLE functions. In this case the function provides a
403;; specific predicate and some lanes might be inactive. However,
404;; as for (1), the values of the inactive lanes don't matter.
405;;
406;; The instruction must have the same exception behavior as the
407;; function call unless things like command-line flags specifically
408;; allow otherwise. For example, with -ffast-math, it is OK to
409;; raise exceptions for inactive lanes, but normally it isn't.
410;;
411;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
412;; These optabs have a predicate operand that specifies which lanes are
413;; active and another operand that provides the values of inactive lanes.
414;;
415;; (4) Using _m and _z ACLE functions. These functions map to the same
416;; patterns as (3), with the _z functions setting inactive lanes to zero
417;; and the _m functions setting the inactive lanes to one of the function
418;; arguments.
419;;
420;; So:
421;;
422;; - In (1), the predicate is known to be all true and the pattern can use
423;; unpredicated operations where available.
424;;
425;; - In (2), the predicate might or might not be all true. The pattern can
426;; use unpredicated instructions if the predicate is all-true or if things
427;; like command-line flags allow exceptions for inactive lanes.
428;;
429;; - (3) and (4) represent a native SVE predicated operation. Some lanes
430;; might be inactive and inactive lanes of the result must have specific
431;; values. There is no scope for using unpredicated instructions (and no
432;; reason to want to), so the question about command-line flags doesn't
433;; arise.
434;;
435;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
436;; in combination with a separate predicate operand, e.g.
437;;
438;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
f75cdd2c 439;; (sqrt:SVE_FULL_F 2 "register_operand" "w")]
c9c5a809
RS
440;; ....)
441;;
442;; because (sqrt ...) can raise an exception for any lane, including
443;; inactive ones. We therefore need to use an unspec instead.
444;;
445;; Also, (2) requires some way of distinguishing the case in which the
446;; predicate might have inactive lanes and cannot be changed from the
447;; case in which the predicate has no inactive lanes or can be changed.
448;; This information is also useful when matching combined FP patterns
449;; in which the predicates might not be equal.
450;;
451;; We therefore model FP operations as an unspec of the form:
452;;
453;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
454;;
455;; where:
456;;
457;; - PRED is the governing predicate.
458;;
459;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the
460;; value SVE_STRICT_GP if PRED might have inactive lanes and if those
461;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise.
462;;
463;; - OP0 OP1 ... are the normal input operands to the operation.
464;;
465;; - MNEMONIC is the mnemonic of the associated SVE instruction.
624d0f07 466;;
0eb5e901
RS
467;; For (3) and (4), we combine these operations with an UNSPEC_SEL
468;; that selects between the result of the FP operation and the "else"
469;; value. (This else value is a merge input for _m ACLE functions
470;; and zero for _z ACLE functions.) The outer pattern then has the form:
471;;
472;; (unspec [pred fp_operation else_value] UNSPEC_SEL)
473;;
474;; This means that the patterns for (3) and (4) have two predicates:
475;; one for the FP operation itself and one for the UNSPEC_SEL.
476;; This pattern is equivalent to the result of combining an instance
477;; of (1) or (2) with a separate vcond instruction, so these patterns
478;; are useful as combine targets too.
479;;
480;; However, in the combine case, the instructions that we want to
481;; combine might use different predicates. Then:
482;;
483;; - Some of the active lanes of the FP operation might be discarded
484;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes,
485;; even for SVE_STRICT_GP, since the operations on those lanes are
486;; effectively dead code.
487;;
488;; - Some of the inactive lanes of the FP operation might be selected
489;; by the UNSPEC_SEL, giving unspecified values for those lanes.
490;; SVE_RELAXED_GP lets us extend the FP operation to cover these
491;; extra lanes, but SVE_STRICT_GP does not.
492;;
493;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
494;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
495;; This typically leads to patterns like:
496;;
497;; (unspec [(match_operand 1 "register_operand" "Upl")
498;; (unspec [(match_operand N)
499;; (const_int SVE_RELAXED_GP)
500;; ...]
501;; UNSPEC_COND_<MNEMONIC>)
502;; ...])
503;;
504;; where operand N is allowed to be anything. These instructions then
505;; have rewrite rules to replace operand N with operand 1, which gives the
506;; instructions a canonical form and means that the original operand N is
507;; not kept live unnecessarily.
508;;
509;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
510;; a subset of the FP operation predicate. This case isn't interesting
511;; for FP operations that have an all-true predicate, since such operations
512;; use SVE_RELAXED_GP instead. And it is not possible for instruction
513;; conditions to track the subset relationship for arbitrary registers.
514;; So in practice, the only useful case for SVE_STRICT_GP is the one
515;; in which the predicates match:
516;;
517;; (unspec [(match_operand 1 "register_operand" "Upl")
518;; (unspec [(match_dup 1)
519;; (const_int SVE_STRICT_GP)
520;; ...]
521;; UNSPEC_COND_<MNEMONIC>)
522;; ...])
523;;
524;; This pattern would also be correct for SVE_RELAXED_GP, but it would
525;; be redundant with the one above. However, if the combine pattern
526;; has multiple FP operations, using a match_operand allows combinations
527;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
528;; that the predicates are the same:
529;;
530;; (unspec [(match_operand 1 "register_operand" "Upl")
531;; (...
532;; (unspec [(match_dup 1)
533;; (match_operand:SI N "aarch64_sve_gp_strictness")
534;; ...]
535;; UNSPEC_COND_<MNEMONIC1>)
536;; (unspec [(match_dup 1)
537;; (match_operand:SI M "aarch64_sve_gp_strictness")
538;; ...]
539;; UNSPEC_COND_<MNEMONIC2>) ...)
540;; ...])
541;;
542;; The fully-relaxed version of this pattern is:
543;;
544;; (unspec [(match_operand 1 "register_operand" "Upl")
545;; (...
546;; (unspec [(match_operand N)
547;; (const_int SVE_RELAXED_GP)
548;; ...]
549;; UNSPEC_COND_<MNEMONIC1>)
550;; (unspec [(match_operand M)
551;; (const_int SVE_RELAXED_GP)
552;; ...]
553;; UNSPEC_COND_<MNEMONIC2>) ...)
554;; ...])
555;;
624d0f07
RS
556;; -------------------------------------------------------------------------
557;; ---- Note on FFR handling
558;; -------------------------------------------------------------------------
559;;
560;; Logically we want to divide FFR-related instructions into regions
561;; that contain exactly one of:
562;;
563;; - a single write to the FFR
564;; - any number of reads from the FFR (but only one read is likely)
565;; - any number of LDFF1 and LDNF1 instructions
566;;
567;; However, LDFF1 and LDNF1 instructions should otherwise behave like
568;; normal loads as far as possible. This means that they should be
569;; schedulable within a region in the same way that LD1 would be,
570;; and they should be deleted as dead if the result is unused. The loads
571;; should therefore not write to the FFR, since that would both serialize
572;; the loads with respect to each other and keep the loads live for any
573;; later RDFFR.
574;;
575;; We get around this by using a fake "FFR token" (FFRT) to help describe
576;; the dependencies. Writing to the FFRT starts a new "FFRT region",
577;; while using the FFRT keeps the instruction within its region.
578;; Specifically:
579;;
580;; - Writes start a new FFRT region as well as setting the FFR:
581;;
582;; W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
583;;
584;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
585;; loads stay within the same FFRT region:
586;;
587;; L1: load data while using the FFRT
588;;
589;; In addition, any FFRT region that includes a load also has at least one
590;; instance of:
591;;
592;; L2: FFR = update(FFR, FFRT) [type == no_insn]
593;;
594;; to make it clear that the region both reads from and writes to the FFR.
595;;
596;; - Reads do the following:
597;;
598;; R1: FFRT = FFR [type == no_insn]
599;; R2: read from the FFRT
600;; R3: FFRT = update(FFRT) [type == no_insn]
601;;
602;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and
603;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
604;; cannot move backwards across R3.
605;;
606;; This way, writes are only kept alive by later loads or reads,
607;; and write/read pairs fold normally. For two consecutive reads,
608;; the first R3 is made dead by the second R1, which in turn becomes
609;; redundant with the first R1. We then have:
610;;
611;; first R1: FFRT = FFR
612;; first read from the FFRT
613;; second read from the FFRT
614;; second R3: FFRT = update(FFRT)
615;;
616;; i.e. the two FFRT regions collapse into a single one with two
617;; independent reads.
618;;
619;; The model still prevents some valid optimizations though. For example,
620;; if all loads in an FFRT region are deleted as dead, nothing would remove
621;; the L2 instructions.
43cacb12 622
915d28fe
RS
623;; =========================================================================
624;; == Moves
625;; =========================================================================
626
627;; -------------------------------------------------------------------------
628;; ---- Moves of single vectors
629;; -------------------------------------------------------------------------
630;; Includes:
631;; - MOV (including aliases)
632;; - LD1B (contiguous form)
633;; - LD1D ( " " )
634;; - LD1H ( " " )
635;; - LD1W ( " " )
636;; - LDR
637;; - ST1B (contiguous form)
638;; - ST1D ( " " )
639;; - ST1H ( " " )
640;; - ST1W ( " " )
641;; - STR
642;; -------------------------------------------------------------------------
643
;; Expand a move of a single SVE vector of any SVE_ALL mode.  The C body
;; tries three special cases in order and finishes with DONE if one applies:
;;   - either operand is a MEM and new pseudos can still be created:
;;     hand the move to aarch64_expand_sve_mem_move, passing <VPRED>mode;
;;   - the source is a constant: use aarch64_expand_mov_immediate;
;;   - big-endian targets only: let aarch64_maybe_expand_sve_subreg_move
;;     handle the move if it can.
;; If none applies, the (set ...) template below is emitted unchanged.
43cacb12 644(define_expand "mov<mode>"
cc68f7c2
RS
645 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
646 (match_operand:SVE_ALL 1 "general_operand"))]
43cacb12
RS
647 "TARGET_SVE"
648 {
649 /* Use the predicated load and store patterns where possible.
650 This is required for big-endian targets (see the comment at the
651 head of the file) and increases the addressing choices for
652 little-endian. */
653 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
ea403d8b 654 && can_create_pseudo_p ())
43cacb12
RS
655 {
656 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
657 DONE;
658 }
659
660 if (CONSTANT_P (operands[1]))
661 {
4aeb1ba7 662 aarch64_expand_mov_immediate (operands[0], operands[1]);
43cacb12
RS
663 DONE;
664 }
002092be
RS
665
666 /* Optimize subregs on big-endian targets: we can use REV[BHW]
667 instead of going through memory. */
668 if (BYTES_BIG_ENDIAN
ea403d8b 669 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
002092be
RS
670 DONE;
671 }
672)
673
;; Misaligned move of a single SVE vector.  The expander simply forwards
;; to emit_move_insn with the same two operands and then finishes with DONE,
;; so the (set ...) template itself is never emitted.
915d28fe 674(define_expand "movmisalign<mode>"
cc68f7c2
RS
675 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
676 (match_operand:SVE_ALL 1 "general_operand"))]
915d28fe 677 "TARGET_SVE"
002092be 678 {
915d28fe
RS
679 /* Equivalent to a normal move for our purposes. */
680 emit_move_insn (operands[0], operands[1]);
002092be 681 DONE;
43cacb12
RS
682 }
683)
684
cc68f7c2
RS
685;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
686;; little-endian ordering is acceptable. Only allow memory operations during
687;; and after RA; before RA we want the predicated load and store patterns to
688;; be used instead.
;;
;; The four alternatives are, in order: a load (LDR), a store (STR),
;; a register-to-register MOV on .d elements, and a "Dn" constant whose
;; assembly is produced by aarch64_output_sve_mov_immediate.
;;
;; The condition accepts VNx16QImode on either endianness and any other
;; SVE_FULL mode only when !BYTES_BIG_ENDIAN.  Unless lra_in_progress or
;; reload_completed is set, operand 0 must be a register and operand 1
;; must be a non-memory operand.
689(define_insn "*aarch64_sve_mov<mode>_ldr_str"
f75cdd2c
RS
690 [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
691 (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
43cacb12 692 "TARGET_SVE
c600df9a 693 && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
43cacb12
RS
694 && ((lra_in_progress || reload_completed)
695 || (register_operand (operands[0], <MODE>mode)
696 && nonmemory_operand (operands[1], <MODE>mode)))"
697 "@
698 ldr\t%0, %1
699 str\t%1, %0
700 mov\t%0.d, %1.d
701 * return aarch64_output_sve_mov_immediate (operands[1]);"
702)
703
cc68f7c2
RS
704;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
705;; or vectors for which little-endian ordering isn't acceptable. Memory
706;; accesses require secondary reloads.
;;
;; This pattern never matches VNx16QImode.  Other modes match on
;; big-endian targets, or when the size of the mode might differ from
;; BYTES_PER_SVE_VECTOR (the maybe_ne test).  The destination is always
;; a register: alternative 0 is a register-to-register MOV on .d elements
;; and alternative 1 is a "Dn" constant whose assembly is produced by
;; aarch64_output_sve_mov_immediate.
707(define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
708 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
709 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
710 "TARGET_SVE
711 && <MODE>mode != VNx16QImode
712 && (BYTES_BIG_ENDIAN
713 || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
43cacb12
RS
714 "@
715 mov\t%0.d, %1.d
716 * return aarch64_output_sve_mov_immediate (operands[1]);"
717)
718
cc68f7c2
RS
719;; Handle memory reloads for modes that can't use LDR and STR. We use
720;; byte PTRUE for all modes to try to encourage reuse. This pattern
721;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
;;
;; Operands 0 and 1 are the destination and source of the move and carry
;; no mode or predicate in the template; the mode is taken from operand 0.
;; Operand 2 is a clobbered VNx16BI register (constraint "Upl").  The
;; expander sets it to CONSTM1_RTX (VNx16BImode), reinterprets it with
;; gen_lowpart in the mode returned by aarch64_sve_pred_mode, and passes
;; the result to aarch64_emit_sve_pred_move as the governing predicate.
722(define_expand "aarch64_sve_reload_mem"
43cacb12
RS
723 [(parallel
724 [(set (match_operand 0)
ea403d8b 725 (match_operand 1))
43cacb12 726 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
cc68f7c2 727 "TARGET_SVE"
43cacb12
RS
728 {
729 /* Create a PTRUE. */
730 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
731
732 /* Refer to the PTRUE in the appropriate mode for this move. */
733 machine_mode mode = GET_MODE (operands[0]);
cc68f7c2 734 rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
43cacb12
RS
735
736 /* Emit a predicated load or store. */
737 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
738 DONE;
739 }
740)
741
915d28fe
RS
742;; A predicated move in which the predicate is known to be all-true.
743;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
744;; so changes to this pattern will need changes there as well.
0c63a8ee 745(define_insn_and_split "@aarch64_pred_mov<mode>"
cc68f7c2
RS
746 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
747 (unspec:SVE_ALL
9c6b4601 748 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
cc68f7c2 749 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
06308276 750 UNSPEC_PRED_X))]
43cacb12
RS
751 "TARGET_SVE
752 && (register_operand (operands[0], <MODE>mode)
753 || register_operand (operands[2], <MODE>mode))"
754 "@
9c6b4601 755 #
cc68f7c2
RS
756 ld1<Vesize>\t%0.<Vctype>, %1/z, %2
757 st1<Vesize>\t%2.<Vctype>, %1, %0"
9c6b4601
RS
758 "&& register_operand (operands[0], <MODE>mode)
759 && register_operand (operands[2], <MODE>mode)"
760 [(set (match_dup 0) (match_dup 2))]
43cacb12
RS
761)
762
915d28fe
RS
763;; A pattern for optimizing SUBREGs that have a reinterpreting effect
764;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
765;; for details. We use a special predicate for operand 2 to reduce
766;; the number of patterns.
767(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
cc68f7c2
RS
768 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
769 (unspec:SVE_ALL
915d28fe
RS
770 [(match_operand:VNx16BI 1 "register_operand" "Upl")
771 (match_operand 2 "aarch64_any_register_operand" "w")]
772 UNSPEC_REV_SUBREG))]
773 "TARGET_SVE && BYTES_BIG_ENDIAN"
774 "#"
775 "&& reload_completed"
776 [(const_int 0)]
f307441a 777 {
915d28fe
RS
778 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
779 DONE;
f307441a
RS
780 }
781)
782
4aeb1ba7
RS
783;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
784;; This is equivalent to a subreg on little-endian targets but not for
785;; big-endian; see the comment at the head of the file for details.
786(define_expand "@aarch64_sve_reinterpret<mode>"
cc68f7c2
RS
787 [(set (match_operand:SVE_ALL 0 "register_operand")
788 (unspec:SVE_ALL
f75cdd2c
RS
789 [(match_operand 1 "aarch64_any_register_operand")]
790 UNSPEC_REINTERPRET))]
4aeb1ba7
RS
791 "TARGET_SVE"
792 {
b23c6a2c
RS
793 machine_mode src_mode = GET_MODE (operands[1]);
794 if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
4aeb1ba7
RS
795 {
796 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
797 DONE;
798 }
799 }
800)
801
802;; A pattern for handling type punning on big-endian targets. We use a
803;; special predicate for operand 1 to reduce the number of patterns.
804(define_insn_and_split "*aarch64_sve_reinterpret<mode>"
cc68f7c2
RS
805 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
806 (unspec:SVE_ALL
f75cdd2c
RS
807 [(match_operand 1 "aarch64_any_register_operand" "w")]
808 UNSPEC_REINTERPRET))]
4aeb1ba7
RS
809 "TARGET_SVE"
810 "#"
811 "&& reload_completed"
812 [(set (match_dup 0) (match_dup 1))]
813 {
624d0f07 814 operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
4aeb1ba7
RS
815 }
816)
817
915d28fe
RS
818;; -------------------------------------------------------------------------
819;; ---- Moves of multiple vectors
820;; -------------------------------------------------------------------------
821;; All patterns in this section are synthetic and split to real
822;; instructions after reload.
823;; -------------------------------------------------------------------------
f307441a 824
9f4cbab8
RS
;; Move a tuple of SVE vectors (SVE_STRUCT modes). Big-endian memory moves
;; and constant sources are expanded by helper routines; any other move is
;; emitted as the plain SET given in the template.
825(define_expand "mov<mode>"
826 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
827 (match_operand:SVE_STRUCT 1 "general_operand"))]
828 "TARGET_SVE"
829 {
830 /* Big-endian loads and stores need to be done via LD1 and ST1;
831 see the comment at the head of the file for details. */
832 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
833 && BYTES_BIG_ENDIAN)
834 {
835 gcc_assert (can_create_pseudo_p ());
836 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
837 DONE;
838 }
839
/* Constant sources are expanded by aarch64_expand_mov_immediate rather
   than by the SET in the template. */
840 if (CONSTANT_P (operands[1]))
841 {
842 aarch64_expand_mov_immediate (operands[0], operands[1]);
843 DONE;
844 }
845 }
846)
847
848;; Unpredicated structure moves (little-endian).
849(define_insn "*aarch64_sve_mov<mode>_le"
850 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
851 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
852 "TARGET_SVE && !BYTES_BIG_ENDIAN"
853 "#"
854 [(set_attr "length" "<insn_length>")]
855)
856
857;; Unpredicated structure moves (big-endian). Memory accesses require
858;; secondary reloads.
915d28fe 859(define_insn "*aarch64_sve_mov<mode>_be"
9f4cbab8
RS
860 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
861 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
862 "TARGET_SVE && BYTES_BIG_ENDIAN"
863 "#"
864 [(set_attr "length" "<insn_length>")]
865)
866
867;; Split unpredicated structure moves into pieces. This is the same
868;; for both big-endian and little-endian code, although it only needs
869;; to handle memory operands for little-endian code.
870(define_split
871 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
872 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
873 "TARGET_SVE && reload_completed"
874 [(const_int 0)]
875 {
876 rtx dest = operands[0];
877 rtx src = operands[1];
878 if (REG_P (dest) && REG_P (src))
879 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
880 else
881 for (unsigned int i = 0; i < <vector_count>; ++i)
882 {
883 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
884 i * BYTES_PER_SVE_VECTOR);
885 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
886 i * BYTES_PER_SVE_VECTOR);
887 emit_insn (gen_rtx_SET (subdest, subsrc));
888 }
889 DONE;
890 }
891)
892
893;; Predicated structure moves. This works for both endiannesses but in
894;; practice is only useful for big-endian.
0c63a8ee 895(define_insn_and_split "@aarch64_pred_mov<mode>"
9c6b4601 896 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
9f4cbab8 897 (unspec:SVE_STRUCT
9c6b4601
RS
898 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
899 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
06308276 900 UNSPEC_PRED_X))]
9f4cbab8
RS
901 "TARGET_SVE
902 && (register_operand (operands[0], <MODE>mode)
903 || register_operand (operands[2], <MODE>mode))"
904 "#"
905 "&& reload_completed"
906 [(const_int 0)]
907 {
908 for (unsigned int i = 0; i < <vector_count>; ++i)
909 {
910 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
911 <MODE>mode,
912 i * BYTES_PER_SVE_VECTOR);
913 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
914 <MODE>mode,
915 i * BYTES_PER_SVE_VECTOR);
916 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
917 }
918 DONE;
919 }
920 [(set_attr "length" "<insn_length>")]
921)
922
915d28fe
RS
923;; -------------------------------------------------------------------------
924;; ---- Moves of predicates
925;; -------------------------------------------------------------------------
926;; Includes:
927;; - MOV
928;; - LDR
929;; - PFALSE
930;; - PTRUE
624d0f07 931;; - PTRUES
915d28fe
RS
932;; - STR
933;; -------------------------------------------------------------------------
934
43cacb12
RS
;; Move a predicate (PRED_ALL modes). Stores always take their source from
;; a register; constant sources for other destinations are expanded by
;; aarch64_expand_mov_immediate. Any other move is emitted as the plain SET
;; given in the template.
935(define_expand "mov<mode>"
936 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
937 (match_operand:PRED_ALL 1 "general_operand"))]
938 "TARGET_SVE"
939 {
/* When storing to memory, force the source into a register first. */
940 if (GET_CODE (operands[0]) == MEM)
941 operands[1] = force_reg (<MODE>mode, operands[1]);
0b1fe8cf
RS
942
/* A source that is still constant here (i.e. the destination is not
   memory) is expanded by aarch64_expand_mov_immediate. */
943 if (CONSTANT_P (operands[1]))
944 {
945 aarch64_expand_mov_immediate (operands[0], operands[1]);
946 DONE;
947 }
43cacb12
RS
948 }
949)
950
;; Predicate moves. The four alternatives are, in order: a register-to-
;; register MOV, a store (STR), a load (LDR), and a "Dn" constant whose
;; instruction is chosen by aarch64_output_sve_mov_immediate. The condition
;; requires at least one of the two operands to be a register.
951(define_insn "*aarch64_sve_mov<mode>"
1044fa32 952 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
0b1fe8cf 953 (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
43cacb12
RS
954 "TARGET_SVE
955 && (register_operand (operands[0], <MODE>mode)
956 || register_operand (operands[1], <MODE>mode))"
957 "@
958 mov\t%0.b, %1.b
959 str\t%1, %0
960 ldr\t%0, %1
1044fa32 961 * return aarch64_output_sve_mov_immediate (operands[1]);"
43cacb12
RS
962)
963
624d0f07
RS
964;; Match PTRUES Pn.B when both the predicate and flags are useful.
965(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
966 [(set (reg:CC_NZC CC_REGNUM)
967 (unspec:CC_NZC
968 [(match_operand 2)
969 (match_operand 3)
970 (const_int SVE_KNOWN_PTRUE)
971 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
972 [(unspec:VNx16BI
973 [(match_operand:SI 4 "const_int_operand")
974 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
975 UNSPEC_PTRUE)])]
976 UNSPEC_PTEST))
977 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
978 (match_dup 1))]
979 "TARGET_SVE"
980 {
981 return aarch64_output_sve_ptrues (operands[1]);
982 }
983 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
984 {
985 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
986 }
987)
988
989;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
990(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
991 [(set (reg:CC_NZC CC_REGNUM)
992 (unspec:CC_NZC
993 [(match_operand 2)
994 (match_operand 3)
995 (const_int SVE_KNOWN_PTRUE)
996 (subreg:PRED_HSD
997 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
998 [(unspec:VNx16BI
999 [(match_operand:SI 4 "const_int_operand")
1000 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1001 UNSPEC_PTRUE)]) 0)]
1002 UNSPEC_PTEST))
1003 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1004 (match_dup 1))]
1005 "TARGET_SVE"
1006 {
1007 return aarch64_output_sve_ptrues (operands[1]);
1008 }
1009 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1010 {
1011 operands[2] = CONSTM1_RTX (VNx16BImode);
1012 operands[3] = CONSTM1_RTX (<MODE>mode);
1013 }
1014)
1015
1016;; Match PTRUES Pn.B when only the flags result is useful (which is
1017;; a way of testing VL).
1018(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
1019 [(set (reg:CC_NZC CC_REGNUM)
1020 (unspec:CC_NZC
1021 [(match_operand 2)
1022 (match_operand 3)
1023 (const_int SVE_KNOWN_PTRUE)
1024 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1025 [(unspec:VNx16BI
1026 [(match_operand:SI 4 "const_int_operand")
1027 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
1028 UNSPEC_PTRUE)])]
1029 UNSPEC_PTEST))
1030 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1031 "TARGET_SVE"
1032 {
1033 return aarch64_output_sve_ptrues (operands[1]);
1034 }
1035 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1036 {
1037 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
1038 }
1039)
1040
1041;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
1042;; a way of testing VL).
1043(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
1044 [(set (reg:CC_NZC CC_REGNUM)
1045 (unspec:CC_NZC
1046 [(match_operand 2)
1047 (match_operand 3)
1048 (const_int SVE_KNOWN_PTRUE)
1049 (subreg:PRED_HSD
1050 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1051 [(unspec:VNx16BI
1052 [(match_operand:SI 4 "const_int_operand")
1053 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1054 UNSPEC_PTRUE)]) 0)]
1055 UNSPEC_PTEST))
1056 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1057 "TARGET_SVE"
1058 {
1059 return aarch64_output_sve_ptrues (operands[1]);
1060 }
1061 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1062 {
1063 operands[2] = CONSTM1_RTX (VNx16BImode);
1064 operands[3] = CONSTM1_RTX (<MODE>mode);
1065 }
1066)
1067
1068;; -------------------------------------------------------------------------
1069;; ---- Moves relating to the FFR
1070;; -------------------------------------------------------------------------
1071;; RDFFR
1072;; RDFFRS
1073;; SETFFR
1074;; WRFFR
1075;; -------------------------------------------------------------------------
1076
1077;; [W1 in the block comment above about FFR handling]
1078;;
1079;; Write to the FFR and start a new FFRT scheduling region.
1080(define_insn "aarch64_wrffr"
1081 [(set (reg:VNx16BI FFR_REGNUM)
1082 (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
1083 (set (reg:VNx16BI FFRT_REGNUM)
4ec943d6 1084 (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
624d0f07
RS
1085 "TARGET_SVE"
1086 "@
1087 setffr
1088 wrffr\t%0.b"
1089)
1090
1091;; [L2 in the block comment above about FFR handling]
1092;;
1093;; Introduce a read from and write to the FFR in the current FFRT region,
1094;; so that the FFR value is live on entry to the region and so that the FFR
1095;; value visibly changes within the region. This is used (possibly multiple
1096;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
1097(define_insn "aarch64_update_ffr_for_load"
1098 [(set (reg:VNx16BI FFR_REGNUM)
1099 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1100 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1101 "TARGET_SVE"
1102 ""
1103 [(set_attr "type" "no_insn")]
1104)
1105
1106;; [R1 in the block comment above about FFR handling]
1107;;
1108;; Notionally copy the FFR to the FFRT, so that the current FFR value
1109;; can be read from there by the RDFFR instructions below. This acts
1110;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1111;; creates a natural dependency with earlier writes.
1112(define_insn "aarch64_copy_ffr_to_ffrt"
1113 [(set (reg:VNx16BI FFRT_REGNUM)
1114 (reg:VNx16BI FFR_REGNUM))]
1115 "TARGET_SVE"
1116 ""
1117 [(set_attr "type" "no_insn")]
1118)
1119
1120;; [R2 in the block comment above about FFR handling]
1121;;
1122;; Read the FFR via the FFRT.
1123(define_insn "aarch64_rdffr"
1124 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1125 (reg:VNx16BI FFRT_REGNUM))]
1126 "TARGET_SVE"
1127 "rdffr\t%0.b"
1128)
1129
1130;; Likewise with zero predication.
1131(define_insn "aarch64_rdffr_z"
1132 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1133 (and:VNx16BI
1134 (reg:VNx16BI FFRT_REGNUM)
1135 (match_operand:VNx16BI 1 "register_operand" "Upa")))]
1136 "TARGET_SVE"
1137 "rdffr\t%0.b, %1/z"
1138)
1139
1140;; Read the FFR to test for a fault, without using the predicate result.
1141(define_insn "*aarch64_rdffr_z_ptest"
1142 [(set (reg:CC_NZC CC_REGNUM)
1143 (unspec:CC_NZC
1144 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1145 (match_dup 1)
1146 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1147 (and:VNx16BI
1148 (reg:VNx16BI FFRT_REGNUM)
1149 (match_dup 1))]
1150 UNSPEC_PTEST))
1151 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1152 "TARGET_SVE"
1153 "rdffrs\t%0.b, %1/z"
1154)
1155
1156;; Same for unpredicated RDFFR when tested with a known PTRUE.
1157(define_insn "*aarch64_rdffr_ptest"
1158 [(set (reg:CC_NZC CC_REGNUM)
1159 (unspec:CC_NZC
1160 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1161 (match_dup 1)
1162 (const_int SVE_KNOWN_PTRUE)
1163 (reg:VNx16BI FFRT_REGNUM)]
1164 UNSPEC_PTEST))
1165 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1166 "TARGET_SVE"
1167 "rdffrs\t%0.b, %1/z"
1168)
1169
1170;; Read the FFR with zero predication and test the result.
1171(define_insn "*aarch64_rdffr_z_cc"
1172 [(set (reg:CC_NZC CC_REGNUM)
1173 (unspec:CC_NZC
1174 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1175 (match_dup 1)
1176 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1177 (and:VNx16BI
1178 (reg:VNx16BI FFRT_REGNUM)
1179 (match_dup 1))]
1180 UNSPEC_PTEST))
1181 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1182 (and:VNx16BI
1183 (reg:VNx16BI FFRT_REGNUM)
1184 (match_dup 1)))]
1185 "TARGET_SVE"
1186 "rdffrs\t%0.b, %1/z"
1187)
1188
1189;; Same for unpredicated RDFFR when tested with a known PTRUE.
1190(define_insn "*aarch64_rdffr_cc"
1191 [(set (reg:CC_NZC CC_REGNUM)
1192 (unspec:CC_NZC
1193 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1194 (match_dup 1)
1195 (const_int SVE_KNOWN_PTRUE)
1196 (reg:VNx16BI FFRT_REGNUM)]
1197 UNSPEC_PTEST))
1198 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1199 (reg:VNx16BI FFRT_REGNUM))]
1200 "TARGET_SVE"
1201 "rdffrs\t%0.b, %1/z"
1202)
1203
1204;; [R3 in the block comment above about FFR handling]
1205;;
1206;; Arbitrarily update the FFRT after a read from the FFR. This acts as
1207;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
1208(define_insn "aarch64_update_ffrt"
1209 [(set (reg:VNx16BI FFRT_REGNUM)
1210 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1211 "TARGET_SVE"
1212 ""
1213 [(set_attr "type" "no_insn")]
1214)
1215
915d28fe
RS
1216;; =========================================================================
1217;; == Loads
1218;; =========================================================================
1219
1220;; -------------------------------------------------------------------------
1221;; ---- Normal contiguous loads
1222;; -------------------------------------------------------------------------
1223;; Includes contiguous forms of:
1224;; - LD1B
1225;; - LD1D
1226;; - LD1H
1227;; - LD1W
1228;; - LD2B
1229;; - LD2D
1230;; - LD2H
1231;; - LD2W
1232;; - LD3B
1233;; - LD3D
1234;; - LD3H
1235;; - LD3W
1236;; - LD4B
1237;; - LD4D
1238;; - LD4H
1239;; - LD4W
1240;; -------------------------------------------------------------------------
1241
1242;; Predicated LD1.
1243(define_insn "maskload<mode><vpred>"
cc68f7c2
RS
1244 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1245 (unspec:SVE_ALL
915d28fe 1246 [(match_operand:<VPRED> 2 "register_operand" "Upl")
cc68f7c2 1247 (match_operand:SVE_ALL 1 "memory_operand" "m")]
915d28fe 1248 UNSPEC_LD1_SVE))]
43cacb12 1249 "TARGET_SVE"
cc68f7c2 1250 "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
43cacb12
RS
1251)
1252
915d28fe
RS
1253;; Unpredicated LD[234].
1254(define_expand "vec_load_lanes<mode><vsingle>"
1255 [(set (match_operand:SVE_STRUCT 0 "register_operand")
1256 (unspec:SVE_STRUCT
1257 [(match_dup 2)
1258 (match_operand:SVE_STRUCT 1 "memory_operand")]
1259 UNSPEC_LDN))]
43cacb12
RS
1260 "TARGET_SVE"
1261 {
915d28fe 1262 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
1263 }
1264)
1265
915d28fe
RS
1266;; Predicated LD[234].
1267(define_insn "vec_mask_load_lanes<mode><vsingle>"
1268 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1269 (unspec:SVE_STRUCT
1270 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1271 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
1272 UNSPEC_LDN))]
8711e791 1273 "TARGET_SVE"
915d28fe 1274 "ld<vector_count><Vesize>\t%0, %2/z, %1"
8711e791
RS
1275)
1276
624d0f07
RS
1277;; -------------------------------------------------------------------------
1278;; ---- Extending contiguous loads
1279;; -------------------------------------------------------------------------
1280;; Includes contiguous forms of:
1281;; - LD1B
1282;; - LD1H
1283;; - LD1SB
1284;; - LD1SH
1285;; - LD1SW
1286;; - LD1W
1287;; -------------------------------------------------------------------------
1288
1289;; Predicated load and extend.
7bb4b7a5 1290(define_insn_and_rewrite "@aarch64_load<SVE_PRED_LOAD:pred_load>_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
217ccab8
RS
1291 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1292 (unspec:SVE_HSDI
1293 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1294 (ANY_EXTEND:SVE_HSDI
1295 (unspec:SVE_PARTIAL_I
1296 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1297 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
7bb4b7a5 1298 SVE_PRED_LOAD))]
217ccab8
RS
1299 UNSPEC_PRED_X))]
1300 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1301 "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1302 "&& !CONSTANT_P (operands[3])"
1303 {
1304 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1305 }
624d0f07
RS
1306)
1307
1308;; -------------------------------------------------------------------------
1309;; ---- First-faulting contiguous loads
1310;; -------------------------------------------------------------------------
1311;; Includes contiguous forms of:
1312;; - LDFF1B
1313;; - LDFF1D
1314;; - LDFF1H
1315;; - LDFF1W
1316;; - LDNF1B
1317;; - LDNF1D
1318;; - LDNF1H
1319;; - LDNF1W
1320;; -------------------------------------------------------------------------
1321
1322;; Contiguous non-extending first-faulting or non-faulting loads.
1323(define_insn "@aarch64_ld<fn>f1<mode>"
f75cdd2c
RS
1324 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1325 (unspec:SVE_FULL
624d0f07 1326 [(match_operand:<VPRED> 2 "register_operand" "Upl")
f75cdd2c 1327 (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
624d0f07
RS
1328 (reg:VNx16BI FFRT_REGNUM)]
1329 SVE_LDFF1_LDNF1))]
1330 "TARGET_SVE"
1331 "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1332)
1333
1334;; -------------------------------------------------------------------------
1335;; ---- First-faulting extending contiguous loads
1336;; -------------------------------------------------------------------------
1337;; Includes contiguous forms of:
1338;; - LDFF1B
1339;; - LDFF1H
1340;; - LDFF1SB
1341;; - LDFF1SH
1342;; - LDFF1SW
1343;; - LDFF1W
1344;; - LDNF1B
1345;; - LDNF1H
1346;; - LDNF1SB
1347;; - LDNF1SH
1348;; - LDNF1SW
1349;; - LDNF1W
1350;; -------------------------------------------------------------------------
1351
217ccab8
RS
1352;; Predicated first-faulting or non-faulting load and extend.
1353(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1354 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1355 (unspec:SVE_HSDI
1356 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1357 (ANY_EXTEND:SVE_HSDI
1358 (unspec:SVE_PARTIAL_I
1359 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1360 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1361 (reg:VNx16BI FFRT_REGNUM)]
1362 SVE_LDFF1_LDNF1))]
1363 UNSPEC_PRED_X))]
1364 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1365 "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1366 "&& !CONSTANT_P (operands[3])"
1367 {
1368 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1369 }
624d0f07
RS
1370)
1371
1372;; -------------------------------------------------------------------------
1373;; ---- Non-temporal contiguous loads
1374;; -------------------------------------------------------------------------
1375;; Includes:
1376;; - LDNT1B
1377;; - LDNT1D
1378;; - LDNT1H
1379;; - LDNT1W
1380;; -------------------------------------------------------------------------
1381
1382;; Predicated contiguous non-temporal load.
1383(define_insn "@aarch64_ldnt1<mode>"
f75cdd2c
RS
1384 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1385 (unspec:SVE_FULL
624d0f07 1386 [(match_operand:<VPRED> 2 "register_operand" "Upl")
f75cdd2c 1387 (match_operand:SVE_FULL 1 "memory_operand" "m")]
624d0f07
RS
1388 UNSPEC_LDNT1_SVE))]
1389 "TARGET_SVE"
1390 "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1391)
1392
915d28fe
RS
1393;; -------------------------------------------------------------------------
1394;; ---- Normal gather loads
1395;; -------------------------------------------------------------------------
1396;; Includes gather forms of:
1397;; - LD1D
1398;; - LD1W
1399;; -------------------------------------------------------------------------
1400
1401;; Unpredicated gather loads.
f8186eea
RS
1402(define_expand "gather_load<mode><v_int_container>"
1403 [(set (match_operand:SVE_24 0 "register_operand")
1404 (unspec:SVE_24
915d28fe 1405 [(match_dup 5)
624d0f07 1406 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
f8186eea 1407 (match_operand:<V_INT_CONTAINER> 2 "register_operand")
915d28fe
RS
1408 (match_operand:DI 3 "const_int_operand")
1409 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1410 (mem:BLK (scratch))]
1411 UNSPEC_LD1_GATHER))]
1412 "TARGET_SVE"
43cacb12 1413 {
915d28fe 1414 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12 1415 }
43cacb12
RS
1416)
1417
915d28fe
RS
1418;; Predicated gather loads for 32-bit elements. Operand 3 is true for
1419;; unsigned extension and false for signed extension.
f8186eea
RS
1420(define_insn "mask_gather_load<mode><v_int_container>"
1421 [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
1422 (unspec:SVE_4
624d0f07 1423 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
f8186eea 1424 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
624d0f07
RS
1425 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1426 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
f8186eea 1427 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
915d28fe
RS
1428 (mem:BLK (scratch))]
1429 UNSPEC_LD1_GATHER))]
1430 "TARGET_SVE"
1431 "@
f8186eea
RS
1432 ld1<Vesize>\t%0.s, %5/z, [%2.s]
1433 ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1434 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1435 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1436 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1437 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
915d28fe
RS
1438)
1439
1440;; Predicated gather loads for 64-bit elements. The value of operand 3
1441;; doesn't matter in this case.
f8186eea
RS
1442(define_insn "mask_gather_load<mode><v_int_container>"
1443 [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
1444 (unspec:SVE_2
624d0f07 1445 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
f8186eea 1446 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
624d0f07 1447 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
915d28fe 1448 (match_operand:DI 3 "const_int_operand")
f8186eea 1449 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
915d28fe
RS
1450 (mem:BLK (scratch))]
1451 UNSPEC_LD1_GATHER))]
1452 "TARGET_SVE"
1453 "@
f8186eea
RS
1454 ld1<Vesize>\t%0.d, %5/z, [%2.d]
1455 ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1456 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1457 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
915d28fe
RS
1458)
1459
f8186eea
RS
1460;; Likewise, but with the offset being extended from 32 bits.
1461(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1462 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1463 (unspec:SVE_2
624d0f07
RS
1464 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1465 (match_operand:DI 1 "register_operand" "rk, rk")
1466 (unspec:VNx2DI
f8186eea
RS
1467 [(match_operand 6)
1468 (ANY_EXTEND:VNx2DI
1469 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1470 UNSPEC_PRED_X)
1471 (match_operand:DI 3 "const_int_operand")
1472 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1473 (mem:BLK (scratch))]
1474 UNSPEC_LD1_GATHER))]
1475 "TARGET_SVE"
1476 "@
1477 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1478 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
1479 "&& !CONSTANT_P (operands[6])"
1480 {
1481 operands[6] = CONSTM1_RTX (VNx2BImode);
1482 }
1483)
1484
1485;; Likewise, but with the offset being truncated to 32 bits and then
1486;; sign-extended.
1487(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1488 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1489 (unspec:SVE_2
1490 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1491 (match_operand:DI 1 "register_operand" "rk, rk")
1492 (unspec:VNx2DI
1493 [(match_operand 6)
624d0f07
RS
1494 (sign_extend:VNx2DI
1495 (truncate:VNx2SI
1496 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1497 UNSPEC_PRED_X)
1498 (match_operand:DI 3 "const_int_operand")
f8186eea 1499 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
624d0f07
RS
1500 (mem:BLK (scratch))]
1501 UNSPEC_LD1_GATHER))]
1502 "TARGET_SVE"
1503 "@
f8186eea
RS
1504 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1505 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1506 "&& !CONSTANT_P (operands[6])"
1507 {
1508 operands[6] = CONSTM1_RTX (VNx2BImode);
1509 }
624d0f07
RS
1510)
1511
f8186eea
RS
1512;; Likewise, but with the offset being truncated to 32 bits and then
1513;; zero-extended.
1514(define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1515 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1516 (unspec:SVE_2
624d0f07
RS
1517 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1518 (match_operand:DI 1 "register_operand" "rk, rk")
1519 (and:VNx2DI
1520 (match_operand:VNx2DI 2 "register_operand" "w, w")
1521 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1522 (match_operand:DI 3 "const_int_operand")
f8186eea 1523 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
624d0f07
RS
1524 (mem:BLK (scratch))]
1525 UNSPEC_LD1_GATHER))]
1526 "TARGET_SVE"
1527 "@
f8186eea
RS
1528 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1529 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
624d0f07 1530)
915d28fe
RS
1531
1532;; -------------------------------------------------------------------------
624d0f07 1533;; ---- Extending gather loads
915d28fe 1534;; -------------------------------------------------------------------------
624d0f07
RS
1535;; Includes gather forms of:
1536;; - LD1B
1537;; - LD1H
1538;; - LD1SB
1539;; - LD1SH
1540;; - LD1SW
1541;; - LD1W
915d28fe
RS
1542;; -------------------------------------------------------------------------
1543
624d0f07
RS
;; Predicated extending gather loads into 32-bit containers.
;;   0: the destination vector
;;   1: the scalar base or immediate offset
;;   2: the vector of 32-bit bases or offsets
;;   3: selects the offset extension (zero -> SXTW, one -> UXTW)
;;   4: the scale multiplier
;;   5: the predicate that governs the load
;;   6: the predicate of the enclosing UNSPEC_PRED_X extension
;; NOTE(review): the narrower_mask/self_mask test presumably restricts the
;; pattern to memory elements narrower than the container -- confirm
;; against the attribute definitions.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
  [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:SVE_4HSI
          [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:SVE_4HSI
             (unspec:SVE_4BHI
               [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  ;; Replace a non-constant extension predicate with an all-true constant.
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)
1573
;; Predicated extending gather loads into 64-bit containers.  Operand 5
;; governs the load and operand 6 is the predicate of the enclosing
;; UNSPEC_PRED_X extension.  Operand 3 carries no constraints here: none
;; of the alternatives depends on its value.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_2HSDI
          [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
                (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  ;; Replace a non-constant extension predicate with an all-true constant.
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
1601
87a80d27
RS
;; Extending gather loads into 64-bit containers in which the vector
;; offset (operand 2) is a VNx2SI value that is sign- or zero-extended to
;; VNx2DI under UNSPEC_PRED_X.  Operands 6 and 7 are the predicates of the
;; data extension and the offset extension respectively.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 6)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (ANY_EXTEND2:VNx2DI
                     (match_operand:VNx2SI 2 "register_operand" "w, w"))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
  ;; Rewrite unless both extension predicates are already constants.
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1631
87a80d27
RS
;; Extending gather loads into 64-bit containers in which the VNx2DI
;; vector offset (operand 2) is truncated to VNx2SI and then sign-extended
;; back to VNx2DI.  Operands 6 and 7 are the predicates of the data
;; extension and the offset extension respectively.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 6)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (sign_extend:VNx2DI
                     (truncate:VNx2SI
                       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  ;; Rewrite unless both extension predicates are already constants.
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1663
;; Extending gather loads into 64-bit containers in which the VNx2DI
;; vector offset (operand 2) is ANDed with an aarch64_sve_uxtw_immediate
;; mask (operand 6), giving the UXTW addressing forms.  Operand 7 is the
;; predicate of the data extension.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 7)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (and:VNx2DI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")
                  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  ;; Replace a non-constant extension predicate with an all-true constant.
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1691
1692;; -------------------------------------------------------------------------
624d0f07 1693;; ---- First-faulting gather loads
915d28fe 1694;; -------------------------------------------------------------------------
624d0f07
RS
1695;; Includes gather forms of:
1696;; - LDFF1D
1697;; - LDFF1W
915d28fe
RS
1698;; -------------------------------------------------------------------------
1699
624d0f07
RS
;; Predicated first-faulting gather loads of 32-bit elements.  Operand 5
;; is the governing predicate.  Operand 3 selects how the 32-bit vector
;; offsets in operand 2 are extended: zero gives SXTW and one gives UXTW.
;; FFRT_REGNUM is listed as an input of the unspec.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:SVE_FULL_S
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
           (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
           (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1w\t%0.s, %5/z, [%2.s]
   ldff1w\t%0.s, %5/z, [%2.s, #%1]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)
1722
;; Predicated first-faulting gather loads of 64-bit elements.  Operand 5
;; is the governing predicate.  Operand 3 carries no constraints here:
;; none of the alternatives depends on its value.  FFRT_REGNUM is listed
;; as an input of the unspec.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_FULL_D
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
           (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%2.d]
   ldff1d\t%0.d, %5/z, [%2.d, #%1]
   ldff1d\t%0.d, %5/z, [%1, %2.d]
   ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)
1743
;; First-faulting gather loads of 64-bit elements in which the VNx2DI
;; vector offset (operand 2) is truncated to VNx2SI and sign-extended back
;; to VNx2DI.  Operand 6 is the predicate of that offset extension.
(define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_D
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  ;; Replace a non-constant extension predicate with an all-true constant.
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
1770
;; First-faulting gather loads of 64-bit elements in which the VNx2DI
;; vector offset (operand 2) is ANDed with an aarch64_sve_uxtw_immediate
;; mask (operand 6), giving the UXTW addressing forms.
(define_insn "*aarch64_ldff1_gather<mode>_uxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_D
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (and:VNx2DI
             (match_operand:VNx2DI 2 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
)
1790
1791;; -------------------------------------------------------------------------
1792;; ---- First-faulting extending gather loads
1793;; -------------------------------------------------------------------------
1794;; Includes gather forms of:
1795;; - LDFF1B
1796;; - LDFF1H
1797;; - LDFF1SB
1798;; - LDFF1SH
1799;; - LDFF1SW
1800;; - LDFF1W
1801;; -------------------------------------------------------------------------
1802
;; Predicated extending first-faulting gather loads into 32-bit elements.
;; Operand 5 governs the load and operand 6 is the predicate of the
;; enclosing UNSPEC_PRED_X extension.  Operand 3 selects how the vector
;; offsets in operand 2 are extended: zero gives SXTW and one gives UXTW.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
  [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:VNx4_WIDE
          [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:VNx4_WIDE
             (unspec:VNx4_NARROW
               [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  ;; Replace a non-constant extension predicate with an all-true constant.
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)
1833
;; Predicated extending first-faulting gather loads into 64-bit elements.
;; Operand 5 governs the load and operand 6 is the predicate of the
;; enclosing UNSPEC_PRED_X extension.  Operand 3 carries no constraints
;; here: none of the alternatives depends on its value.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
        (unspec:VNx2_WIDE
          [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:VNx2_WIDE
             (unspec:VNx2_NARROW
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
                (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  ;; Replace a non-constant extension predicate with an all-true constant.
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
1862
;; Extending first-faulting gather loads into 64-bit elements in which the
;; VNx2DI vector offset (operand 2) is truncated to VNx2SI and then
;; sign-extended back to VNx2DI.  Operands 6 and 7 are the predicates of
;; the data extension and the offset extension respectively.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
        (unspec:VNx2_WIDE
          [(match_operand 6)
           (ANY_EXTEND:VNx2_WIDE
             (unspec:VNx2_NARROW
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (sign_extend:VNx2DI
                     (truncate:VNx2SI
                       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  ;; Rewrite unless both extension predicates are already constants.
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1894
;; Extending first-faulting gather loads into 64-bit elements in which the
;; VNx2DI vector offset (operand 2) is ANDed with an
;; aarch64_sve_uxtw_immediate mask (operand 6), giving the UXTW addressing
;; forms.  Operand 7 is the predicate of the data extension.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
        (unspec:VNx2_WIDE
          [(match_operand 7)
           (ANY_EXTEND:VNx2_WIDE
             (unspec:VNx2_NARROW
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (and:VNx2DI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")
                  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  ;; Replace a non-constant extension predicate with an all-true constant.
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)
1922
1923;; =========================================================================
1924;; == Prefetches
1925;; =========================================================================
1926
1927;; -------------------------------------------------------------------------
1928;; ---- Contiguous prefetches
1929;; -------------------------------------------------------------------------
1930;; Includes contiguous forms of:
1931;; - PRFB
1932;; - PRFD
1933;; - PRFH
1934;; - PRFW
1935;; -------------------------------------------------------------------------
1936
;; Contiguous predicated prefetches.  The operands are:
;;   0: the governing predicate
;;   1: the address to prefetch
;;   2: the real prefetch operation (an svprfop)
;;   3, 4: distilled information for the generic RTL prefetch code
(define_insn "@aarch64_sve_prefetch<mode>"
  [(prefetch (unspec:DI
               [(match_operand:<VPRED> 0 "register_operand" "Upl")
                (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
                (match_operand:DI 2 "const_int_operand")]
               UNSPEC_SVE_PREFETCH)
             (match_operand:DI 3 "const_int_operand")
             (match_operand:DI 4 "const_int_operand"))]
  "TARGET_SVE"
  {
    /* Wrap the address in a MEM of the accessed mode before it is printed
       through %1.  */
    operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
    return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
  }
)
1954
1955;; -------------------------------------------------------------------------
1956;; ---- Gather prefetches
1957;; -------------------------------------------------------------------------
1958;; Includes gather forms of:
1959;; - PRFB
1960;; - PRFD
1961;; - PRFH
1962;; - PRFW
1963;; -------------------------------------------------------------------------
1964
;; Predicated gather prefetches for 32-bit bases and offsets.  The
;; operands are:
;;   0: the governing predicate
;;   1: the scalar component of the address
;;   2: the vector component of the address
;;   3: 1 for zero extension, 0 for sign extension
;;   4: the scale multiplier
;;   5: a vector zero that identifies the mode of data being accessed
;;   6: the prefetch operator (an svprfop)
;;   7: the normal RTL prefetch rw flag
;;   8: the normal RTL prefetch locality value
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
  [(prefetch (unspec:DI
               [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    /* One { mnemonic, operand template } row per constraint alternative.
       The unscaled register-offset forms use PRFB.  */
    static const char *const templates[][2] = {
      { "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]" },
      { "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]" },
      { "prfb", "%0, [%1, %2.s, sxtw]" },
      { "prfb", "%0, [%1, %2.s, uxtw]" },
      { "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]" },
      { "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]" }
    };
    const char *const *chosen = templates[which_alternative];
    return aarch64_output_sve_prefetch (chosen[0], operands[6], chosen[1]);
  }
)
2002
;; Predicated gather prefetches for 64-bit elements.  Operand 0 is the
;; governing predicate, operands 1 and 2 the scalar and vector components
;; of the address, operand 4 the scale, operand 5 a zero vector whose mode
;; identifies the data being accessed and operand 6 the svprfop.  Operand 3
;; carries no constraints here: none of the alternatives depends on it.
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
  [(prefetch (unspec:DI
               [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
                (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    /* One { mnemonic, operand template } row per constraint alternative.
       The unscaled register-offset form uses PRFB.  */
    static const char *const templates[][2] = {
      { "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]" },
      { "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]" },
      { "prfb", "%0, [%1, %2.d]" },
      { "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]" }
    };
    const char *const *chosen = templates[which_alternative];
    return aarch64_output_sve_prefetch (chosen[0], operands[6], chosen[1]);
  }
)
2029
;; Gather prefetches for 64-bit elements in which the VNx2DI vector offset
;; (operand 2) is truncated to VNx2SI and then sign-extended back to
;; VNx2DI.  Operand 9 is the predicate of that extension; the rewrite
;; makes it a copy of the governing predicate (operand 0).
(define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
  [(prefetch (unspec:DI
               [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "register_operand" "rk, rk")
                (unspec:VNx2DI_ONLY
                  [(match_operand 9)
                   (sign_extend:VNx2DI
                     (truncate:VNx2SI
                       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    /* One { mnemonic, operand template } row per constraint alternative.
       The unscaled form uses PRFB.  */
    static const char *const templates[][2] = {
      { "prfb", "%0, [%1, %2.d, sxtw]" },
      { "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]" }
    };
    const char *const *chosen = templates[which_alternative];
    return aarch64_output_sve_prefetch (chosen[0], operands[6], chosen[1]);
  }
  "&& !rtx_equal_p (operands[0], operands[9])"
  {
    operands[9] = copy_rtx (operands[0]);
  }
)
2062
;; Gather prefetches for 64-bit elements in which the VNx2DI vector offset
;; (operand 2) is ANDed with an aarch64_sve_uxtw_immediate mask
;; (operand 9), giving the UXTW addressing forms.
(define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
  [(prefetch (unspec:DI
               [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "register_operand" "rk, rk")
                (and:VNx2DI_ONLY
                  (match_operand:VNx2DI 2 "register_operand" "w, w")
                  (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    /* One { mnemonic, operand template } row per constraint alternative.
       The unscaled form uses PRFB.  */
    static const char *const templates[][2] = {
      { "prfb", "%0, [%1, %2.d, uxtw]" },
      { "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]" }
    };
    const char *const *chosen = templates[which_alternative];
    return aarch64_output_sve_prefetch (chosen[0], operands[6], chosen[1]);
  }
)
2088
2089;; =========================================================================
2090;; == Stores
2091;; =========================================================================
2092
2093;; -------------------------------------------------------------------------
2094;; ---- Normal contiguous stores
2095;; -------------------------------------------------------------------------
2096;; Includes contiguous forms of:
2097;; - ST1B
2098;; - ST1D
2099;; - ST1H
2100;; - ST1W
2101;; - ST2B
2102;; - ST2D
2103;; - ST2H
2104;; - ST2W
2105;; - ST3B
2106;; - ST3D
2107;; - ST3H
2108;; - ST3W
2109;; - ST4B
2110;; - ST4D
2111;; - ST4H
2112;; - ST4W
2113;; -------------------------------------------------------------------------
2114
;; Predicated ST1.  Operand 2 is the governing predicate and operand 1 the
;; vector to store.  The destination memory (operand 0) also appears as an
;; input via (match_dup 0), hence the "+m" constraint.
(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vctype>, %2, %0"
)
2126
;; Unpredicated ST[234].  The expander supplies an all-true predicate as
;; operand 2.  Because the store is then always a full update, the
;; dependence on the old memory contents expressed by (match_dup 0) is
;; redundant.  There is no obvious benefit in special-casing the all-true
;; form though; in particular, it is very unlikely that a store_lanes
;; would only be found to be dead during RTL optimization.
(define_expand "vec_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "register_operand")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2144
;; Predicated ST[234].  Operand 2 is the governing predicate and operand 1
;; the structure of vectors to store.  The destination memory (operand 0)
;; also appears as an input via (match_dup 0).
(define_insn "vec_mask_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  "st<vector_count><Vesize>\t%1, %2, %0"
)
2156
2157;; -------------------------------------------------------------------------
2158;; ---- Truncating contiguous stores
2159;; -------------------------------------------------------------------------
2160;; Includes:
2161;; - ST1B
2162;; - ST1H
2163;; - ST1W
2164;; -------------------------------------------------------------------------
2165
;; Predicated truncating store with 8 elements per 128-bit block.
;; Operand 1 (VNx8_WIDE) is truncated to VNx8_NARROW and stored to
;; operand 0 under the control of predicate operand 2.
(define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
  [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
        (unspec:VNx8_NARROW
          [(match_operand:VNx8BI 2 "register_operand" "Upl")
           (truncate:VNx8_NARROW
             (match_operand:VNx8_WIDE 1 "register_operand" "w"))
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
)
2178
;; Predicated truncating store with 4 elements per 128-bit block.
;; Operand 1 (VNx4_WIDE) is truncated to VNx4_NARROW and stored to
;; operand 0 under the control of predicate operand 2.
(define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
  [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
        (unspec:VNx4_NARROW
          [(match_operand:VNx4BI 2 "register_operand" "Upl")
           (truncate:VNx4_NARROW
             (match_operand:VNx4_WIDE 1 "register_operand" "w"))
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
)
2191
;; Predicated truncating store with 2 elements per 128-bit block.
;; Operand 1 (VNx2_WIDE) is truncated to VNx2_NARROW and stored to
;; operand 0 under the control of predicate operand 2.
(define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
  [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
        (unspec:VNx2_NARROW
          [(match_operand:VNx2BI 2 "register_operand" "Upl")
           (truncate:VNx2_NARROW
             (match_operand:VNx2_WIDE 1 "register_operand" "w"))
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
)
2204
2205;; -------------------------------------------------------------------------
2206;; ---- Non-temporal contiguous stores
2207;; -------------------------------------------------------------------------
2208;; Includes:
2209;; - STNT1B
2210;; - STNT1D
2211;; - STNT1H
2212;; - STNT1W
2213;; -------------------------------------------------------------------------
2214
;; Predicated non-temporal contiguous store (STNT1).  Operand 2 is the
;; governing predicate and operand 1 the vector to store.  The destination
;; memory (operand 0) also appears as an input via (match_dup 0).
(define_insn "@aarch64_stnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_FULL 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_STNT1_SVE))]
  "TARGET_SVE"
  "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
)
2225
2226;; -------------------------------------------------------------------------
2227;; ---- Normal scatter stores
2228;; -------------------------------------------------------------------------
2229;; Includes scatter forms of:
2230;; - ST1D
2231;; - ST1W
2232;; -------------------------------------------------------------------------
2233
;; Unpredicated scatter stores.  The expander supplies an all-true
;; predicate as operand 5.  The remaining operands are:
;;   0: the scalar base or immediate offset
;;   1: the vector of offsets
;;   2: an integer constant (in the 32-bit predicated pattern it selects
;;      the offset extension)
;;   3: the scale multiplier
;;   4: the data to store
(define_expand "scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5)
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
           (match_operand:<V_INT_CONTAINER> 1 "register_operand")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
           (match_operand:SVE_24 4 "register_operand")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
2250
915d28fe
RS
;; Predicated scatter stores for 32-bit containers.  Operand 5 is the
;; governing predicate and operand 4 the data to store.  Operand 2 selects
;; how the 32-bit vector offsets in operand 1 are extended: zero gives
;; SXTW and one gives UXTW.
;;
;; The immediate-offset alternative uses the element-size-specific
;; constraint vg<Vesize> so that it accepts exactly the immediates that the
;; operand 0 predicate allows.  A fixed "vgw" would reject valid byte and
;; halfword immediates that are not multiples of 4, needlessly forcing
;; them into a register.
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vg<Vesize>, rk, rk, rk, rk")
           (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
           (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.s, %5, [%1.s]
   st1<Vesize>\t%4.s, %5, [%1.s, #%0]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)
2272
;; Predicated scatter stores for 64-bit containers.  Operand 5 is the
;; governing predicate and operand 4 the data to store.  Operand 2 carries
;; no constraints here: none of the alternatives depends on its value.
;;
;; The immediate-offset alternative uses the element-size-specific
;; constraint vg<Vesize> so that it accepts exactly the immediates that the
;; operand 0 predicate allows.  A fixed "vgd" would reject valid immediates
;; for narrower elements that are not multiples of 8, needlessly forcing
;; them into a register.
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vg<Vesize>, rk, rk")
           (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%1.d]
   st1<Vesize>\t%4.d, %5, [%1.d, #%0]
   st1<Vesize>\t%4.d, %5, [%0, %1.d]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)
2292
37a3662f
RS
;; Scatter stores for 64-bit containers in which the vector offset
;; (operand 1) is a VNx2SI value that is sign- or zero-extended to VNx2DI
;; under UNSPEC_PRED_X.  Operand 6 is the predicate of that extension.
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (ANY_EXTEND:VNx2DI
                (match_operand:VNx2SI 1 "register_operand" "w, w"))]
             UNSPEC_PRED_X)
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
  ;; Replace a non-constant extension predicate with an all-true constant.
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)
2317
;; Scatter stores for 64-bit containers in which the VNx2DI vector offset
;; (operand 1) is truncated to VNx2SI and then sign-extended back to
;; VNx2DI.  Operand 6 is the predicate of that extension.
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
  ;; Replace a non-constant extension predicate with an all-true constant.
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)
2344
37a3662f
RS
2345;; Likewise, but with the offset being truncated to 32 bits and then
2346;; zero-extended.
;;
;; Here the zero-extension is written as an AND of the VNx2DI offset vector
;; (operand 1) with operand 6, which must satisfy
;; aarch64_sve_uxtw_immediate.  There is no UNSPEC_PRED_X wrapper and hence
;; no rewrite step; this is a plain define_insn.  Both alternatives print
;; the UXTW address form; only the second also prints the scale %p3.
2347(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
624d0f07
RS
2348 [(set (mem:BLK (scratch))
2349 (unspec:BLK
2350 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2351 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2352 (and:VNx2DI
2353 (match_operand:VNx2DI 1 "register_operand" "w, w")
2354 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2355 (match_operand:DI 2 "const_int_operand")
37a3662f
RS
2356 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2357 (match_operand:SVE_2 4 "register_operand" "w, w")]
624d0f07
RS
2358 UNSPEC_ST1_SCATTER))]
2359 "TARGET_SVE"
2360 "@
37a3662f
RS
2361 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2362 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
624d0f07
RS
2363)
2364
2365;; -------------------------------------------------------------------------
2366;; ---- Truncating scatter stores
2367;; -------------------------------------------------------------------------
2368;; Includes scatter forms of:
2369;; - ST1B
2370;; - ST1H
2371;; - ST1W
2372;; -------------------------------------------------------------------------
2373
2374;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is
2375;; true for unsigned extension and false for signed extension.
;;
;; The stored value is operand 4 (VNx4_WIDE) truncated to VNx4_NARROW.
;; The six alternatives emit, in order: [%1.s]; [%1.s, #%0]; then the
;; unscaled sxtw and uxtw forms; then the sxtw and uxtw forms scaled by %p3.
;; In alternatives 3-6 the operand 2 constraint alternates "Z" / "Ui1" in
;; step with the sxtw / uxtw templates, matching the true/false convention
;; described above.
2376(define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2377 [(set (mem:BLK (scratch))
2378 (unspec:BLK
2379 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2380 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2381 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2382 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2383 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2384 (truncate:VNx4_NARROW
2385 (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
2386 UNSPEC_ST1_SCATTER))]
2387 "TARGET_SVE"
2388 "@
2389 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2390 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2391 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2392 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2393 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2394 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2395)
2396
2397;; Predicated truncating scatter stores for 64-bit elements. The value of
2398;; operand 2 doesn't matter in this case.
;;
;; The stored value is operand 4 (VNx2_WIDE) truncated to VNx2_NARROW.
;; The four alternatives emit, in order, the address forms [%1.d],
;; [%1.d, #%0], [%0, %1.d] and [%0, %1.d, lsl %p3]; operand 2 carries no
;; constraint string and is not printed.
2399(define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2400 [(set (mem:BLK (scratch))
2401 (unspec:BLK
2402 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2403 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2404 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2405 (match_operand:DI 2 "const_int_operand")
2406 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2407 (truncate:VNx2_NARROW
2408 (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
2409 UNSPEC_ST1_SCATTER))]
2410 "TARGET_SVE"
2411 "@
2412 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2413 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2414 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2415 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2416)
2417
2418;; Likewise, but with the offset being sign-extended from 32 bits.
;;
;; As written, the offset (operand 1, VNx2DI) is truncated to VNx2SI and
;; then sign-extended, inside an UNSPEC_PRED_X predicated on operand 6.
;; The rewrite step makes operand 6 a copy of the store predicate
;; (operand 5) whenever the two are not already rtx_equal_p.
;; NOTE(review): the non-truncating *mask_scatter_store..._sxtw pattern in
;; this file instead canonicalises operand 6 to CONSTM1_RTX (<VPRED>mode)
;; under "!CONSTANT_P (operands[6])"; confirm the difference is intended.
2419(define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2420 [(set (mem:BLK (scratch))
2421 (unspec:BLK
2422 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2423 (match_operand:DI 0 "register_operand" "rk, rk")
2424 (unspec:VNx2DI
2425 [(match_operand 6)
2426 (sign_extend:VNx2DI
2427 (truncate:VNx2SI
2428 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2429 UNSPEC_PRED_X)
2430 (match_operand:DI 2 "const_int_operand")
2431 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2432 (truncate:VNx2_NARROW
2433 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2434 UNSPEC_ST1_SCATTER))]
2435 "TARGET_SVE"
2436 "@
2437 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2438 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2439 "&& !rtx_equal_p (operands[5], operands[6])"
2440 {
2441 operands[6] = copy_rtx (operands[5]);
2442 }
2443)
2444
2445;; Likewise, but with the offset being zero-extended from 32 bits.
;;
;; The zero-extension is written as an AND of the VNx2DI offset vector
;; (operand 1) with operand 6, which must satisfy
;; aarch64_sve_uxtw_immediate.  The stored value is operand 4 truncated to
;; VNx2_NARROW.  Both alternatives print the UXTW address form; only the
;; second also prints the scale %p3.
2446(define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2447 [(set (mem:BLK (scratch))
2448 (unspec:BLK
2449 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2450 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2451 (and:VNx2DI
2452 (match_operand:VNx2DI 1 "register_operand" "w, w")
2453 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2454 (match_operand:DI 2 "const_int_operand")
2455 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2456 (truncate:VNx2_NARROW
2457 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2458 UNSPEC_ST1_SCATTER))]
2459 "TARGET_SVE"
2460 "@
2461 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2462 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2463)
2464
915d28fe
RS
2465;; =========================================================================
2466;; == Vector creation
2467;; =========================================================================
2468
2469;; -------------------------------------------------------------------------
2470;; ---- [INT,FP] Duplicate element
2471;; -------------------------------------------------------------------------
2472;; Includes:
624d0f07 2473;; - DUP
915d28fe
RS
2474;; - MOV
2475;; - LD1RB
2476;; - LD1RD
2477;; - LD1RH
2478;; - LD1RW
36696774
RS
2479;; - LD1ROB (F64MM)
2480;; - LD1ROD (F64MM)
2481;; - LD1ROH (F64MM)
2482;; - LD1ROW (F64MM)
915d28fe
RS
2483;; - LD1RQB
2484;; - LD1RQD
2485;; - LD1RQH
2486;; - LD1RQW
2487;; -------------------------------------------------------------------------
2488
43cacb12
RS
;; Expand a broadcast of scalar operand 1 into SVE vector operand 0.
;; When operand 1 is a MEM, the preparation code emits sve_ld1r<mode>
;; directly, using aarch64_ptrue_reg (<VPRED>mode) as the predicate and
;; CONST0_RTX (<MODE>mode) as the final operand, and finishes with DONE.
;; Otherwise the template below is emitted as-is, including its VNx16BI
;; scratch clobber.
2489(define_expand "vec_duplicate<mode>"
2490 [(parallel
cc68f7c2
RS
2491 [(set (match_operand:SVE_ALL 0 "register_operand")
2492 (vec_duplicate:SVE_ALL
43cacb12 2493 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
678faefc 2494 (clobber (scratch:VNx16BI))])]
43cacb12
RS
2495 "TARGET_SVE"
2496 {
2497 if (MEM_P (operands[1]))
2498 {
16de3637 2499 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
2500 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2501 CONST0_RTX (<MODE>mode)));
2502 DONE;
2503 }
2504 }
2505)
2506
2507;; Accept memory operands for the benefit of combine, and also in case
2508;; the scalar input gets spilled to memory during RA. We want to split
2509;; the load at the first opportunity in order to allow the PTRUE to be
2510;; optimized with surrounding code.
;;
;; The first two alternatives emit a single MOV from the scalar (printed
;; as %<vwcore>1 and %<Vetype>1 respectively).  The third alternative
;; emits "#" and is split whenever operand 1 is a MEM: the split allocates
;; a VNx16BI pseudo if operand 2 is still a SCRATCH, moves
;; CONSTM1_RTX (VNx16BImode) into it, takes its <VPRED>mode lowpart as the
;; predicate and emits sve_ld1r<mode>.  The "length" attribute is 8 for
;; that alternative and 4 for the others.
2511(define_insn_and_split "*vec_duplicate<mode>_reg"
cc68f7c2
RS
2512 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
2513 (vec_duplicate:SVE_ALL
43cacb12 2514 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
678faefc 2515 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
43cacb12
RS
2516 "TARGET_SVE"
2517 "@
2518 mov\t%0.<Vetype>, %<vwcore>1
2519 mov\t%0.<Vetype>, %<Vetype>1
2520 #"
2521 "&& MEM_P (operands[1])"
2522 [(const_int 0)]
2523 {
2524 if (GET_CODE (operands[2]) == SCRATCH)
678faefc
RS
2525 operands[2] = gen_reg_rtx (VNx16BImode);
2526 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2527 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2528 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
43cacb12
RS
2529 CONST0_RTX (<MODE>mode)));
2530 DONE;
2531 }
2532 [(set_attr "length" "4,4,8")]
2533)
2534
4aeb1ba7
RS
2535;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
;;
;; Only enabled when !BYTES_BIG_ENDIAN.  At output time operand 1 is
;; replaced by a <MODE>mode REG with the same register number so that it
;; can be printed as %1.q in the DUP.
2536(define_insn "@aarch64_vec_duplicate_vq<mode>_le"
f75cdd2c
RS
2537 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2538 (vec_duplicate:SVE_FULL
4aeb1ba7
RS
2539 (match_operand:<V128> 1 "register_operand" "w")))]
2540 "TARGET_SVE && !BYTES_BIG_ENDIAN"
2541 {
2542 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2543 return "dup\t%0.q, %1.q[0]";
2544 }
2545)
2546
2547;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2548;; The SVE register layout puts memory lane N into (architectural)
2549;; register lane N, whereas the Advanced SIMD layout puts the memory
2550;; lsb into the register lsb. We therefore have to describe this in rtl
2551;; terms as a reverse of the V128 vector followed by a duplicate.
;;
;; Operand 2 must be a descending_int_parallel whose first index equals
;; GET_MODE_NUNITS (<V128>mode) - 1, i.e. the selection starts at the last
;; V128 element.  The emitted instruction is the same DUP as in the
;; little-endian pattern, with operand 1 re-expressed as a <MODE>mode REG.
2552(define_insn "@aarch64_vec_duplicate_vq<mode>_be"
f75cdd2c
RS
2553 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2554 (vec_duplicate:SVE_FULL
4aeb1ba7
RS
2555 (vec_select:<V128>
2556 (match_operand:<V128> 1 "register_operand" "w")
2557 (match_operand 2 "descending_int_parallel"))))]
2558 "TARGET_SVE
2559 && BYTES_BIG_ENDIAN
2560 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2561 GET_MODE_NUNITS (<V128>mode) - 1)"
2562 {
2563 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2564 return "dup\t%0.q, %1.q[0]";
2565 }
2566)
2567
43cacb12 2568;; This is used for vec_duplicate<mode>s from memory, but can also
700d4cb0 2569;; be used by combine to optimize selects of a vec_duplicate<mode>
43cacb12
RS
2570;; with zero.
;;
;; The RTL is an UNSPEC_SEL of a vec_duplicate of scalar memory operand 2
;; and operand 3, which must satisfy aarch64_simd_imm_zero, under
;; predicate operand 1.  It is emitted as LD1R with zeroing (/z)
;; predication.
2571(define_insn "sve_ld1r<mode>"
cc68f7c2
RS
2572 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2573 (unspec:SVE_ALL
43cacb12 2574 [(match_operand:<VPRED> 1 "register_operand" "Upl")
cc68f7c2 2575 (vec_duplicate:SVE_ALL
43cacb12 2576 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
cc68f7c2 2577 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
43cacb12
RS
2578 UNSPEC_SEL))]
2579 "TARGET_SVE"
2580 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2581)
2582
4aeb1ba7
RS
2583;; Load 128 bits from memory under predicate control and duplicate to
2584;; fill a vector.
;;
;; Note the operand numbering: the memory source is operand 1 and the
;; predicate is operand 2.  At output time operand 1 is replaced by a
;; <VEL>mode MEM at the same address before being printed.
2585(define_insn "@aarch64_sve_ld1rq<mode>"
f75cdd2c
RS
2586 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2587 (unspec:SVE_FULL
4aeb1ba7
RS
2588 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2589 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
43cacb12
RS
2590 UNSPEC_LD1RQ))]
2591 "TARGET_SVE"
4aeb1ba7
RS
2592 {
2593 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2594 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
2595 }
43cacb12
RS
2596)
2597
9ceec73f
MM
;; Load from OImode memory operand 1 under predicate operand 2 and emit
;; LD1RO<Vesize> with zeroing predication.  Only available when
;; TARGET_SVE_F64MM.  As in the LD1RQ pattern, operand 1 is replaced by a
;; <VEL>mode MEM at the same address before being printed.
;; NOTE(review): by analogy with LD1RQ this presumably replicates the
;; loaded block across the whole vector -- confirm against the ISA.
2598(define_insn "@aarch64_sve_ld1ro<mode>"
2599 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2600 (unspec:SVE_FULL
2601 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2602 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2603 "UO<Vesize>")]
2604 UNSPEC_LD1RO))]
36696774 2605 "TARGET_SVE_F64MM"
9ceec73f
MM
2606 {
2607 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2608 return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2609 }
2610)
2611
915d28fe
RS
2612;; -------------------------------------------------------------------------
2613;; ---- [INT,FP] Initialize from individual elements
2614;; -------------------------------------------------------------------------
2615;; Includes:
2616;; - INSR
2617;; -------------------------------------------------------------------------
2618
;; Initialize SVE vector operand 0 from operand 1.  All of the work is
;; delegated to aarch64_sve_expand_vector_init; the expander itself emits
;; nothing else and always finishes with DONE.
2619(define_expand "vec_init<mode><Vel>"
f75cdd2c 2620 [(match_operand:SVE_FULL 0 "register_operand")
915d28fe 2621 (match_operand 1 "")]
43cacb12
RS
2622 "TARGET_SVE"
2623 {
915d28fe 2624 aarch64_sve_expand_vector_init (operands[0], operands[1]);
43cacb12
RS
2625 DONE;
2626 }
2627)
2628
915d28fe
RS
2629;; Shift an SVE vector left and insert a scalar into element 0.
;;
;; Alternatives 1 and 2 tie operand 1 to the destination (constraint "0")
;; and emit a single INSR.  Alternatives 3 and 4 accept a separate source
;; register and prefix the INSR with "movprfx %0, %1"; they are the ones
;; flagged in the "movprfx" attribute.  Within each pair, the first
;; alternative prints the scalar as %<vwcore>2 and the second as
;; %<Vetype>2.
2630(define_insn "vec_shl_insert_<mode>"
f75cdd2c
RS
2631 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
2632 (unspec:SVE_FULL
2633 [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
61ee25b9 2634 (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
915d28fe
RS
2635 UNSPEC_INSR))]
2636 "TARGET_SVE"
2637 "@
2638 insr\t%0.<Vetype>, %<vwcore>2
61ee25b9
RS
2639 insr\t%0.<Vetype>, %<Vetype>2
2640 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2641 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
2642 [(set_attr "movprfx" "*,*,yes,yes")]
915d28fe
RS
2643)
2644
2645;; -------------------------------------------------------------------------
2646;; ---- [INT] Linear series
2647;; -------------------------------------------------------------------------
2648;; Includes:
2649;; - INDEX
2650;; -------------------------------------------------------------------------
2651
;; Build a linear series with INDEX from operands 1 and 2, both of which
;; must satisfy aarch64_sve_index_operand.  The three alternatives cover
;; an immediate operand 1 with a register operand 2, a register operand 1
;; with an immediate operand 2, and both in registers.
2652(define_insn "vec_series<mode>"
cc68f7c2
RS
2653 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
2654 (vec_series:SVE_I
915d28fe
RS
2655 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
2656 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
2657 "TARGET_SVE"
2658 "@
30f8bf3d
RS
2659 index\t%0.<Vctype>, #%1, %<vccore>2
2660 index\t%0.<Vctype>, %<vccore>1, #%2
2661 index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
43cacb12
RS
2662)
2663
2664;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2665;; of an INDEX instruction.
;;
;; aarch64_check_zero_based_sve_index_immediate is used twice: its result
;; on operand 2 gates the insn condition, and at output time the same call
;; replaces operand 2 with the value that is printed as the immediate
;; second INDEX argument.
2666(define_insn "*vec_series<mode>_plus"
cc68f7c2
RS
2667 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2668 (plus:SVE_I
2669 (vec_duplicate:SVE_I
43cacb12 2670 (match_operand:<VEL> 1 "register_operand" "r"))
cc68f7c2 2671 (match_operand:SVE_I 2 "immediate_operand")))]
43cacb12
RS
2672 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2673 {
2674 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
30f8bf3d 2675 return "index\t%0.<Vctype>, %<vccore>1, #%2";
43cacb12
RS
2676 }
2677)
2678
915d28fe
RS
2679;; -------------------------------------------------------------------------
2680;; ---- [PRED] Duplicate element
2681;; -------------------------------------------------------------------------
2682;; The patterns in this section are synthetic.
2683;; -------------------------------------------------------------------------
2684
2685;; Implement a predicate broadcast by shifting the low bit of the scalar
2686;; input into the top bit and using a WHILELO. An alternative would be to
2687;; duplicate the input and do a compare with zero.
;;
;; Concretely: take the DImode lowpart of QImode operand 1, shift it left
;; by 63 into a fresh DImode pseudo, then emit while_ultdi<mode> with
;; const0_rtx and that pseudo as its two scalar inputs, writing predicate
;; operand 0.
2688(define_expand "vec_duplicate<mode>"
2689 [(set (match_operand:PRED_ALL 0 "register_operand")
624d0f07 2690 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
9f4cbab8
RS
2691 "TARGET_SVE"
2692 {
915d28fe
RS
2693 rtx tmp = gen_reg_rtx (DImode);
2694 rtx op1 = gen_lowpart (DImode, operands[1]);
2695 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2696 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2697 DONE;
9f4cbab8
RS
2698 }
2699)
2700
915d28fe
RS
2701;; =========================================================================
2702;; == Vector decomposition
2703;; =========================================================================
9f4cbab8 2704
915d28fe
RS
2705;; -------------------------------------------------------------------------
2706;; ---- [INT,FP] Extract index
2707;; -------------------------------------------------------------------------
2708;; Includes:
2709;; - DUP (Advanced SIMD)
2710;; - DUP (SVE)
2711;; - EXT (SVE)
2712;; - ST1 (Advanced SIMD)
2713;; - UMOV (Advanced SIMD)
2714;; -------------------------------------------------------------------------
2715
;; Expand extraction of element operand 2 from SVE vector operand 1 into
;; scalar operand 0.  The preparation code handles two cases itself and
;; finishes with DONE:
;;  - operand 2 is known to equal GET_MODE_NUNITS (<MODE>mode) - 1: use
;;    extract_last_<mode> with the predicate from aarch64_pfalse_reg;
;;  - operand 2 is not a CONST_INT: build a vec_series starting at the
;;    index with step -1, compare it with zero to get a predicate, and use
;;    extract_last_<mode> with that predicate.
;; Any other constant index falls through and the vec_select template is
;; emitted for the insn patterns to match.
2716(define_expand "vec_extract<mode><Vel>"
2717 [(set (match_operand:<VEL> 0 "register_operand")
2718 (vec_select:<VEL>
f75cdd2c 2719 (match_operand:SVE_FULL 1 "register_operand")
915d28fe 2720 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
9f4cbab8
RS
2721 "TARGET_SVE"
2722 {
915d28fe
RS
2723 poly_int64 val;
2724 if (poly_int_rtx_p (operands[2], &val)
2725 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2726 {
2727 /* The last element can be extracted with a LASTB and a false
2728 predicate. */
2729 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2730 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2731 DONE;
2732 }
2733 if (!CONST_INT_P (operands[2]))
2734 {
2735 /* Create an index with operand[2] as the base and -1 as the step.
2736 It will then be zero for the element we care about. */
2737 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2738 index = force_reg (<VEL_INT>mode, index);
2739 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2740 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2741
2742 /* Get a predicate that is true for only that element. */
2743 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2744 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2745 rtx sel = gen_reg_rtx (<VPRED>mode);
2746 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2747
2748 /* Select the element using LASTB. */
2749 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2750 DONE;
2751 }
9f4cbab8
RS
2752 }
2753)
2754
915d28fe
RS
2755;; Extract element zero. This is a special case because we want to force
2756;; the registers to be the same for the second alternative, and then
2757;; split the instruction into nothing after RA.
;;
;; Alternative 1 emits UMOV into a general register, alternative 2 emits
;; "#" with the source tied to the destination, and alternative 3 emits an
;; ST1 of lane 0 to memory.  Operand 1 is printed as a <V128>mode REG.
;; The split only fires after reload, when operand 0 is a REG with the
;; same register number as operand 1, and replaces the insn with a
;; NOTE_INSN_DELETED note.
2758(define_insn_and_split "*vec_extract<mode><Vel>_0"
2759 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2760 (vec_select:<VEL>
f75cdd2c 2761 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
915d28fe 2762 (parallel [(const_int 0)])))]
9f4cbab8 2763 "TARGET_SVE"
915d28fe
RS
2764 {
2765 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2766 switch (which_alternative)
2767 {
2768 case 0:
2769 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2770 case 1:
2771 return "#";
2772 case 2:
2773 return "st1\\t{%1.<Vetype>}[0], %0";
2774 default:
2775 gcc_unreachable ();
2776 }
2777 }
2778 "&& reload_completed
2779 && REG_P (operands[0])
2780 && REGNO (operands[0]) == REGNO (operands[1])"
2781 [(const_int 0)]
2782 {
2783 emit_note (NOTE_INSN_DELETED);
2784 DONE;
2785 }
2786 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
9f4cbab8
RS
2787)
2788
915d28fe
RS
2789;; Extract an element from the Advanced SIMD portion of the register.
2790;; We don't just reuse the aarch64-simd.md pattern because we don't
2791;; want any change in lane number on big-endian targets.
;;
;; Matches when the element's byte offset (index * element size) is in
;; the range [1, 15].  Operand 1 is printed as a <V128>mode REG.  The
;; alternatives emit UMOV to a general register, DUP to a scalar vector
;; register, or an ST1 lane store to memory.
2792(define_insn "*vec_extract<mode><Vel>_v128"
2793 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2794 (vec_select:<VEL>
f75cdd2c 2795 (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
915d28fe
RS
2796 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2797 "TARGET_SVE
2798 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
43cacb12 2799 {
915d28fe
RS
2800 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2801 switch (which_alternative)
2802 {
2803 case 0:
2804 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2805 case 1:
2806 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2807 case 2:
2808 return "st1\\t{%1.<Vetype>}[%2], %0";
2809 default:
2810 gcc_unreachable ();
2811 }
43cacb12 2812 }
915d28fe 2813 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
43cacb12
RS
2814)
2815
915d28fe
RS
2816;; Extract an element in the range of DUP. This pattern allows the
2817;; source and destination to be different.
;;
;; Matches when the element's byte offset (index * element size) is in
;; the range [16, 63].  Operand 0 is replaced by a <MODE>mode REG with the
;; same register number so that it prints as a full vector register in
;; the DUP.
2818(define_insn "*vec_extract<mode><Vel>_dup"
2819 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2820 (vec_select:<VEL>
f75cdd2c 2821 (match_operand:SVE_FULL 1 "register_operand" "w")
915d28fe
RS
2822 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2823 "TARGET_SVE
2824 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
2825 {
2826 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2827 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
2828 }
43cacb12
RS
2829)
2830
915d28fe
RS
2831;; Extract an element outside the range of DUP. This pattern requires the
2832;; source and destination to be the same.
;;
;; NOTE: the "same register" requirement above only describes the first
;; alternative (operand 1 constraint "0").  The second alternative accepts
;; a different source register and emits "movprfx %0, %1" before the EXT,
;; as recorded in the "movprfx" attribute.
;;
;; Matches when the element's byte offset is at least 64.  Operand 0 is
;; re-expressed as a <MODE>mode REG and operand 2 is converted from an
;; element index to a byte offset before printing.
2833(define_insn "*vec_extract<mode><Vel>_ext"
06b3ba23 2834 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
915d28fe 2835 (vec_select:<VEL>
f75cdd2c 2836 (match_operand:SVE_FULL 1 "register_operand" "0, w")
915d28fe
RS
2837 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2838 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
2839 {
2840 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2841 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
06b3ba23
RS
2842 return (which_alternative == 0
2843 ? "ext\t%0.b, %0.b, %0.b, #%2"
2844 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
915d28fe 2845 }
06b3ba23 2846 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
2847)
2848
915d28fe
RS
2849;; -------------------------------------------------------------------------
2850;; ---- [INT,FP] Extract active element
2851;; -------------------------------------------------------------------------
2852;; Includes:
624d0f07 2853;; - LASTA
915d28fe
RS
2854;; - LASTB
2855;; -------------------------------------------------------------------------
2856
2857;; Extract the last active element of operand 1 into operand 0.
2858;; If no elements are active, extract the last inactive element instead.
;;
;; NOTE: in the RTL below the governing predicate is operand 1 and the
;; data vector is operand 2; "active" above refers to the elements that
;; operand 1 selects in operand 2.  The pattern is iterated over LAST, so
;; the name (<last_op>) and mnemonic (last<ab>) vary with the unspec.
;; The first alternative writes a general register (%<vwcore>0), the
;; second a scalar vector register (%<Vetype>0).
624d0f07
RS
2859(define_insn "@extract_<last_op>_<mode>"
2860 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
915d28fe
RS
2861 (unspec:<VEL>
2862 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c 2863 (match_operand:SVE_FULL 2 "register_operand" "w, w")]
624d0f07 2864 LAST))]
43cacb12 2865 "TARGET_SVE"
915d28fe 2866 "@
624d0f07
RS
2867 last<ab>\t%<vwcore>0, %1, %2.<Vetype>
2868 last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
43cacb12
RS
2869)
2870
915d28fe
RS
2871;; -------------------------------------------------------------------------
2872;; ---- [PRED] Extract index
2873;; -------------------------------------------------------------------------
2874;; The patterns in this section are synthetic.
2875;; -------------------------------------------------------------------------
2876
2877;; Handle extractions from a predicate by converting to an integer vector
2878;; and extracting from there.
;;
;; The conversion uses vcond_mask_<mode><vpred> to select between
;; CONST1_RTX and CONST0_RTX of the integer vector mode under predicate
;; operand 1; the result is then passed to vec_extract<mode><Vel> with the
;; original index (operand 2).
2879(define_expand "vec_extract<vpred><Vel>"
2880 [(match_operand:<VEL> 0 "register_operand")
2881 (match_operand:<VPRED> 1 "register_operand")
2882 (match_operand:SI 2 "nonmemory_operand")
2883 ;; Dummy operand to which we can attach the iterator.
f75cdd2c 2884 (reg:SVE_FULL_I V0_REGNUM)]
43cacb12 2885 "TARGET_SVE"
915d28fe
RS
2886 {
2887 rtx tmp = gen_reg_rtx (<MODE>mode);
d29f7dd5
RS
2888 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
2889 CONST1_RTX (<MODE>mode),
2890 CONST0_RTX (<MODE>mode)));
915d28fe
RS
2891 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
2892 DONE;
2893 }
43cacb12
RS
2894)
2895
915d28fe
RS
2896;; =========================================================================
2897;; == Unary arithmetic
2898;; =========================================================================
2899
2900;; -------------------------------------------------------------------------
2901;; ---- [INT] General unary arithmetic corresponding to rtx codes
2902;; -------------------------------------------------------------------------
2903;; Includes:
2904;; - ABS
bca5a997
RS
2905;; - CLS (= clrsb)
2906;; - CLZ
915d28fe
RS
2907;; - CNT (= popcount)
2908;; - NEG
2909;; - NOT
2910;; -------------------------------------------------------------------------
2911
2912;; Unpredicated integer unary arithmetic.
;;
;; The operation is expanded into its predicated UNSPEC_PRED_X form; the
;; preparation code supplies the predicate (match_dup 2) from
;; aarch64_ptrue_reg (<VPRED>mode).
2913(define_expand "<optab><mode>2"
bb3ab62a
JR
2914 [(set (match_operand:SVE_I 0 "register_operand")
2915 (unspec:SVE_I
915d28fe 2916 [(match_dup 2)
bb3ab62a
JR
2917 (SVE_INT_UNARY:SVE_I
2918 (match_operand:SVE_I 1 "register_operand"))]
06308276 2919 UNSPEC_PRED_X))]
43cacb12 2920 "TARGET_SVE"
915d28fe
RS
2921 {
2922 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2923 }
43cacb12
RS
2924)
2925
915d28fe 2926;; Integer unary arithmetic predicated with a PTRUE.
;;
;; The first alternative ties the input (operand 2) to the destination and
;; emits the merging (/m) instruction on its own.  The second accepts a
;; different input register and prefixes the instruction with
;; "movprfx %0, %2", as recorded in the "movprfx" attribute.
624d0f07 2927(define_insn "@aarch64_pred_<optab><mode>"
a4d9837e 2928 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
bb3ab62a 2929 (unspec:SVE_I
a4d9837e 2930 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
bb3ab62a 2931 (SVE_INT_UNARY:SVE_I
a4d9837e 2932 (match_operand:SVE_I 2 "register_operand" "0, w"))]
06308276 2933 UNSPEC_PRED_X))]
43cacb12 2934 "TARGET_SVE"
a4d9837e
RS
2935 "@
2936 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2937 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2938 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
2939)
2940
624d0f07
RS
2941;; Predicated integer unary arithmetic with merging.
;;
;; This expander has no preparation code: it simply generates an
;; UNSPEC_SEL that chooses, under predicate operand 1, between the unary
;; operation applied to operand 2 and the fallback value operand 3 (a
;; register or zero, per aarch64_simd_reg_or_zero).
2942(define_expand "@cond_<optab><mode>"
0f9d2c1a
RS
2943 [(set (match_operand:SVE_I 0 "register_operand")
2944 (unspec:SVE_I
624d0f07 2945 [(match_operand:<VPRED> 1 "register_operand")
0f9d2c1a
RS
2946 (SVE_INT_UNARY:SVE_I
2947 (match_operand:SVE_I 2 "register_operand"))
2948 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
624d0f07
RS
2949 UNSPEC_SEL))]
2950 "TARGET_SVE"
2951)
2952
2953;; Predicated integer unary arithmetic, merging with the first input.
;;
;; The fallback value of the UNSPEC_SEL is (match_dup 2), i.e. the same
;; register as the operation's input.  The first alternative ties that
;; input to the destination and operates in place on %0; the second
;; accepts a different input register and copies it in with
;; "movprfx %0, %2" first.
2954(define_insn "*cond_<optab><mode>_2"
0f9d2c1a
RS
2955 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
2956 (unspec:SVE_I
3c9f4963 2957 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
0f9d2c1a
RS
2958 (SVE_INT_UNARY:SVE_I
2959 (match_operand:SVE_I 2 "register_operand" "0, w"))
3c9f4963
RS
2960 (match_dup 2)]
2961 UNSPEC_SEL))]
2962 "TARGET_SVE"
2963 "@
2964 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
2965 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2966 [(set_attr "movprfx" "*,yes")]
2967)
2968
2969;; Predicated integer unary arithmetic, merging with an independent value.
2970;;
2971;; The earlyclobber isn't needed for the first alternative, but omitting
2972;; it would only help the case in which operands 2 and 3 are the same,
2973;; which is handled above rather than here. Marking all the alternatives
2974;; as earlyclobber helps to make the instruction more regular to the
2975;; register allocator.
;;
;; The insn condition excludes operands 2 and 3 being rtx_equal_p.  The
;; three alternatives are distinguished by the fallback operand 3: tied to
;; the destination (single /m instruction), zero (preceded by a zeroing
;; "movprfx %0.<Vetype>, %1/z, %2.<Vetype>"), or an unrelated register
;; (preceded by "movprfx %0, %3").
2976(define_insn "*cond_<optab><mode>_any"
0f9d2c1a
RS
2977 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
2978 (unspec:SVE_I
3c9f4963 2979 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
0f9d2c1a
RS
2980 (SVE_INT_UNARY:SVE_I
2981 (match_operand:SVE_I 2 "register_operand" "w, w, w"))
2982 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3c9f4963
RS
2983 UNSPEC_SEL))]
2984 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
2985 "@
2986 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2987 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2988 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2989 [(set_attr "movprfx" "*,yes,yes")]
2990)
2991
d7a09c44
RS
2992;; -------------------------------------------------------------------------
2993;; ---- [INT] General unary arithmetic corresponding to unspecs
2994;; -------------------------------------------------------------------------
2995;; Includes:
624d0f07 2996;; - RBIT
d7a09c44
RS
2997;; - REVB
2998;; - REVH
2999;; - REVW
3000;; -------------------------------------------------------------------------
3001
3002;; Predicated integer unary operations.
;;
;; Here the operation is an unspec iterated over SVE_INT_UNARY rather than
;; an rtx code.  The pattern is only enabled when
;; <elem_bits> >= <min_elem_bits>.  The first alternative ties the input
;; to the destination; the second copies a different input in with
;; "movprfx %0, %2" first.
3003(define_insn "@aarch64_pred_<optab><mode>"
a4d9837e 3004 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
f75cdd2c 3005 (unspec:SVE_FULL_I
a4d9837e 3006 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c 3007 (unspec:SVE_FULL_I
a4d9837e 3008 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")]
d7a09c44
RS
3009 SVE_INT_UNARY)]
3010 UNSPEC_PRED_X))]
3011 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
a4d9837e
RS
3012 "@
3013 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3014 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3015 [(set_attr "movprfx" "*,yes")]
d7a09c44
RS
3016)
3017
6c3ce63b
RS
3018;; Another way of expressing the REVB, REVH and REVW patterns, with this
3019;; form being easier for permutes. The predicate mode determines the number
3020;; of lanes and the data mode decides the granularity of the reversal within
3021;; each lane.
;;
;; The pattern is only enabled when <PRED_HSD:elem_bits> is strictly
;; greater than <SVE_ALL:container_bits>.  The mnemonic suffix comes from
;; the data mode (<SVE_ALL:Vcwtype>) and the register element suffix from
;; the predicate mode (<PRED_HSD:Vetype>).  The second alternative copies
;; a different input in with "movprfx %0, %2" first.
3022(define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>"
a4d9837e 3023 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
6c3ce63b 3024 (unspec:SVE_ALL
a4d9837e 3025 [(match_operand:PRED_HSD 1 "register_operand" "Upl, Upl")
6c3ce63b 3026 (unspec:SVE_ALL
a4d9837e 3027 [(match_operand:SVE_ALL 2 "register_operand" "0, w")]
6c3ce63b
RS
3028 UNSPEC_REVBHW)]
3029 UNSPEC_PRED_X))]
3030 "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>"
a4d9837e
RS
3031 "@
3032 rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3033 movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>"
3034 [(set_attr "movprfx" "*,yes")]
6c3ce63b
RS
3035)
3036
624d0f07
RS
3037;; Predicated integer unary operations with merging.
;;
;; UNSPEC_SEL form of the unspec-based unary operations, enabled when
;; <elem_bits> >= <min_elem_bits>.  The three alternatives are
;; distinguished by the fallback operand 3: tied to the destination
;; (single /m instruction), zero (preceded by a zeroing
;; "movprfx %0.<Vetype>, %1/z, %2.<Vetype>"), or an unrelated register
;; (preceded by "movprfx %0, %3").
3038(define_insn "@cond_<optab><mode>"
f75cdd2c
RS
3039 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
3040 (unspec:SVE_FULL_I
624d0f07 3041 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
f75cdd2c
RS
3042 (unspec:SVE_FULL_I
3043 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
624d0f07 3044 SVE_INT_UNARY)
f75cdd2c 3045 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
624d0f07
RS
3046 UNSPEC_SEL))]
3047 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3048 "@
3049 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3050 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3051 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3052 [(set_attr "movprfx" "*,yes,yes")]
3053)
3054
3055;; -------------------------------------------------------------------------
e58703e2 3056;; ---- [INT] Sign and zero extension
624d0f07
RS
3057;; -------------------------------------------------------------------------
3058;; Includes:
3059;; - SXTB
3060;; - SXTH
3061;; - SXTW
e58703e2
RS
3062;; - UXTB
3063;; - UXTH
3064;; - UXTW
624d0f07
RS
3065;; -------------------------------------------------------------------------
3066
e58703e2
RS
3067;; Unpredicated sign and zero extension from a narrower mode.
;;
;; Expanded into the predicated UNSPEC_PRED_X form, with the predicate
;; (match_dup 2) supplied by aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode).
;; The condition only admits mode pairs for which
;; <SVE_PARTIAL_I:self_mask> has no bits outside
;; <SVE_HSDI:narrower_mask>.
3068(define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3069 [(set (match_operand:SVE_HSDI 0 "register_operand")
3070 (unspec:SVE_HSDI
3071 [(match_dup 2)
3072 (ANY_EXTEND:SVE_HSDI
3073 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
3074 UNSPEC_PRED_X))]
3075 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3076 {
3077 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
3078 }
3079)
3080
3081;; Predicated sign and zero extension from a narrower mode.
;;
;; Emits <su>xt<Vesize> with the size suffix taken from the narrower
;; (SVE_PARTIAL_I) mode and the register element suffix from the wider
;; (SVE_HSDI) mode.  The first alternative ties the input to the
;; destination; the second copies a different input in with
;; "movprfx %0, %2" first.
3082(define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
a4d9837e 3083 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w, ?&w")
e58703e2 3084 (unspec:SVE_HSDI
a4d9837e 3085 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
e58703e2 3086 (ANY_EXTEND:SVE_HSDI
a4d9837e 3087 (match_operand:SVE_PARTIAL_I 2 "register_operand" "0, w"))]
e58703e2
RS
3088 UNSPEC_PRED_X))]
3089 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
a4d9837e
RS
3090 "@
3091 <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3092 movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
3093 [(set_attr "movprfx" "*,yes")]
e58703e2
RS
3094)
3095
;; Predicated truncate-and-sign-extend operations.
;;
;; Operand 2 is a full-width vector whose elements are truncated to
;; SVE_PARTIAL_I and then sign-extended back to SVE_FULL_HSDI.
;; Alternative 1 ties operand 2 to the destination; alternative 2 copies it
;; into the earlyclobber destination with MOVPRFX first.
(define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
           (sign_extend:SVE_FULL_HSDI
             (truncate:SVE_PARTIAL_I
               (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")))]
          UNSPEC_PRED_X))]
  "TARGET_SVE
   && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3112
;; Predicated truncate-and-sign-extend operations with merging.
;;
;; The extended value is combined with the fallback value (operand 3) by
;; UNSPEC_SEL under predicate operand 1.  The alternatives differ in where
;; operand 3 lives:
;;   1. tied to the destination, so a single merging SXT* suffices;
;;   2. matching Dz, handled by a zeroing (/z) MOVPRFX of operand 2;
;;   3. in an unrelated register, copied into the destination by MOVPRFX.
(define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (sign_extend:SVE_FULL_HSDI
             (truncate:SVE_PARTIAL_I
               (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
           (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
3131
;; Predicated truncate-and-zero-extend operations, merging with the
;; first input.
;;
;; The canonical form of this operation is an AND of a constant rather
;; than (zero_extend (truncate ...)).
;;
;; Operand 3 must satisfy aarch64_sve_uxt_immediate; the %e3 output modifier
;; turns it into the UXT* size suffix.  Alternative 1 ties operand 2 to the
;; destination; alternative 2 copies it in with MOVPRFX first.
(define_insn "*cond_uxt<mode>_2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (and:SVE_I
             (match_operand:SVE_I 2 "register_operand" "0, w")
             (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3152
;; Predicated truncate-and-zero-extend operations, merging with an
;; independent value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 4 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
;;
;; The alternatives differ in where the fallback value (operand 4) lives:
;; tied to the destination, matching Dz (zeroing MOVPRFX of operand 2), or
;; in an unrelated register (copied in by MOVPRFX).  The insn condition
;; rejects the case in which operands 2 and 4 are equal.
(define_insn "*cond_uxt<mode>_any"
  [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (and:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w, w, w")
             (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
           (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
3177
2d56600c
RS
3178;; -------------------------------------------------------------------------
3179;; ---- [INT] Truncation
3180;; -------------------------------------------------------------------------
3181;; The patterns in this section are synthetic.
3182;; -------------------------------------------------------------------------
3183
;; Truncate to a partial SVE vector from either a full vector or a
;; wider partial vector.  This is a no-op, because we can just ignore
;; the unused upper bits of the source.
;;
;; The insn always outputs "#" and is split once reload has completed into
;; a plain move, with operand 1 re-expressed in the narrower mode by
;; aarch64_replace_reg_mode.
(define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
  [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
        (truncate:SVE_PARTIAL_I
          (match_operand:SVE_HSDI 1 "register_operand" "w")))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1],
                                            <SVE_PARTIAL_I:MODE>mode);
  }
)
3200
e0a0be93
RS
3201;; -------------------------------------------------------------------------
3202;; ---- [INT] Logical inverse
3203;; -------------------------------------------------------------------------
624d0f07
RS
3204;; Includes:
3205;; - CNOT
3206;; -------------------------------------------------------------------------
e0a0be93
RS
3207
;; Predicated logical inverse.
;;
;; The operation is expressed as a select between constant one (operand 5)
;; and constant zero (operand 4), controlled by a predicated comparison
;; (UNSPEC_PRED_Z) of operand 3 against zero.  Operand 1 is the governing
;; predicate and operand 2 its aarch64_sve_ptrue_flag.
(define_expand "@aarch64_pred_cnot<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(unspec:<VPRED>
             [(match_operand:<VPRED> 1 "register_operand")
              (match_operand:SI 2 "aarch64_sve_ptrue_flag")
              (eq:<VPRED>
                (match_operand:SVE_FULL_I 3 "register_operand")
                (match_dup 4))]
             UNSPEC_PRED_Z)
           (match_dup 5)
           (match_dup 4)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    operands[4] = CONST0_RTX (<MODE>mode);
    operands[5] = CONST1_RTX (<MODE>mode);
  }
)
3228
;; Predicated logical inverse, matched from its select-on-compare form:
;; operand 4 (aarch64_simd_imm_one) is selected where operand 2 compares
;; equal to operand 3 (aarch64_simd_imm_zero) under predicate operand 1,
;; and operand 3 is selected elsewhere.
;;
;; Alternative 1 ties operand 2 to the destination; alternative 2 copies it
;; into the earlyclobber destination with MOVPRFX first.
(define_insn "*cnot<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(unspec:<VPRED>
             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
              (match_operand:SI 5 "aarch64_sve_ptrue_flag")
              (eq:<VPRED>
                (match_operand:SVE_I 2 "register_operand" "0, w")
                (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
             UNSPEC_PRED_Z)
           (match_operand:SVE_I 4 "aarch64_simd_imm_one")
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3248
624d0f07
RS
;; Predicated logical inverse with merging.
;;
;; The inner UNSPEC_SEL is the logical inverse of operand 2, computed under
;; a constant all-ones predicate (operand 4, flagged SVE_KNOWN_PTRUE).  The
;; outer UNSPEC_SEL merges that result with operand 3 under the
;; caller-supplied predicate, operand 1.
(define_expand "@cond_cnot<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(unspec:<VPRED>
                [(match_dup 4)
                 (const_int SVE_KNOWN_PTRUE)
                 (eq:<VPRED>
                   (match_operand:SVE_FULL_I 2 "register_operand")
                   (match_dup 5))]
                UNSPEC_PRED_Z)
              (match_dup 6)
              (match_dup 5)]
             UNSPEC_SEL)
           (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
    operands[5] = CONST0_RTX (<MODE>mode);
    operands[6] = CONST1_RTX (<MODE>mode);
  }
)
3274
e0a0be93
RS
;; Predicated logical inverse, merging with the first input.
;;
;; Operand 5 is the predicate of the inner comparison.  If it is not
;; already a constant, the rewrite step replaces it with the all-ones
;; constant CONSTM1_RTX.
;;
;; Alternative 1 ties operand 2 to the destination; alternative 2 copies it
;; into the earlyclobber destination with MOVPRFX first.
(define_insn_and_rewrite "*cond_cnot<mode>_2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           ;; Logical inverse of operand 2 (as above).
           (unspec:SVE_I
             [(unspec:<VPRED>
                [(match_operand 5)
                 (const_int SVE_KNOWN_PTRUE)
                 (eq:<VPRED>
                   (match_operand:SVE_I 2 "register_operand" "0, w")
                   (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
                UNSPEC_PRED_Z)
              (match_operand:SVE_I 4 "aarch64_simd_imm_one")
              (match_dup 3)]
             UNSPEC_SEL)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
3304
;; Predicated logical inverse, merging with an independent value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 6 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
;;
;; The alternatives differ in where the fallback value (operand 6) lives:
;; tied to the destination, matching Dz (zeroing MOVPRFX of operand 2), or
;; in an unrelated register (copied in by MOVPRFX).  As in the _2 form, a
;; non-constant operand 5 is rewritten to CONSTM1_RTX.
(define_insn_and_rewrite "*cond_cnot<mode>_any"
  [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           ;; Logical inverse of operand 2 (as above).
           (unspec:SVE_I
             [(unspec:<VPRED>
                [(match_operand 5)
                 (const_int SVE_KNOWN_PTRUE)
                 (eq:<VPRED>
                   (match_operand:SVE_I 2 "register_operand" "w, w, w")
                   (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
                UNSPEC_PRED_Z)
              (match_operand:SVE_I 4 "aarch64_simd_imm_one")
              (match_dup 3)]
             UNSPEC_SEL)
           (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
  "@
   cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)
3341
624d0f07
RS
3342;; -------------------------------------------------------------------------
3343;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3344;; -------------------------------------------------------------------------
3345;; Includes:
3346;; - FEXPA
3347;; -------------------------------------------------------------------------
3348
;; Unpredicated unary operations that take an integer and return a float.
;;
;; Operand 1 has the integer mode <V_INT_EQUIV> corresponding to the
;; floating-point result mode; the operation is one of SVE_FP_UNARY_INT.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
        (unspec:SVE_FULL_F
          [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
          SVE_FP_UNARY_INT))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
)
3358
915d28fe 3359;; -------------------------------------------------------------------------
d45b20a5 3360;; ---- [FP] General unary arithmetic corresponding to unspecs
915d28fe
RS
3361;; -------------------------------------------------------------------------
3362;; Includes:
3363;; - FABS
3364;; - FNEG
624d0f07
RS
3365;; - FRECPE
3366;; - FRECPX
915d28fe
RS
3367;; - FRINTA
3368;; - FRINTI
3369;; - FRINTM
3370;; - FRINTN
3371;; - FRINTP
3372;; - FRINTX
3373;; - FRINTZ
a0ee8352 3374;; - FRSQRTE
d45b20a5 3375;; - FSQRT
915d28fe
RS
3376;; -------------------------------------------------------------------------
3377
624d0f07
RS
;; Unpredicated floating-point unary operations.
;;
;; A single two-operand instruction with no governing predicate; the
;; operation is one of SVE_FP_UNARY.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
        (unspec:SVE_FULL_F
          [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
          SVE_FP_UNARY))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
)
3387
d45b20a5
RS
;; Unpredicated floating-point unary operations.
;;
;; Expands the standard optab into the predicated unspec form, using the
;; register returned by aarch64_ptrue_reg as the governing predicate
;; (operand 2) and marking the predicate as SVE_RELAXED_GP.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_dup 2)
           (const_int SVE_RELAXED_GP)
           (match_operand:SVE_FULL_F 1 "register_operand")]
          SVE_COND_FP_UNARY_OPTAB))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3401
;; Predicated floating-point unary operations.
;;
;; Operand 1 is the governing predicate, operand 3 its strictness flag
;; (aarch64_sve_gp_strictness) and operand 2 the input vector.
;; Alternative 1 ties operand 2 to the destination; alternative 2 copies it
;; into the earlyclobber destination with MOVPRFX first.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
          SVE_COND_FP_UNARY))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3416
624d0f07
RS
;; Predicated floating-point unary arithmetic with merging.
;;
;; The inner operation is predicated on operand 1 itself (match_dup 1) and
;; marked SVE_STRICT_GP; the outer UNSPEC_SEL merges its result with the
;; fallback value, operand 3, under the same predicate.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")]
             SVE_COND_FP_UNARY)
           (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)
3431
;; Predicated floating-point unary arithmetic, merging with the first input.
;;
;; Relaxed form: the inner predicate (operand 3) is matched without any
;; restriction.  If it differs from the merging predicate (operand 1), the
;; rewrite step replaces it with a copy of operand 1.
;;
;; Alternative 1 ties operand 2 to the destination; alternative 2 copies it
;; into the earlyclobber destination with MOVPRFX first.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 3)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
             SVE_COND_FP_UNARY)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[3])"
  {
    operands[3] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
3454
0eb5e901
RS
;; Predicated floating-point unary arithmetic, merging with the first input.
;;
;; Strict form: the inner operation is marked SVE_STRICT_GP and must be
;; predicated on operand 1 itself (match_dup 1), so no rewrite is needed.
;;
;; Alternative 1 ties operand 2 to the destination; alternative 2 copies it
;; into the earlyclobber destination with MOVPRFX first.
(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
             SVE_COND_FP_UNARY)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3472
b21f7d53
RS
;; Predicated floating-point unary arithmetic, merging with an independent
;; value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 3 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
;;
;; Relaxed form: the inner predicate (operand 4) is matched without any
;; restriction and is rewritten to a copy of operand 1 if it differs.
;; The alternatives differ in where the fallback value (operand 3) lives:
;; tied to the destination, matching Dz (zeroing MOVPRFX of operand 2), or
;; in an unrelated register (copied in by MOVPRFX).
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
             SVE_COND_FP_UNARY)
           (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)
3503
0eb5e901
RS
;; Predicated floating-point unary arithmetic, merging with an independent
;; value.
;;
;; Strict form: the inner operation is marked SVE_STRICT_GP and must be
;; predicated on operand 1 itself (match_dup 1), so no rewrite is needed.
;; The alternatives differ in where the fallback value (operand 3) lives:
;; tied to the destination, matching Dz (zeroing MOVPRFX of operand 2), or
;; in an unrelated register (copied in by MOVPRFX).
(define_insn "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
             SVE_COND_FP_UNARY)
           (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
3522
a0ee8352
RS
3523;; -------------------------------------------------------------------------
3524;; ---- [FP] Square root
3525;; -------------------------------------------------------------------------
3526
;; Square root expander.  aarch64_emit_approx_sqrt is tried first; if it
;; returns true the expansion is complete.  Otherwise the pattern falls back
;; to UNSPEC_COND_FSQRT, governed by the register from aarch64_ptrue_reg
;; (operand 2) and marked SVE_RELAXED_GP.
(define_expand "sqrt<mode>2"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_dup 2)
           (const_int SVE_RELAXED_GP)
           (match_operand:SVE_FULL_F 1 "register_operand")]
          UNSPEC_COND_FSQRT))]
  "TARGET_SVE"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
  operands[2] = aarch64_ptrue_reg (<VPRED>mode);
})
3540
3541;; -------------------------------------------------------------------------
3542;; ---- [FP] Reciprocal square root
3543;; -------------------------------------------------------------------------
3544
;; Reciprocal square root expander.  The RTL template is never emitted:
;; the expansion is always delegated to aarch64_emit_approx_sqrt, called
;; with its third argument set to true, followed by DONE.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
        (unspec:SVE_FULL_SDF
          [(match_operand:SVE_FULL_SDF 1 "register_operand")]
          UNSPEC_RSQRT))]
  "TARGET_SVE"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
3555
;; Named expander that emits a one-input UNSPEC_RSQRTE of operand 1.
;; It has no preparation statements.
(define_expand "@aarch64_rsqrte<mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
        (unspec:SVE_FULL_SDF
          [(match_operand:SVE_FULL_SDF 1 "register_operand")]
          UNSPEC_RSQRTE))]
  "TARGET_SVE"
)
3563
;; Named expander that emits a two-input UNSPEC_RSQRTS of operands 1 and 2.
;; It has no preparation statements.
(define_expand "@aarch64_rsqrts<mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
        (unspec:SVE_FULL_SDF
          [(match_operand:SVE_FULL_SDF 1 "register_operand")
           (match_operand:SVE_FULL_SDF 2 "register_operand")]
          UNSPEC_RSQRTS))]
  "TARGET_SVE"
)
3572
915d28fe
RS
3573;; -------------------------------------------------------------------------
3574;; ---- [PRED] Inverse
3575;; -------------------------------------------------------------------------
3576;; Includes:
3577;; - NOT
3578;; -------------------------------------------------------------------------
3579
;; Unpredicated predicate inverse.
;;
;; Expressed as (and (not operand 1) operand 2), where operand 2 is the
;; register returned by aarch64_ptrue_reg for the predicate mode.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<MODE>mode);
  }
)
3591
;; Predicated predicate inverse.
;;
;; Operand 1 is the governing predicate (used with /z in the output) and
;; operand 2 is the predicate being inverted.  The instruction is always
;; printed with the .b element suffix.
(define_insn "*one_cmpl<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "not\t%0.b, %1/z, %2.b"
)
3601
3602;; =========================================================================
3603;; == Binary arithmetic
3604;; =========================================================================
3605
3606;; -------------------------------------------------------------------------
3607;; ---- [INT] General binary arithmetic corresponding to rtx codes
3608;; -------------------------------------------------------------------------
f8c22a8b
RS
3609;; Includes:
3610;; - ADD (merging form only)
3611;; - AND (merging form only)
20103c0e 3612;; - ASR (merging form only)
f8c22a8b 3613;; - EOR (merging form only)
20103c0e
RS
3614;; - LSL (merging form only)
3615;; - LSR (merging form only)
915d28fe 3616;; - MUL
f8c22a8b 3617;; - ORR (merging form only)
915d28fe
RS
3618;; - SMAX
3619;; - SMIN
0a09a948
RS
3620;; - SQADD (SVE2 merging form only)
3621;; - SQSUB (SVE2 merging form only)
f8c22a8b 3622;; - SUB (merging form only)
915d28fe
RS
3623;; - UMAX
3624;; - UMIN
0a09a948
RS
3625;; - UQADD (SVE2 merging form only)
3626;; - UQSUB (SVE2 merging form only)
915d28fe
RS
3627;; -------------------------------------------------------------------------
3628
f8c22a8b
RS
;; Unpredicated integer binary operations that have an immediate form.
;;
;; The operation is wrapped in UNSPEC_PRED_X, with the register returned by
;; aarch64_ptrue_reg as the governing predicate (operand 3).  Operand 2 may
;; be anything accepted by the operation-specific
;; aarch64_sve_<sve_imm_con>_operand predicate.
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_dup 3)
           (SVE_INT_BINARY_IMM:SVE_I
             (match_operand:SVE_I 1 "register_operand")
             (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
3643
;; Integer binary operations that have an immediate form, predicated
;; with a PTRUE.  We don't actually need the predicate for the first
;; and third alternatives, but using Upa or X isn't likely to gain much
;; and would make the instruction seem less uniform to the register
;; allocator.
;;
;; Alternatives:
;;   1. operand 2 tied to the destination, immediate operand 3: output "#";
;;   2. operand 2 tied to the destination, register operand 3: predicated op;
;;   3. immediate operand 3 with an untied input: output "#";
;;   4. register operand 3 with an untied input: MOVPRFX then predicated op.
;; The "#" alternatives are split after reload into the unpredicated form,
;; which applies whenever operand 3 is not a register.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (SVE_INT_BINARY_IMM:SVE_I
             (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w")
             (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   #
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0)
        (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes,yes")]
)
3672
;; Unpredicated binary operations with a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
;;
;; The pattern only matches once reload_completed is set.  Alternative 1
;; ties operand 1 to the destination; alternative 2 copies it into the
;; earlyclobber destination with MOVPRFX first.
(define_insn "*post_ra_<optab><mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (SVE_INT_BINARY_IMM:SVE_I
          (match_operand:SVE_I 1 "register_operand" "0, w")
          (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
  "TARGET_SVE && reload_completed"
  "@
   <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
  [(set_attr "movprfx" "*,yes")]
)
3687
;; Predicated integer operations with merging.
;;
;; UNSPEC_SEL combines the result of the binary operation on operands 2 and
;; 3 with the fallback value (operand 4) under predicate operand 1.  The
;; predicate for operand 3 is chosen per operation through
;; <sve_pred_int_rhs2_operand>.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand")
           (SVE_INT_BINARY:SVE_I
             (match_operand:SVE_I 2 "register_operand")
             (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>"))
           (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)
3700
;; Predicated integer operations, merging with the first input.
;;
;; Alternative 1 ties operand 2 to the destination; alternative 2 copies
;; operand 2 into the earlyclobber destination with MOVPRFX first.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_INT_BINARY:SVE_I
             (match_operand:SVE_I 2 "register_operand" "0, w")
             (match_operand:SVE_I 3 "register_operand" "w, w"))
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3717
;; Predicated integer operations, merging with the second input.
;;
;; Because the destination holds operand 3, the output uses the
;; <sve_int_op_rev> mnemonic with operand 2 as the other source.
;; Alternative 1 ties operand 3 to the destination; alternative 2 copies
;; operand 3 into the earlyclobber destination with MOVPRFX first.
(define_insn "*cond_<optab><mode>_3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_INT_BINARY:SVE_I
             (match_operand:SVE_I 2 "register_operand" "w, w")
             (match_operand:SVE_I 3 "register_operand" "0, w"))
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
3734
;; Predicated integer operations, merging with an independent value.
;;
;; The insn condition rejects the cases in which operand 4 equals operand 2
;; or operand 3.  Alternatives:
;;   1. operand 4 matches Dz, operand 2 tied to the destination:
;;      zeroing MOVPRFX of the destination, then the operation;
;;   2. operand 4 matches Dz, operand 3 tied to the destination:
;;      zeroing MOVPRFX of the destination, then the reversed operation;
;;   3. operand 4 matches Dz, nothing tied: zeroing MOVPRFX of operand 2;
;;   4. operand 4 tied to the destination: merging (/m) MOVPRFX of operand 2;
;;   5. operand 4 in an unrelated register: output "#".
;; After reload, when operand 4 is a register other than the destination,
;; the rewrite step emits a vcond_mask of operands 2 and 4 under operand 1
;; into the destination and then uses the destination as both operand 2
;; and operand 4.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (SVE_INT_BINARY:SVE_I
             (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
             (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
           (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
3764
915d28fe
RS
3765;; -------------------------------------------------------------------------
3766;; ---- [INT] Addition
3767;; -------------------------------------------------------------------------
3768;; Includes:
3769;; - ADD
3770;; - DECB
3771;; - DECD
3772;; - DECH
3773;; - DECW
3774;; - INCB
3775;; - INCD
3776;; - INCH
3777;; - INCW
3778;; - SUB
3779;; -------------------------------------------------------------------------
3780
;; Unpredicated vector addition.  Alternatives, in order:
;;   1. ADD of an immediate (printed with %D2), input tied to the destination;
;;   2. SUB of an immediate (printed with %N2), input tied to the destination;
;;   3. output produced by aarch64_output_sve_vector_inc_dec;
;;   4. MOVPRFX followed by the ADD-immediate form;
;;   5. MOVPRFX followed by the SUB-immediate form;
;;   6. register-register ADD.
(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
        (plus:SVE_I
          (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
          (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
   movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,*,*,yes,yes,*")]
)
3796
915d28fe
RS
3797;; Merging forms are handled through SVE_INT_BINARY.
3798
3799;; -------------------------------------------------------------------------
3800;; ---- [INT] Subtraction
3801;; -------------------------------------------------------------------------
3802;; Includes:
3803;; - SUB
3804;; - SUBR
3805;; -------------------------------------------------------------------------
3806
;; Unpredicated vector subtraction (operand 1 minus operand 2).
;; Alternatives:
;;   1. register-register SUB;
;;   2. SUBR with operand 1 as an immediate (printed with %D1) and operand 2
;;      tied to the destination;
;;   3. MOVPRFX of operand 2 into the earlyclobber destination, then SUBR.
(define_insn "sub<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (minus:SVE_I
          (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
          (match_operand:SVE_I 2 "register_operand" "w, 0, w")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1
   movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
  [(set_attr "movprfx" "*,*,yes")]
)
3819
915d28fe
RS
3820;; Merging forms are handled through SVE_INT_BINARY.
3821
a229966c
RS
3822;; -------------------------------------------------------------------------
3823;; ---- [INT] Take address
3824;; -------------------------------------------------------------------------
3825;; Includes:
3826;; - ADR
3827;; -------------------------------------------------------------------------
3828
624d0f07
RS
;; An unshifted and unscaled ADR.  This is functionally equivalent to an ADD,
;; but the svadrb intrinsics should preserve the user's choice.
;;
;; Operand 1 is printed as the base and operand 2 as the offset.
(define_insn "@aarch64_adr<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDI
          [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
           (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
          UNSPEC_ADR))]
  "TARGET_SVE"
  "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
)
3840
;; Same, but with the offset being sign-extended from the low 32 bits.
;;
;; The extension appears as a predicated (UNSPEC_PRED_X)
;; sign_extend-of-truncate of operand 2.  Its predicate (operand 3) is not
;; used by the output; if it is not already a constant, the rewrite step
;; replaces it with the all-ones VNx2BI constant.
(define_insn_and_rewrite "*aarch64_adr_sxtw"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
        (unspec:VNx2DI
          [(match_operand:VNx2DI 1 "register_operand" "w")
           (unspec:VNx2DI
             [(match_operand 3)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 2 "register_operand" "w")))]
             UNSPEC_PRED_X)]
          UNSPEC_ADR))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, sxtw]"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (VNx2BImode);
  }
)
3860
;; Same, but with the offset being zero-extended from the low 32 bits.
;;
;; The zero extension is matched as an AND of operand 2 with a constant
;; that satisfies aarch64_sve_uxtw_immediate (operand 3).
(define_insn "*aarch64_adr_uxtw_unspec"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
        (unspec:VNx2DI
          [(match_operand:VNx2DI 1 "register_operand" "w")
           (and:VNx2DI
             (match_operand:VNx2DI 2 "register_operand" "w")
             (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
          UNSPEC_ADR))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, uxtw]"
)
3873
3874;; Same, but matching a PLUS rather than an UNSPEC_ADR.
;; Operand numbers follow the assembly order (operand 1 is the base,
;; operand 2 the offset) even though the masked offset is the first
;; argument of the PLUS.  Operand 3 is matched but never printed.
3875(define_insn "*aarch64_adr_uxtw_and"
a229966c
RS
3876 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3877 (plus:VNx2DI
3878 (and:VNx2DI
3879 (match_operand:VNx2DI 2 "register_operand" "w")
3880 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
3881 (match_operand:VNx2DI 1 "register_operand" "w")))]
3882 "TARGET_SVE"
3883 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3884)
3885
3886;; ADR with a nonzero shift.
;; Expander: operand 1 is added to operand 2 shifted left by operand 3,
;; where operand 3 must satisfy const_1_to_3_operand.  Operand 4 is not
;; supplied by the caller; the preparation code sets it to an all-ones
;; predicate constant for the UNSPEC_PRED_X that wraps the shift.
624d0f07 3887(define_expand "@aarch64_adr<mode>_shift"
f75cdd2c
RS
3888 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3889 (plus:SVE_FULL_SDI
3890 (unspec:SVE_FULL_SDI
624d0f07 3891 [(match_dup 4)
f75cdd2c
RS
3892 (ashift:SVE_FULL_SDI
3893 (match_operand:SVE_FULL_SDI 2 "register_operand")
3894 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
624d0f07 3895 UNSPEC_PRED_X)
f75cdd2c 3896 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
624d0f07
RS
3897 "TARGET_SVE"
3898 {
3899 operands[4] = CONSTM1_RTX (<VPRED>mode);
3900 }
3901)
3902
;; Matching insn for a predicated shift added to a base.  Note that it
;; iterates over SVE_24I (not SVE_FULL_SDI as the expander above does) and
;; prints register suffixes with <Vctype>.  Operand 4, the predicate of the
;; shift, is not printed; the rewrite step replaces a non-constant
;; operand 4 with an all-ones predicate constant.
a229966c 3903(define_insn_and_rewrite "*aarch64_adr<mode>_shift"
3f8b0bba
RS
3904 [(set (match_operand:SVE_24I 0 "register_operand" "=w")
3905 (plus:SVE_24I
3906 (unspec:SVE_24I
a229966c 3907 [(match_operand 4)
3f8b0bba
RS
3908 (ashift:SVE_24I
3909 (match_operand:SVE_24I 2 "register_operand" "w")
3910 (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
a229966c 3911 UNSPEC_PRED_X)
3f8b0bba 3912 (match_operand:SVE_24I 1 "register_operand" "w")))]
a229966c 3913 "TARGET_SVE"
3f8b0bba 3914 "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
a229966c
RS
3915 "&& !CONSTANT_P (operands[4])"
3916 {
3917 operands[4] = CONSTM1_RTX (<VPRED>mode);
3918 }
3919)
3920
624d0f07
RS
3921;; Same, but with the index being sign-extended from the low 32 bits.
;; Two predicates are matched here: operand 4 governs the shift and
;; operand 5 governs the inner sign extension.  Neither is printed.  The
;; rewrite step fires when either one is non-constant and sets both to an
;; all-ones VNx2BI constant.
3922(define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
3923 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3924 (plus:VNx2DI
3925 (unspec:VNx2DI
3926 [(match_operand 4)
3927 (ashift:VNx2DI
3928 (unspec:VNx2DI
3929 [(match_operand 5)
3930 (sign_extend:VNx2DI
3931 (truncate:VNx2SI
3932 (match_operand:VNx2DI 2 "register_operand" "w")))]
3933 UNSPEC_PRED_X)
3934 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3935 UNSPEC_PRED_X)
3936 (match_operand:VNx2DI 1 "register_operand" "w")))]
3937 "TARGET_SVE"
3938 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
3939 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3940 {
3941 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
3942 }
3943)
3944
a229966c
RS
3945;; Same, but with the index being zero-extended from the low 32 bits.
;; Operand 4 is the aarch64_sve_uxtw_immediate that operand 2 is ANDed with
;; and operand 5 is the predicate of the shift; neither is printed.  The
;; rewrite step replaces a non-constant operand 5 with an all-ones VNx2BI
;; constant.
3946(define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
3947 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3948 (plus:VNx2DI
3949 (unspec:VNx2DI
3950 [(match_operand 5)
3951 (ashift:VNx2DI
3952 (and:VNx2DI
3953 (match_operand:VNx2DI 2 "register_operand" "w")
3954 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
3955 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3956 UNSPEC_PRED_X)
3957 (match_operand:VNx2DI 1 "register_operand" "w")))]
3958 "TARGET_SVE"
3959 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
3960 "&& !CONSTANT_P (operands[5])"
3961 {
3962 operands[5] = CONSTM1_RTX (VNx2BImode);
3963 }
3964)
3965
915d28fe
RS
3966;; -------------------------------------------------------------------------
3967;; ---- [INT] Absolute difference
3968;; -------------------------------------------------------------------------
3969;; Includes:
3970;; - SABD
3971;; - UABD
3972;; -------------------------------------------------------------------------
3973
3974;; Unpredicated integer absolute difference.
;; The preparation code does all the work: it obtains a predicate register
;; from aarch64_ptrue_reg, emits the predicated <su>abd pattern on
;; operands 0-2 and finishes with DONE.  The RTL template therefore only
;; describes the operands (and ties the expander to the USMAX iterator).
3975(define_expand "<su>abd<mode>_3"
907ea379
RS
3976 [(use (match_operand:SVE_I 0 "register_operand"))
3977 (USMAX:SVE_I
3978 (match_operand:SVE_I 1 "register_operand")
3979 (match_operand:SVE_I 2 "register_operand"))]
915d28fe
RS
3980 "TARGET_SVE"
3981 {
3982 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
624d0f07
RS
3983 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
3984 operands[2]));
915d28fe
RS
3985 DONE;
3986 }
3987)
3988
3989;; Predicated integer absolute difference.
;; The absolute difference is described as USMAX (op2, op3) minus
;; <max_opp> (op2, op3), with both halves wrapped in UNSPEC_PRED_X under the
;; same predicate (operand 1).  In the first alternative operand 2 is tied
;; to the destination (commutatively, via "%0"); the second alternative
;; copies operand 2 into the destination with MOVPRFX first.
624d0f07 3990(define_insn "@aarch64_pred_<su>abd<mode>"
907ea379
RS
3991 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3992 (minus:SVE_I
3993 (unspec:SVE_I
3994 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3995 (USMAX:SVE_I
3996 (match_operand:SVE_I 2 "register_operand" "%0, w")
3997 (match_operand:SVE_I 3 "register_operand" "w, w"))]
3998 UNSPEC_PRED_X)
3999 (unspec:SVE_I
4000 [(match_dup 1)
4001 (<max_opp>:SVE_I
915d28fe 4002 (match_dup 2)
907ea379
RS
4003 (match_dup 3))]
4004 UNSPEC_PRED_X)))]
915d28fe
RS
4005 "TARGET_SVE"
4006 "@
4007 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4008 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4009 [(set_attr "movprfx" "*,yes")]
4010)
4011
;; Expander for the merging form: an UNSPEC_SEL under predicate operand 1
;; between the absolute difference of operands 2 and 3 and the fallback
;; value operand 4 (a register or zero).  If the fallback equals operand 3,
;; the preparation code swaps operands 2 and 3 so that the fallback ends up
;; as operand 2.
624d0f07 4012(define_expand "@aarch64_cond_<su>abd<mode>"
f75cdd2c
RS
4013 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4014 (unspec:SVE_FULL_I
624d0f07 4015 [(match_operand:<VPRED> 1 "register_operand")
f75cdd2c
RS
4016 (minus:SVE_FULL_I
4017 (unspec:SVE_FULL_I
624d0f07 4018 [(match_dup 1)
f75cdd2c
RS
4019 (USMAX:SVE_FULL_I
4020 (match_operand:SVE_FULL_I 2 "register_operand")
4021 (match_operand:SVE_FULL_I 3 "register_operand"))]
624d0f07 4022 UNSPEC_PRED_X)
f75cdd2c 4023 (unspec:SVE_FULL_I
624d0f07 4024 [(match_dup 1)
f75cdd2c 4025 (<max_opp>:SVE_FULL_I
624d0f07
RS
4026 (match_dup 2)
4027 (match_dup 3))]
4028 UNSPEC_PRED_X))
f75cdd2c 4029 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
624d0f07
RS
4030 UNSPEC_SEL))]
4031 "TARGET_SVE"
4032{
4033 if (rtx_equal_p (operands[3], operands[4]))
4034 std::swap (operands[2], operands[3]);
4035})
4036
9730c5cc
RS
4037;; Predicated integer absolute difference, merging with the first input.
;; The fallback value of the UNSPEC_SEL is operand 2 itself (match_dup 2).
;; Operands 4 and 5 are the predicates of the two inner UNSPEC_PRED_Xs; they
;; are not printed, and the rewrite step sets both to an all-ones constant
;; whenever either of them is non-constant.
4038(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
907ea379
RS
4039 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4040 (unspec:SVE_I
9730c5cc 4041 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
907ea379
RS
4042 (minus:SVE_I
4043 (unspec:SVE_I
9730c5cc 4044 [(match_operand 4)
907ea379
RS
4045 (USMAX:SVE_I
4046 (match_operand:SVE_I 2 "register_operand" "0, w")
4047 (match_operand:SVE_I 3 "register_operand" "w, w"))]
9730c5cc 4048 UNSPEC_PRED_X)
907ea379 4049 (unspec:SVE_I
9730c5cc 4050 [(match_operand 5)
907ea379 4051 (<max_opp>:SVE_I
9730c5cc
RS
4052 (match_dup 2)
4053 (match_dup 3))]
4054 UNSPEC_PRED_X))
4055 (match_dup 2)]
4056 UNSPEC_SEL))]
4057 "TARGET_SVE"
4058 "@
4059 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4060 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4061 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4062 {
4063 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4064 }
4065 [(set_attr "movprfx" "*,yes")]
4066)
4067
907ea379
RS
4068;; Predicated integer absolute difference, merging with the second input.
;; Mirror image of the "_2" pattern: the fallback value is operand 3
;; (match_dup 3), so operand 3 is the one tied to or copied into the
;; destination and operand 2 is printed as the second source.  The same
;; opcode is used in both cases.  The rewrite step normalizes the inner
;; predicates (operands 4 and 5) to an all-ones constant.
4069(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_3"
4070 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4071 (unspec:SVE_I
4072 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4073 (minus:SVE_I
4074 (unspec:SVE_I
4075 [(match_operand 4)
4076 (USMAX:SVE_I
4077 (match_operand:SVE_I 2 "register_operand" "w, w")
4078 (match_operand:SVE_I 3 "register_operand" "0, w"))]
4079 UNSPEC_PRED_X)
4080 (unspec:SVE_I
4081 [(match_operand 5)
4082 (<max_opp>:SVE_I
4083 (match_dup 2)
4084 (match_dup 3))]
4085 UNSPEC_PRED_X))
4086 (match_dup 3)]
4087 UNSPEC_SEL))]
4088 "TARGET_SVE"
4089 "@
4090 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4091 movprfx\t%0, %3\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4092 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4093 {
4094 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4095 }
4096 [(set_attr "movprfx" "*,yes")]
4097)
4098
9730c5cc
RS
4099;; Predicated integer absolute difference, merging with an independent value.
;; The insn condition excludes the cases in which the fallback (operand 4)
;; equals operand 2 or operand 3.
;; Alternatives, in order:
;;   1-3: fallback is zero (Dz); a zeroing MOVPRFX is taken from the
;;        destination itself (when tied to operand 2 or 3) or from operand 2.
;;   4:   fallback is tied to the destination; a merging MOVPRFX copies
;;        operand 2 into the active lanes.
;;   5:   fallback is an unrelated register; output is "#" and the rewrite
;;        code below handles it.
4100(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
907ea379
RS
4101 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4102 (unspec:SVE_I
9730c5cc 4103 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
907ea379
RS
4104 (minus:SVE_I
4105 (unspec:SVE_I
9730c5cc 4106 [(match_operand 5)
907ea379
RS
4107 (USMAX:SVE_I
4108 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
4109 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))]
9730c5cc 4110 UNSPEC_PRED_X)
907ea379 4111 (unspec:SVE_I
9730c5cc 4112 [(match_operand 6)
907ea379 4113 (<max_opp>:SVE_I
9730c5cc
RS
4114 (match_dup 2)
4115 (match_dup 3))]
4116 UNSPEC_PRED_X))
907ea379 4117 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
9730c5cc
RS
4118 UNSPEC_SEL))]
4119 "TARGET_SVE
4120 && !rtx_equal_p (operands[2], operands[4])
4121 && !rtx_equal_p (operands[3], operands[4])"
4122 "@
4123 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4124 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4125 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4126 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4127 #"
4128 "&& 1"
4129 {
/* First priority: normalize the inner predicates (operands 5 and 6) to an
   all-ones constant if either is still non-constant.  */
4130 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
4131 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
/* Otherwise, after reload, when the fallback is a register other than the
   destination: emit a vcond_mask that writes the destination from
   operands 2 and 4 under predicate operand 1, then use the destination as
   both operand 2 and operand 4.  */
4132 else if (reload_completed
4133 && register_operand (operands[4], <MODE>mode)
4134 && !rtx_equal_p (operands[0], operands[4]))
4135 {
4136 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4137 operands[4], operands[1]));
4138 operands[4] = operands[2] = operands[0];
4139 }
/* Nothing to rewrite.  */
4140 else
4141 FAIL;
4142 }
4143 [(set_attr "movprfx" "yes")]
4144)
4145
624d0f07
RS
4146;; -------------------------------------------------------------------------
4147;; ---- [INT] Saturating addition and subtraction
4148;; -------------------------------------------------------------------------
;; Includes:
4149;; - SQADD
4150;; - SQSUB
4151;; - UQADD
4152;; - UQSUB
4153;; -------------------------------------------------------------------------
4154
4155;; Unpredicated saturating signed addition and subtraction.
;; Iterates over SBINQOPS.  Alternatives, in order:
;;   1: destination tied to operand 1, "vsQ" immediate printed with %D2.
;;   2: destination tied to operand 1, "vsS" immediate; emits the reverse
;;      operation <binqops_op_rev> with the immediate printed via %N2
;;      (presumably the negated value -- confirm against the operand printer).
;;   3, 4: as 1 and 2 but with a MOVPRFX from operand 1.
;;   5: three-register form.
694e6b19 4156(define_insn "@aarch64_sve_<optab><mode>"
f75cdd2c
RS
4157 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
4158 (SBINQOPS:SVE_FULL_I
4159 (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
4160 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
624d0f07
RS
4161 "TARGET_SVE"
4162 "@
4163 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4164 <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4165 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4166 movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4167 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4168 [(set_attr "movprfx" "*,*,yes,yes,*")]
4169)
4170
4171;; Unpredicated saturating unsigned addition and subtraction.
;; Iterates over UBINQOPS.  Unlike the signed pattern there is no reversed
;; immediate form: the alternatives are a tied immediate form, the same
;; with a MOVPRFX from operand 1, and a three-register form.
694e6b19 4172(define_insn "@aarch64_sve_<optab><mode>"
f75cdd2c
RS
4173 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
4174 (UBINQOPS:SVE_FULL_I
4175 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
4176 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
624d0f07
RS
4177 "TARGET_SVE"
4178 "@
4179 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4180 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4181 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4182 [(set_attr "movprfx" "*,yes,*")]
4183)
4184
915d28fe
RS
4185;; -------------------------------------------------------------------------
4186;; ---- [INT] Highpart multiplication
4187;; -------------------------------------------------------------------------
4188;; Includes:
4189;; - SMULH
4190;; - UMULH
4191;; -------------------------------------------------------------------------
43cacb12 4192
11e9443f
RS
4193;; Unpredicated highpart multiplication.
;; The multiplication is a MUL_HIGHPART unspec of operands 1 and 2, wrapped
;; in UNSPEC_PRED_X.  The predicate (operand 3) is not supplied by the
;; caller; the preparation code obtains it from aarch64_ptrue_reg.
4194(define_expand "<su>mul<mode>3_highpart"
7446de5a
RS
4195 [(set (match_operand:SVE_I 0 "register_operand")
4196 (unspec:SVE_I
11e9443f 4197 [(match_dup 3)
7446de5a
RS
4198 (unspec:SVE_I
4199 [(match_operand:SVE_I 1 "register_operand")
4200 (match_operand:SVE_I 2 "register_operand")]
f75cdd2c 4201 MUL_HIGHPART)]
06308276 4202 UNSPEC_PRED_X))]
11e9443f
RS
4203 "TARGET_SVE"
4204 {
16de3637 4205 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
11e9443f
RS
4206 }
4207)
4208
4209;; Predicated highpart multiplication.
;; Operand 1 is the governing predicate.  The first alternative ties
;; operand 2 to the destination (commutatively, via "%0"); the second
;; copies operand 2 into the destination with MOVPRFX before the multiply.
624d0f07 4210(define_insn "@aarch64_pred_<optab><mode>"
7446de5a
RS
4211 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4212 (unspec:SVE_I
a08acce8 4213 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7446de5a
RS
4214 (unspec:SVE_I
4215 [(match_operand:SVE_I 2 "register_operand" "%0, w")
4216 (match_operand:SVE_I 3 "register_operand" "w, w")]
f75cdd2c 4217 MUL_HIGHPART)]
06308276 4218 UNSPEC_PRED_X))]
11e9443f 4219 "TARGET_SVE"
a08acce8
RH
4220 "@
4221 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4222 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4223 [(set_attr "movprfx" "*,yes")]
11e9443f
RS
4224)
4225
624d0f07
RS
4226;; Predicated highpart multiplications with merging.
;; UNSPEC_SEL under predicate operand 1 between the MUL_HIGHPART of
;; operands 2 and 3 and the fallback operand 4.  The preparation code only
;; asserts that the fallback is either operand 2 or a constant; it does not
;; transform the operands.
4227(define_expand "@cond_<optab><mode>"
f75cdd2c
RS
4228 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4229 (unspec:SVE_FULL_I
624d0f07 4230 [(match_operand:<VPRED> 1 "register_operand")
f75cdd2c
RS
4231 (unspec:SVE_FULL_I
4232 [(match_operand:SVE_FULL_I 2 "register_operand")
4233 (match_operand:SVE_FULL_I 3 "register_operand")]
624d0f07 4234 MUL_HIGHPART)
f75cdd2c 4235 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
624d0f07
RS
4236 UNSPEC_SEL))]
4237 "TARGET_SVE"
4238{
4239 /* Only target code is aware of these operations, so we don't need
4240 to handle the fully-general case. */
4241 gcc_assert (rtx_equal_p (operands[2], operands[4])
4242 || CONSTANT_P (operands[4]));
4243})
4244
4245;; Predicated highpart multiplications, merging with the first input.
;; The fallback of the UNSPEC_SEL is operand 2 (match_dup 2).  The first
;; alternative ties operand 2 to the destination; the second copies it in
;; with an unpredicated MOVPRFX.  The mnemonic comes from <sve_int_op>.
4246(define_insn "*cond_<optab><mode>_2"
f75cdd2c
RS
4247 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4248 (unspec:SVE_FULL_I
624d0f07 4249 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c
RS
4250 (unspec:SVE_FULL_I
4251 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4252 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
624d0f07
RS
4253 MUL_HIGHPART)
4254 (match_dup 2)]
4255 UNSPEC_SEL))]
4256 "TARGET_SVE"
4257 "@
4258 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4259 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4260 [(set_attr "movprfx" "*,yes")])
4261
4262;; Predicated highpart multiplications, merging with zero.
;; The fallback (operand 4) must satisfy aarch64_simd_imm_zero.  Both
;; alternatives start with a zeroing (%1/z) MOVPRFX: from the destination
;; itself when operand 2 is tied to it, otherwise from operand 2.
4263(define_insn "*cond_<optab><mode>_z"
f75cdd2c
RS
4264 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
4265 (unspec:SVE_FULL_I
624d0f07 4266 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c
RS
4267 (unspec:SVE_FULL_I
4268 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4269 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
624d0f07 4270 MUL_HIGHPART)
f75cdd2c 4271 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
624d0f07
RS
4272 UNSPEC_SEL))]
4273 "TARGET_SVE"
4274 "@
4275 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4276 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4277 [(set_attr "movprfx" "yes")])
4278
915d28fe
RS
4279;; -------------------------------------------------------------------------
4280;; ---- [INT] Division
4281;; -------------------------------------------------------------------------
4282;; Includes:
4283;; - SDIV
4284;; - SDIVR
4285;; - UDIV
4286;; - UDIVR
4287;; -------------------------------------------------------------------------
4288
4289;; Unpredicated integer division.
;; Iterates over SVE_INT_BINARY_SD for the SVE_FULL_SDI modes.  The
;; operation is wrapped in UNSPEC_PRED_X; the predicate (operand 3) is
;; filled in by the preparation code from aarch64_ptrue_reg.
c38f7319 4290(define_expand "<optab><mode>3"
f75cdd2c
RS
4291 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4292 (unspec:SVE_FULL_SDI
c38f7319 4293 [(match_dup 3)
f75cdd2c
RS
4294 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4295 (match_operand:SVE_FULL_SDI 1 "register_operand")
4296 (match_operand:SVE_FULL_SDI 2 "register_operand"))]
06308276 4297 UNSPEC_PRED_X))]
c38f7319
RS
4298 "TARGET_SVE"
4299 {
16de3637 4300 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
c38f7319
RS
4301 }
4302)
4303
915d28fe 4304;; Integer division predicated with a PTRUE.
;; Alternatives, in order:
;;   1: operand 2 tied to the destination; normal form.
;;   2: operand 3 tied to the destination; the mnemonic gets an "r" suffix
;;      and operand 2 is printed as the second source.
;;   3: neither tied; MOVPRFX copies operand 2 into the destination first.
624d0f07 4305(define_insn "@aarch64_pred_<optab><mode>"
f75cdd2c
RS
4306 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
4307 (unspec:SVE_FULL_SDI
a08acce8 4308 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
f75cdd2c
RS
4309 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4310 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
4311 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
06308276 4312 UNSPEC_PRED_X))]
c38f7319
RS
4313 "TARGET_SVE"
4314 "@
4315 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
a08acce8
RH
4316 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4317 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4318 [(set_attr "movprfx" "*,*,yes")]
c38f7319
RS
4319)
4320
915d28fe 4321;; Predicated integer division with merging.
;; Pure template expander (no preparation code): UNSPEC_SEL under predicate
;; operand 1 between the SVE_INT_BINARY_SD operation on operands 2 and 3
;; and the fallback operand 4, which may be a register or zero.
624d0f07 4322(define_expand "@cond_<optab><mode>"
f75cdd2c
RS
4323 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4324 (unspec:SVE_FULL_SDI
915d28fe 4325 [(match_operand:<VPRED> 1 "register_operand")
f75cdd2c
RS
4326 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4327 (match_operand:SVE_FULL_SDI 2 "register_operand")
4328 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4329 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
915d28fe 4330 UNSPEC_SEL))]
43cacb12 4331 "TARGET_SVE"
43cacb12
RS
4332)
4333
915d28fe
RS
4334;; Predicated integer division, merging with the first input.
;; The fallback is operand 2 (match_dup 2), so inactive lanes keep the
;; first input.  Operand 2 is either tied to the destination or copied into
;; it with MOVPRFX.
4335(define_insn "*cond_<optab><mode>_2"
f75cdd2c
RS
4336 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4337 (unspec:SVE_FULL_SDI
915d28fe 4338 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c
RS
4339 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4340 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
4341 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
915d28fe
RS
4342 (match_dup 2)]
4343 UNSPEC_SEL))]
43cacb12 4344 "TARGET_SVE"
915d28fe
RS
4345 "@
4346 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4347 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4348 [(set_attr "movprfx" "*,yes")]
4349)
4350
4351;; Predicated integer division, merging with the second input.
;; The fallback is operand 3 (match_dup 3).  Operand 3 is tied to or copied
;; into the destination, so the reversed mnemonic <sve_int_op_rev> is used
;; with operand 2 printed as the second source.
4352(define_insn "*cond_<optab><mode>_3"
f75cdd2c
RS
4353 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4354 (unspec:SVE_FULL_SDI
915d28fe 4355 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c
RS
4356 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4357 (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
4358 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
915d28fe
RS
4359 (match_dup 3)]
4360 UNSPEC_SEL))]
4361 "TARGET_SVE"
4362 "@
4363 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4364 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4365 [(set_attr "movprfx" "*,yes")]
4366)
4367
4368;; Predicated integer division, merging with an independent value.
;; The insn condition excludes fallbacks equal to operand 2 or operand 3.
;; Alternatives, in order:
;;   1: zero fallback, operand 2 tied to the destination.
;;   2: zero fallback, operand 3 tied to the destination; uses the reversed
;;      mnemonic with operand 2 as the second source.
;;   3: zero fallback, zeroing MOVPRFX from operand 2.
;;   4: fallback tied to the destination, merging MOVPRFX from operand 2.
;;   5: unrelated fallback register; output is "#".  After reload the
;;      rewrite code emits a vcond_mask into the destination and then uses
;;      the destination as both operand 2 and operand 4.
4369(define_insn_and_rewrite "*cond_<optab><mode>_any"
f75cdd2c
RS
4370 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4371 (unspec:SVE_FULL_SDI
915d28fe 4372 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
f75cdd2c
RS
4373 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4374 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
4375 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
4376 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
915d28fe
RS
4377 UNSPEC_SEL))]
4378 "TARGET_SVE
4379 && !rtx_equal_p (operands[2], operands[4])
4380 && !rtx_equal_p (operands[3], operands[4])"
4381 "@
4382 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4383 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4384 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4385 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4386 #"
4387 "&& reload_completed
4388 && register_operand (operands[4], <MODE>mode)
4389 && !rtx_equal_p (operands[0], operands[4])"
4390 {
4391 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4392 operands[4], operands[1]));
4393 operands[4] = operands[2] = operands[0];
4394 }
4395 [(set_attr "movprfx" "yes")]
43cacb12
RS
4396)
4397
915d28fe
RS
4398;; -------------------------------------------------------------------------
4399;; ---- [INT] Binary logical operations
4400;; -------------------------------------------------------------------------
4401;; Includes:
4402;; - AND
4403;; - EOR
4404;; - ORR
4405;; -------------------------------------------------------------------------
4406
4407;; Unpredicated integer binary logical operations.
;; Iterates over LOGICAL.  Alternatives, in order: an immediate form with
;; operand 1 tied to the destination (commutatively), the same immediate
;; form preceded by a MOVPRFX from operand 1, and a three-register form.
;; The register form always prints the ".d" suffix, whatever the mode,
;; whereas the immediate forms print the mode's own <Vetype>.
43cacb12 4408(define_insn "<optab><mode>3"
6802b5ba
JR
4409 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w")
4410 (LOGICAL:SVE_I
4411 (match_operand:SVE_I 1 "register_operand" "%0, w, w")
4412 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
43cacb12
RS
4413 "TARGET_SVE"
4414 "@
4415 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
5e176a61 4416 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
43cacb12 4417 <logical>\t%0.d, %1.d, %2.d"
5e176a61 4418 [(set_attr "movprfx" "*,yes,*")]
43cacb12
RS
4419)
4420
915d28fe
RS
4421;; Merging forms are handled through SVE_INT_BINARY.
4422
4423;; -------------------------------------------------------------------------
4424;; ---- [INT] Binary logical operations (inverted second input)
4425;; -------------------------------------------------------------------------
4426;; Includes:
4427;; - BIC
4428;; -------------------------------------------------------------------------
43cacb12 4429
624d0f07
RS
4430;; Unpredicated BIC.
;; Computes operand 1 AND (NOT operand 2).  The NOT is wrapped in an
;; UNSPEC_PRED_X whose predicate (operand 3) is set by the preparation code
;; to an all-ones predicate constant.
4431(define_expand "@aarch64_bic<mode>"
bb3ab62a
JR
4432 [(set (match_operand:SVE_I 0 "register_operand")
4433 (and:SVE_I
4434 (unspec:SVE_I
624d0f07 4435 [(match_dup 3)
bb3ab62a 4436 (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
624d0f07 4437 UNSPEC_PRED_X)
bb3ab62a 4438 (match_operand:SVE_I 1 "register_operand")))]
624d0f07
RS
4439 "TARGET_SVE"
4440 {
4441 operands[3] = CONSTM1_RTX (<VPRED>mode);
4442 }
4443)
4444
4445;; Predicated BIC.
;; Operand 3, the predicate of the inner NOT, is not printed: the output is
;; an unpredicated three-register BIC that always uses the ".d" suffix.
;; The rewrite step replaces a non-constant operand 3 with an all-ones
;; predicate constant.
35d6c591 4446(define_insn_and_rewrite "*bic<mode>3"
bb3ab62a
JR
4447 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4448 (and:SVE_I
4449 (unspec:SVE_I
35d6c591 4450 [(match_operand 3)
bb3ab62a
JR
4451 (not:SVE_I
4452 (match_operand:SVE_I 2 "register_operand" "w"))]
06308276 4453 UNSPEC_PRED_X)
bb3ab62a 4454 (match_operand:SVE_I 1 "register_operand" "w")))]
35d6c591
RS
4455 "TARGET_SVE"
4456 "bic\t%0.d, %1.d, %2.d"
4457 "&& !CONSTANT_P (operands[3])"
4458 {
4459 operands[3] = CONSTM1_RTX (<VPRED>mode);
4460 }
43cacb12
RS
4461)
4462
624d0f07
RS
4463;; Predicated BIC with merging.
;; Pure template expander: UNSPEC_SEL under predicate operand 1 between
;; (operand 2 AND NOT operand 3) and the fallback operand 4.  Unlike the
;; unpredicated BIC patterns, the NOT here has no UNSPEC_PRED_X wrapper.
4464(define_expand "@cond_bic<mode>"
f75cdd2c
RS
4465 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4466 (unspec:SVE_FULL_I
624d0f07 4467 [(match_operand:<VPRED> 1 "register_operand")
f75cdd2c
RS
4468 (and:SVE_FULL_I
4469 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4470 (match_operand:SVE_FULL_I 2 "register_operand"))
4471 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
624d0f07
RS
4472 UNSPEC_SEL))]
4473 "TARGET_SVE"
4474)
4475
1b187f36
RS
4476;; Predicated integer BIC, merging with the first input.
;; The fallback is operand 2 (match_dup 2), the non-inverted input.
;; Operand 2 is either tied to the destination or copied into it with
;; MOVPRFX; operand 3 (the inverted input) is the second source.
4477(define_insn "*cond_bic<mode>_2"
37426e0f
RS
4478 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4479 (unspec:SVE_I
1b187f36 4480 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
37426e0f
RS
4481 (and:SVE_I
4482 (not:SVE_I
4483 (match_operand:SVE_I 3 "register_operand" "w, w"))
4484 (match_operand:SVE_I 2 "register_operand" "0, w"))
1b187f36
RS
4485 (match_dup 2)]
4486 UNSPEC_SEL))]
4487 "TARGET_SVE"
4488 "@
4489 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4490 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4491 [(set_attr "movprfx" "*,yes")]
4492)
4493
4494;; Predicated integer BIC, merging with an independent value.
;; The insn condition excludes a fallback equal to operand 2.
;; Alternatives, in order:
;;   1: zero fallback, operand 2 tied to the destination.
;;   2: zero fallback, zeroing MOVPRFX from operand 2.
;;   3: fallback tied to the destination, merging MOVPRFX from operand 2.
;;   4: unrelated fallback register; output is "#".  After reload the
;;      rewrite code emits a vcond_mask into the destination and then uses
;;      the destination as both operand 2 and operand 4.
4495(define_insn_and_rewrite "*cond_bic<mode>_any"
37426e0f
RS
4496 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, ?&w")
4497 (unspec:SVE_I
1b187f36 4498 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
37426e0f
RS
4499 (and:SVE_I
4500 (not:SVE_I
4501 (match_operand:SVE_I 3 "register_operand" "w, w, w, w"))
4502 (match_operand:SVE_I 2 "register_operand" "0, w, w, w"))
4503 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
1b187f36
RS
4504 UNSPEC_SEL))]
4505 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4506 "@
4507 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4508 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4509 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4510 #"
4511 "&& reload_completed
4512 && register_operand (operands[4], <MODE>mode)
4513 && !rtx_equal_p (operands[0], operands[4])"
4514 {
4515 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4516 operands[4], operands[1]));
4517 operands[4] = operands[2] = operands[0];
4518 }
4519 [(set_attr "movprfx" "yes")]
4520)
4521
915d28fe 4522;; -------------------------------------------------------------------------
624d0f07 4523;; ---- [INT] Shifts (rounding towards -Inf)
915d28fe
RS
4524;; -------------------------------------------------------------------------
4525;; Includes:
4526;; - ASR
624d0f07 4527;; - ASRR
915d28fe 4528;; - LSL
624d0f07 4529;; - LSLR
915d28fe 4530;; - LSR
624d0f07 4531;; - LSRR
915d28fe 4532;; -------------------------------------------------------------------------
43cacb12 4533
915d28fe
RS
4534;; Unpredicated shift by a scalar, which expands into one of the vector
4535;; shifts below.
;; Operand 2 is a scalar of the element mode (<VEL>).  The preparation code
;; turns it into a vector shift amount and hands off to the v<optab><mode>3
;; expander, finishing with DONE.
4536(define_expand "<ASHIFT:optab><mode>3"
b81fbfe1
RS
4537 [(set (match_operand:SVE_I 0 "register_operand")
4538 (ASHIFT:SVE_I
4539 (match_operand:SVE_I 1 "register_operand")
f75cdd2c 4540 (match_operand:<VEL> 2 "general_operand")))]
43cacb12
RS
4541 "TARGET_SVE"
4542 {
915d28fe
RS
4543 rtx amount;
/* Constant amount: duplicate it across a constant vector, forcing the
   vector into a register when the shift-operand predicate rejects it.
   NOTE(review): the predicate is applied to the scalar operands[2] rather
   than to the duplicated vector `amount' -- confirm this is intended.  */
4544 if (CONST_INT_P (operands[2]))
4545 {
4546 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4547 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4548 amount = force_reg (<MODE>mode, amount);
4549 }
/* Variable amount: convert to the element mode, then broadcast.  */
4550 else
4551 {
6070e39c
AV
4552 amount = convert_to_mode (<VEL>mode, operands[2], 0);
4553 amount = expand_vector_broadcast (<MODE>mode, amount);
915d28fe
RS
4554 }
4555 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4556 DONE;
43cacb12
RS
4557 }
4558)
4559
915d28fe 4560;; Unpredicated shift by a vector.
;; The shift is wrapped in UNSPEC_PRED_X; the predicate (operand 3) is
;; filled in by the preparation code from aarch64_ptrue_reg.  Operand 2 may
;; be anything accepted by aarch64_sve_<lr>shift_operand.
43cacb12 4561(define_expand "v<optab><mode>3"
b81fbfe1
RS
4562 [(set (match_operand:SVE_I 0 "register_operand")
4563 (unspec:SVE_I
43cacb12 4564 [(match_dup 3)
b81fbfe1
RS
4565 (ASHIFT:SVE_I
4566 (match_operand:SVE_I 1 "register_operand")
4567 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
06308276 4568 UNSPEC_PRED_X))]
43cacb12
RS
4569 "TARGET_SVE"
4570 {
16de3637 4571 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
4572 }
4573)
4574
915d28fe
RS
4575;; Shift by a vector, predicated with a PTRUE. We don't actually need
4576;; the predicate for the first alternative, but using Upa or X isn't
4577;; likely to gain much and would make the instruction seem less uniform
4578;; to the register allocator.
;; Alternatives, in order:
;;   1: non-register shift amount (constraint D<lr>); output is "#" and the
;;      insn is split after reload into a plain unpredicated ASHIFT set.
;;   2: operand 2 tied to the destination; normal predicated shift.
;;   3: operand 3 (the amount) tied to the destination; reversed mnemonic
;;      ("r" suffix) with operand 2 printed as the second source.
;;   4: neither tied; MOVPRFX copies operand 2 into the destination first.
624d0f07 4579(define_insn_and_split "@aarch64_pred_<optab><mode>"
b81fbfe1
RS
4580 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
4581 (unspec:SVE_I
7d1f2401 4582 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
b81fbfe1
RS
4583 (ASHIFT:SVE_I
4584 (match_operand:SVE_I 2 "register_operand" "w, 0, w, w")
4585 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
06308276 4586 UNSPEC_PRED_X))]
43cacb12
RS
4587 "TARGET_SVE"
4588 "@
26004f51 4589 #
a08acce8 4590 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
7d1f2401 4591 <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
a08acce8 4592 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
26004f51
RS
4593 "&& reload_completed
4594 && !register_operand (operands[3], <MODE>mode)"
b81fbfe1 4595 [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
26004f51 4596 ""
7d1f2401 4597 [(set_attr "movprfx" "*,*,*,yes")]
43cacb12
RS
4598)
4599
26004f51
RS
4600;; Unpredicated shift operations by a constant (post-RA only).
4601;; These are generated by splitting a predicated instruction whose
4602;; predicate is unused.
;; Only available once reload has completed (see the insn condition).
;; Operand 2 must satisfy aarch64_simd_<lr>shift_imm and is printed as an
;; immediate; no predicate is involved.
4603(define_insn "*post_ra_v<optab><mode>3"
b81fbfe1
RS
4604 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4605 (ASHIFT:SVE_I
4606 (match_operand:SVE_I 1 "register_operand" "w")
4607 (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
26004f51
RS
4608 "TARGET_SVE && reload_completed"
4609 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4610)
4611
20103c0e
RS
4612;; Predicated integer shift, merging with the first input.
;; The shift amount (operand 3) is an immediate satisfying
;; aarch64_simd_<lr>shift_imm and the fallback is operand 2 (match_dup 2).
;; Operand 2 is either tied to the destination or copied into it with
;; MOVPRFX.
4613(define_insn "*cond_<optab><mode>_2_const"
ab76e3db
RS
4614 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4615 (unspec:SVE_I
20103c0e 4616 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
ab76e3db
RS
4617 (ASHIFT:SVE_I
4618 (match_operand:SVE_I 2 "register_operand" "0, w")
4619 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
20103c0e
RS
4620 (match_dup 2)]
4621 UNSPEC_SEL))]
4622 "TARGET_SVE"
4623 "@
4624 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4625 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4626 [(set_attr "movprfx" "*,yes")]
4627)
4628
4629;; Predicated integer shift, merging with an independent value.
;; The insn condition excludes a fallback equal to operand 2.
;; Alternatives, in order:
;;   1: zero fallback; zeroing MOVPRFX from operand 2.
;;   2: fallback tied to the destination; merging MOVPRFX from operand 2.
;;   3: unrelated fallback register; output is "#".  After reload the
;;      rewrite code emits a vcond_mask into the destination and then uses
;;      the destination as both operand 2 and operand 4.
4630(define_insn_and_rewrite "*cond_<optab><mode>_any_const"
ab76e3db
RS
4631 [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w")
4632 (unspec:SVE_I
20103c0e 4633 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
ab76e3db
RS
4634 (ASHIFT:SVE_I
4635 (match_operand:SVE_I 2 "register_operand" "w, w, w")
4636 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4637 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
20103c0e
RS
4638 UNSPEC_SEL))]
4639 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4640 "@
4641 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4642 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4643 #"
4644 "&& reload_completed
4645 && register_operand (operands[4], <MODE>mode)
4646 && !rtx_equal_p (operands[0], operands[4])"
4647 {
4648 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4649 operands[4], operands[1]));
4650 operands[4] = operands[2] = operands[0];
4651 }
4652 [(set_attr "movprfx" "yes")]
4653)
4654
624d0f07
RS
4655;; Unpredicated shifts of narrow elements by 64-bit amounts.
;; Iterates over the SVE_SHIFT_WIDE unspecs for the SVE_FULL_BHSI modes.
;; The value being shifted (operand 1) has the pattern's mode, while the
;; shift amount (operand 2) is always a VNx2DI register printed with ".d".
4656(define_insn "@aarch64_sve_<sve_int_op><mode>"
f75cdd2c
RS
4657 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4658 (unspec:SVE_FULL_BHSI
4659 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
624d0f07
RS
4660 (match_operand:VNx2DI 2 "register_operand" "w")]
4661 SVE_SHIFT_WIDE))]
4662 "TARGET_SVE"
4663 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4664)
c0c2f013 4665
624d0f07
RS
4666;; Merging predicated shifts of narrow elements by 64-bit amounts.
;;
;; Expander only (no preparation code).  Operand 1 is the UNSPEC_SEL
;; predicate, operands 2 and 3 are the inputs of the SVE_SHIFT_WIDE
;; operation (operand 3 is always VNx2DI) and operand 4 is the value merged
;; in, which must satisfy aarch64_simd_reg_or_zero.
4667(define_expand "@cond_<sve_int_op><mode>"
f75cdd2c
RS
4668 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4669 (unspec:SVE_FULL_BHSI
624d0f07 4670 [(match_operand:<VPRED> 1 "register_operand")
f75cdd2c
RS
4671 (unspec:SVE_FULL_BHSI
4672 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
624d0f07
RS
4673 (match_operand:VNx2DI 3 "register_operand")]
4674 SVE_SHIFT_WIDE)
f75cdd2c 4675 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
624d0f07 4676 UNSPEC_SEL))]
c0c2f013 4677 "TARGET_SVE"
c0c2f013
YW
4678)
4679
624d0f07
RS
4680;; Predicated shifts of narrow elements by 64-bit amounts, merging with
4681;; the first input.
;;
;; Operand 2 is both the shifted value and the merge value (match_dup 2).
;; Alternative 1 ties operand 2 to the output; alternative 2 first copies
;; operand 2 into the output with an unpredicated MOVPRFX, which the
;; "movprfx" attribute records.
4682(define_insn "*cond_<sve_int_op><mode>_m"
f75cdd2c
RS
4683 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
4684 (unspec:SVE_FULL_BHSI
624d0f07 4685 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c
RS
4686 (unspec:SVE_FULL_BHSI
4687 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
624d0f07
RS
4688 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4689 SVE_SHIFT_WIDE)
4690 (match_dup 2)]
4691 UNSPEC_SEL))]
4692 "TARGET_SVE"
4693 "@
4694 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4695 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4696 [(set_attr "movprfx" "*, yes")])
4697
4698;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
;;
;; Operand 4 must satisfy aarch64_simd_imm_zero and is never printed.  Both
;; alternatives begin with a zeroing MOVPRFX (%1/z): from the output itself
;; when operand 2 is tied to it, otherwise from operand 2.
4699(define_insn "*cond_<sve_int_op><mode>_z"
f75cdd2c
RS
4700 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
4701 (unspec:SVE_FULL_BHSI
624d0f07 4702 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c
RS
4703 (unspec:SVE_FULL_BHSI
4704 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
624d0f07
RS
4705 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4706 SVE_SHIFT_WIDE)
f75cdd2c 4707 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
624d0f07
RS
4708 UNSPEC_SEL))]
4709 "TARGET_SVE"
4710 "@
4711 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4712 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4713 [(set_attr "movprfx" "yes")])
4714
4715;; -------------------------------------------------------------------------
4716;; ---- [INT] Shifts (rounding towards 0)
4717;; -------------------------------------------------------------------------
4718;; Includes:
4719;; - ASRD
0a09a948
RS
4720;; - SQSHLU (SVE2)
4721;; - SRSHR (SVE2)
4722;; - URSHR (SVE2)
624d0f07
RS
4723;; -------------------------------------------------------------------------
4724
a958b2fc 4725;; Unpredicated ASRD.
;;
;; Operand 1 is the input and operand 2 the immediate (it must satisfy
;; aarch64_simd_rshift_imm).  Operand 3, the predicate of the UNSPEC_PRED_X
;; wrapper, is not supplied by the caller: the preparation code sets it to
;; aarch64_ptrue_reg (<VPRED>mode).
624d0f07 4726(define_expand "sdiv_pow2<mode>3"
a958b2fc
RS
4727 [(set (match_operand:SVE_I 0 "register_operand")
4728 (unspec:SVE_I
624d0f07 4729 [(match_dup 3)
a958b2fc
RS
4730 (unspec:SVE_I
4731 [(match_operand:SVE_I 1 "register_operand")
624d0f07 4732 (match_operand 2 "aarch64_simd_rshift_imm")]
a958b2fc
RS
4733 UNSPEC_ASRD)]
4734 UNSPEC_PRED_X))]
624d0f07
RS
4735 "TARGET_SVE"
4736 {
4737 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4738 }
4739)
4740
a958b2fc
RS
4741;; Predicated ASRD.
;;
;; Operand 1 is the predicate and operand 3 the immediate printed after "#".
;; Alternative 1 ties operand 2 to the output; alternative 2 copies
;; operand 2 into the output with MOVPRFX first.
4742(define_insn "*sdiv_pow2<mode>3"
4743 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4744 (unspec:SVE_I
4745 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4746 (unspec:SVE_I
4747 [(match_operand:SVE_I 2 "register_operand" "0, w")
4748 (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
4749 UNSPEC_ASRD)]
4750 UNSPEC_PRED_X))]
4751 "TARGET_SVE"
4752 "@
4753 asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4754 movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4755 [(set_attr "movprfx" "*,yes")])
4756
4757;; Predicated shift with merging.
;;
;; Operand 1 is the UNSPEC_SEL predicate, operands 2 and 3 the value and
;; immediate of the SVE_INT_SHIFT_IMM operation, and operand 4 the value
;; merged in (aarch64_simd_reg_or_zero).  Operand 5, the predicate of the
;; inner UNSPEC_PRED_X, is set by the preparation code to
;; aarch64_ptrue_reg (<VPRED>mode).
0a09a948 4758(define_expand "@cond_<sve_int_op><mode>"
a958b2fc
RS
4759 [(set (match_operand:SVE_I 0 "register_operand")
4760 (unspec:SVE_I
624d0f07 4761 [(match_operand:<VPRED> 1 "register_operand")
a958b2fc
RS
4762 (unspec:SVE_I
4763 [(match_dup 5)
4764 (unspec:SVE_I
4765 [(match_operand:SVE_I 2 "register_operand")
4766 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
4767 SVE_INT_SHIFT_IMM)]
4768 UNSPEC_PRED_X)
4769 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
624d0f07
RS
4770 UNSPEC_SEL))]
4771 "TARGET_SVE"
a958b2fc
RS
4772 {
4773 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
4774 }
624d0f07
RS
4775)
4776
a958b2fc
RS
4777;; Predicated shift, merging with the first input.
;;
;; Operand 4, the predicate of the inner UNSPEC_PRED_X, is matched with no
;; mode, predicate or constraint and is not printed.  The rewrite fires when
;; it is not CONSTANT_P and replaces it with CONSTM1_RTX (<VPRED>mode).
;; Alternative 1 ties operand 2 to the output; alternative 2 copies
;; operand 2 into the output with MOVPRFX first.
4778(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
4779 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4780 (unspec:SVE_I
c0c2f013 4781 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
a958b2fc
RS
4782 (unspec:SVE_I
4783 [(match_operand 4)
4784 (unspec:SVE_I
4785 [(match_operand:SVE_I 2 "register_operand" "0, w")
4786 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
4787 SVE_INT_SHIFT_IMM)]
4788 UNSPEC_PRED_X)
624d0f07
RS
4789 (match_dup 2)]
4790 UNSPEC_SEL))]
4791 "TARGET_SVE"
4792 "@
0a09a948
RS
4793 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4794 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
a958b2fc
RS
4795 "&& !CONSTANT_P (operands[4])"
4796 {
4797 operands[4] = CONSTM1_RTX (<VPRED>mode);
4798 }
624d0f07
RS
4799 [(set_attr "movprfx" "*,yes")])
4800
a958b2fc
RS
4801;; Predicated shift, merging with an independent value.
;;
;; Operand 5, the predicate of the inner UNSPEC_PRED_X, is matched
;; unconstrained and is not printed.  The insn condition rejects equal
;; operands 2 and 4.  Alternatives 1 and 2 use a zeroing (%1/z) or merging
;; (%1/m) MOVPRFX from operand 2.  Alternative 3 outputs "#": after reload,
;; if operand 4 is a register other than the output, the rewrite emits a
;; vcond_mask of operands 2 and 4 under operand 1 into operand 0 and then
;; substitutes operand 0 for operands 2 and 4.
4802(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
4803 [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w")
4804 (unspec:SVE_I
4805 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4806 (unspec:SVE_I
4807 [(match_operand 5)
4808 (unspec:SVE_I
4809 [(match_operand:SVE_I 2 "register_operand" "w, w, w")
4810 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
4811 SVE_INT_SHIFT_IMM)]
4812 UNSPEC_PRED_X)
4813 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4814 UNSPEC_SEL))]
4815 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4816 "@
4817 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4818 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4819 #"
4820 "&& reload_completed
4821 && register_operand (operands[4], <MODE>mode)
4822 && !rtx_equal_p (operands[0], operands[4])"
4823 {
4824 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4825 operands[4], operands[1]));
4826 operands[4] = operands[2] = operands[0];
4827 }
4828 [(set_attr "movprfx" "yes")]
4829)
624d0f07
RS
4830
4831;; -------------------------------------------------------------------------
4832;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
4833;; -------------------------------------------------------------------------
4834;; Includes:
4835;; - FSCALE
4836;; - FTSMUL
4837;; - FTSSEL
4838;; -------------------------------------------------------------------------
4839
4840;; Unpredicated floating-point binary operations that take an integer as
4841;; their second operand.
;;
;; Operand 1 has mode SVE_FULL_F and operand 2 the corresponding
;; <V_INT_EQUIV> mode; both are printed with the same <Vetype> suffix.
;; The operation is selected by SVE_FP_BINARY_INT.
4842(define_insn "@aarch64_sve_<optab><mode>"
f75cdd2c
RS
4843 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4844 (unspec:SVE_FULL_F
4845 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4846 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4847 SVE_FP_BINARY_INT))]
624d0f07
RS
4848 "TARGET_SVE"
4849 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4850)
4851
4852;; Predicated floating-point binary operations that take an integer
4853;; as their second operand.
;;
;; Operand 4 must satisfy aarch64_sve_gp_strictness; it has no constraint
;; string and is not printed.  Alternative 1 ties operand 2 to the output;
;; alternative 2 copies operand 2 into the output with MOVPRFX first.
4854(define_insn "@aarch64_pred_<optab><mode>"
f75cdd2c
RS
4855 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4856 (unspec:SVE_FULL_F
624d0f07
RS
4857 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4858 (match_operand:SI 4 "aarch64_sve_gp_strictness")
f75cdd2c 4859 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
624d0f07
RS
4860 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4861 SVE_COND_FP_BINARY_INT))]
4862 "TARGET_SVE"
4863 "@
4864 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4865 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4866 [(set_attr "movprfx" "*,yes")]
4867)
4868
4869;; Predicated floating-point binary operations with merging, taking an
4870;; integer as their second operand.
;;
;; Expander only (no preparation code).  The inner operation reuses
;; operand 1 as its predicate via match_dup and is always built with
;; SVE_STRICT_GP.  Operand 4 is the value merged in by the UNSPEC_SEL and
;; must satisfy aarch64_simd_reg_or_zero.
4871(define_expand "@cond_<optab><mode>"
f75cdd2c
RS
4872 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4873 (unspec:SVE_FULL_F
624d0f07 4874 [(match_operand:<VPRED> 1 "register_operand")
f75cdd2c 4875 (unspec:SVE_FULL_F
624d0f07
RS
4876 [(match_dup 1)
4877 (const_int SVE_STRICT_GP)
f75cdd2c 4878 (match_operand:SVE_FULL_F 2 "register_operand")
624d0f07
RS
4879 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
4880 SVE_COND_FP_BINARY_INT)
f75cdd2c 4881 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
624d0f07 4882 UNSPEC_SEL))]
c0c2f013 4883 "TARGET_SVE"
624d0f07
RS
4884)
4885
4886;; Predicated floating-point binary operations that take an integer as their
4887;; second operand, with inactive lanes coming from the first operand.
;;
;; This is the SVE_RELAXED_GP form.  Operand 4, the predicate of the inner
;; operation, is matched unconstrained and is not printed; the rewrite
;; replaces it with a copy of operand 1 whenever the two are not
;; rtx_equal_p.
0eb5e901 4888(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
f75cdd2c
RS
4889 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4890 (unspec:SVE_FULL_F
624d0f07 4891 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c 4892 (unspec:SVE_FULL_F
624d0f07 4893 [(match_operand 4)
0eb5e901 4894 (const_int SVE_RELAXED_GP)
f75cdd2c 4895 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
624d0f07
RS
4896 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4897 SVE_COND_FP_BINARY_INT)
4898 (match_dup 2)]
4899 UNSPEC_SEL))]
0eb5e901 4900 "TARGET_SVE"
c0c2f013 4901 "@
624d0f07
RS
4902 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4903 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4904 "&& !rtx_equal_p (operands[1], operands[4])"
4905 {
4906 operands[4] = copy_rtx (operands[1]);
4907 }
c0c2f013
YW
4908 [(set_attr "movprfx" "*,yes")]
4909)
4910
0eb5e901
RS
;; SVE_STRICT_GP form of a predicated floating-point operation with an
;; integer second operand, merging with the first input.  The inner
;; predicate must be operand 1 itself (match_dup 1), so this is a plain
;; define_insn with no rewrite step.  Alternative 1 ties operand 2 to the
;; output; alternative 2 copies operand 2 there with MOVPRFX first.
4911(define_insn "*cond_<optab><mode>_2_strict"
4912 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4913 (unspec:SVE_FULL_F
4914 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4915 (unspec:SVE_FULL_F
4916 [(match_dup 1)
4917 (const_int SVE_STRICT_GP)
4918 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4919 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4920 SVE_COND_FP_BINARY_INT)
4921 (match_dup 2)]
4922 UNSPEC_SEL))]
4923 "TARGET_SVE"
4924 "@
4925 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4926 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4927 [(set_attr "movprfx" "*,yes")]
4928)
4929
624d0f07
RS
4930;; Predicated floating-point binary operations that take an integer as
4931;; their second operand, with the values of inactive lanes being distinct
4932;; from the other inputs.
;;
;; This is the SVE_RELAXED_GP form; operand 5, the inner predicate, is
;; matched unconstrained and is not printed.  Alternatives 1-3 print a
;; zeroing (%1/z) or merging (%1/m) MOVPRFX from operand 2; alternative 4
;; outputs "#".  The rewrite condition is "&& 1", so the C block decides:
;; after reload, if operand 4 is a register other than the output, it emits
;; a vcond_mask of operands 2 and 4 under operand 1 into operand 0 and
;; substitutes operand 0 for operands 2 and 4; otherwise, if operand 5
;; differs from operand 1, it replaces operand 5 with a copy of operand 1;
;; otherwise it FAILs.
0eb5e901 4933(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
f75cdd2c
RS
4934 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4935 (unspec:SVE_FULL_F
624d0f07 4936 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
f75cdd2c 4937 (unspec:SVE_FULL_F
624d0f07 4938 [(match_operand 5)
0eb5e901 4939 (const_int SVE_RELAXED_GP)
f75cdd2c 4940 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
624d0f07
RS
4941 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4942 SVE_COND_FP_BINARY_INT)
f75cdd2c 4943 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
624d0f07 4944 UNSPEC_SEL))]
0eb5e901 4945 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
624d0f07
RS
4946 "@
4947 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4948 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4949 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4950 #"
4951 "&& 1"
4952 {
4953 if (reload_completed
4954 && register_operand (operands[4], <MODE>mode)
4955 && !rtx_equal_p (operands[0], operands[4]))
4956 {
4957 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4958 operands[4], operands[1]));
4959 operands[4] = operands[2] = operands[0];
4960 }
4961 else if (!rtx_equal_p (operands[1], operands[5]))
4962 operands[5] = copy_rtx (operands[1]);
4963 else
4964 FAIL;
4965 }
4966 [(set_attr "movprfx" "yes")]
4967)
4968
0eb5e901
RS
;; SVE_STRICT_GP form of a predicated floating-point operation with an
;; integer second operand, merging with an independent value (operand 4).
;; The inner predicate must be operand 1 itself (match_dup 1).
;; Alternatives 1-3 print a zeroing (%1/z) or merging (%1/m) MOVPRFX from
;; operand 2; alternative 4 outputs "#".  The rewrite only runs after
;; reload when operand 4 is a register other than the output: it emits a
;; vcond_mask of operands 2 and 4 under operand 1 into operand 0 and then
;; substitutes operand 0 for operands 2 and 4.
4969(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
4970 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4971 (unspec:SVE_FULL_F
4972 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4973 (unspec:SVE_FULL_F
4974 [(match_dup 1)
4975 (const_int SVE_STRICT_GP)
4976 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4977 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4978 SVE_COND_FP_BINARY_INT)
4979 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4980 UNSPEC_SEL))]
4981 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4982 "@
4983 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4984 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4985 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4986 #"
4987 "&& reload_completed
4988 && register_operand (operands[4], <MODE>mode)
4989 && !rtx_equal_p (operands[0], operands[4])"
4990 {
4991 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4992 operands[4], operands[1]));
4993 operands[4] = operands[2] = operands[0];
4994 }
4995 [(set_attr "movprfx" "yes")]
4996)
4997
915d28fe
RS
4998;; -------------------------------------------------------------------------
4999;; ---- [FP] General binary arithmetic corresponding to rtx codes
5000;; -------------------------------------------------------------------------
5001;; Includes post-RA forms of:
5002;; - FADD
5003;; - FMUL
5004;; - FSUB
5005;; -------------------------------------------------------------------------
43cacb12 5006
915d28fe
RS
5007;; Unpredicated floating-point binary operations (post-RA only).
5008;; These are generated by splitting a predicated instruction whose
5009;; predicate is unused.
;;
;; The pattern uses a plain SVE_UNPRED_FP_BINARY rtx code rather than an
;; unspec, and its condition includes reload_completed, so it cannot match
;; until reload has finished.
5010(define_insn "*post_ra_<sve_fp_op><mode>3"
f75cdd2c
RS
5011 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5012 (SVE_UNPRED_FP_BINARY:SVE_FULL_F
5013 (match_operand:SVE_FULL_F 1 "register_operand" "w")
5014 (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
915d28fe
RS
5015 "TARGET_SVE && reload_completed"
5016 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
5017
5018;; -------------------------------------------------------------------------
5019;; ---- [FP] General binary arithmetic corresponding to unspecs
5020;; -------------------------------------------------------------------------
5021;; Includes merging forms of:
a19ba9e1 5022;; - FADD (constant forms handled in the "Addition" section)
915d28fe
RS
5023;; - FDIV
5024;; - FDIVR
624d0f07 5025;; - FMAX
a19ba9e1 5026;; - FMAXNM (including #0.0 and #1.0)
624d0f07 5027;; - FMIN
a19ba9e1
RS
5028;; - FMINNM (including #0.0 and #1.0)
5029;; - FMUL (including #0.5 and #2.0)
624d0f07
RS
5030;; - FMULX
5031;; - FRECPS
5032;; - FRSQRTS
a19ba9e1
RS
5033;; - FSUB (constant forms handled in the "Addition" section)
5034;; - FSUBR (constant forms handled in the "Subtraction" section)
915d28fe
RS
5035;; -------------------------------------------------------------------------
5036
0254ed79 5037;; Unpredicated floating-point binary operations.
;;
;; Both inputs and the output have the same SVE_FULL_F mode and any
;; register may be used for each (single "w" alternative).  The operation
;; is selected by SVE_FP_BINARY.
624d0f07 5038(define_insn "@aarch64_sve_<optab><mode>"
f75cdd2c
RS
5039 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5040 (unspec:SVE_FULL_F
5041 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5042 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5043 SVE_FP_BINARY))]
624d0f07
RS
5044 "TARGET_SVE"
5045 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5046)
5047
5048;; Unpredicated floating-point binary operations that need to be predicated
5049;; for SVE.
;;
;; Operands 1 and 2 use the per-operation predicates
;; <sve_pred_fp_rhs1_operand> and <sve_pred_fp_rhs2_operand>.  Operand 3,
;; the predicate, is created by the preparation code as
;; aarch64_ptrue_reg (<VPRED>mode), and the operation is built with
;; SVE_RELAXED_GP.
0254ed79 5050(define_expand "<optab><mode>3"
f75cdd2c
RS
5051 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5052 (unspec:SVE_FULL_F
0254ed79
RS
5053 [(match_dup 3)
5054 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
5055 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
5056 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
04f307cb 5057 SVE_COND_FP_BINARY_OPTAB))]
0254ed79
RS
5058 "TARGET_SVE"
5059 {
5060 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5061 }
5062)
5063
5064;; Predicated floating-point binary operations that have no immediate forms.
;;
;; Operand 4 must satisfy aarch64_sve_gp_strictness and is not printed.
;; Alternative 1 ties operand 2 to the output.  Alternative 2 ties operand 3
;; to the output instead and prints <sve_fp_op_rev> with operand 2 as the
;; second source.  Alternative 3 copies operand 2 into the output with
;; MOVPRFX first.
624d0f07 5065(define_insn "@aarch64_pred_<optab><mode>"
f75cdd2c
RS
5066 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
5067 (unspec:SVE_FULL_F
0254ed79
RS
5068 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5069 (match_operand:SI 4 "aarch64_sve_gp_strictness")
f75cdd2c
RS
5070 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
5071 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
0254ed79
RS
5072 SVE_COND_FP_BINARY_REG))]
5073 "TARGET_SVE"
5074 "@
5075 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5076 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5077 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5078 [(set_attr "movprfx" "*,*,yes")]
5079)
5080
915d28fe 5081;; Predicated floating-point operations with merging.
;;
;; Expander only (no preparation code).  The inner SVE_COND_FP_BINARY
;; operation reuses operand 1 as its predicate via match_dup and is always
;; built with SVE_STRICT_GP.  Operand 4 is the value merged in by the
;; UNSPEC_SEL and must satisfy aarch64_simd_reg_or_zero.
624d0f07 5082(define_expand "@cond_<optab><mode>"
f75cdd2c
RS
5083 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5084 (unspec:SVE_FULL_F
915d28fe 5085 [(match_operand:<VPRED> 1 "register_operand")
f75cdd2c 5086 (unspec:SVE_FULL_F
6fe679cc 5087 [(match_dup 1)
c9c5a809 5088 (const_int SVE_STRICT_GP)
f75cdd2c
RS
5089 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
5090 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
915d28fe 5091 SVE_COND_FP_BINARY)
f75cdd2c 5092 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
915d28fe 5093 UNSPEC_SEL))]
43cacb12 5094 "TARGET_SVE"
43cacb12
RS
5095)
5096
915d28fe 5097;; Predicated floating-point operations, merging with the first input.
;;
;; This is the SVE_RELAXED_GP form.  Operand 4, the predicate of the inner
;; operation, is matched unconstrained and is not printed; the rewrite
;; replaces it with a copy of operand 1 whenever the two are not
;; rtx_equal_p.  Alternative 1 ties operand 2 to the output; alternative 2
;; copies operand 2 there with MOVPRFX first.
0eb5e901 5098(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
f75cdd2c
RS
5099 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5100 (unspec:SVE_FULL_F
57d6f4d0 5101 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c 5102 (unspec:SVE_FULL_F
c9c5a809 5103 [(match_operand 4)
0eb5e901 5104 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
5105 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5106 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
915d28fe
RS
5107 SVE_COND_FP_BINARY)
5108 (match_dup 2)]
5109 UNSPEC_SEL))]
0eb5e901 5110 "TARGET_SVE"
43cacb12 5111 "@
915d28fe
RS
5112 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5113 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
c9c5a809
RS
5114 "&& !rtx_equal_p (operands[1], operands[4])"
5115 {
5116 operands[4] = copy_rtx (operands[1]);
5117 }
915d28fe 5118 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
5119)
5120
0eb5e901
RS
;; SVE_STRICT_GP form of a predicated floating-point operation merging with
;; the first input.  The inner predicate must be operand 1 itself
;; (match_dup 1), so this is a plain define_insn with no rewrite step.
;; Alternative 1 ties operand 2 to the output; alternative 2 copies
;; operand 2 there with MOVPRFX first.
5121(define_insn "*cond_<optab><mode>_2_strict"
5122 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5123 (unspec:SVE_FULL_F
5124 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5125 (unspec:SVE_FULL_F
5126 [(match_dup 1)
5127 (const_int SVE_STRICT_GP)
5128 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5129 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5130 SVE_COND_FP_BINARY)
5131 (match_dup 2)]
5132 UNSPEC_SEL))]
5133 "TARGET_SVE"
5134 "@
5135 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5136 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5137 [(set_attr "movprfx" "*,yes")]
5138)
5139
a19ba9e1 5140;; Same for operations that take a 1-bit constant.
;;
;; SVE_RELAXED_GP form, merging with the first input (match_dup 2).
;; Operand 3 must satisfy <sve_pred_fp_rhs2_immediate> and is printed after
;; "#".  Operand 4, the inner predicate, is matched unconstrained; the
;; rewrite replaces it with a copy of operand 1 whenever the two are not
;; rtx_equal_p.
0eb5e901 5141(define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
f75cdd2c
RS
5142 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5143 (unspec:SVE_FULL_F
a19ba9e1 5144 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c 5145 (unspec:SVE_FULL_F
a19ba9e1 5146 [(match_operand 4)
0eb5e901 5147 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
5148 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5149 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
a19ba9e1
RS
5150 SVE_COND_FP_BINARY_I1)
5151 (match_dup 2)]
5152 UNSPEC_SEL))]
0eb5e901 5153 "TARGET_SVE"
a19ba9e1
RS
5154 "@
5155 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5156 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
5157 "&& !rtx_equal_p (operands[1], operands[4])"
5158 {
5159 operands[4] = copy_rtx (operands[1]);
5160 }
5161 [(set_attr "movprfx" "*,yes")]
5162)
5163
0eb5e901
RS
;; SVE_STRICT_GP form of a predicated SVE_COND_FP_BINARY_I1 operation with
;; an immediate second operand, merging with the first input.  The inner
;; predicate must be operand 1 itself (match_dup 1), so no rewrite step is
;; needed.  Operand 3 must satisfy <sve_pred_fp_rhs2_immediate> and is
;; printed after "#".
5164(define_insn "*cond_<optab><mode>_2_const_strict"
5165 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5166 (unspec:SVE_FULL_F
5167 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5168 (unspec:SVE_FULL_F
5169 [(match_dup 1)
5170 (const_int SVE_STRICT_GP)
5171 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5172 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5173 SVE_COND_FP_BINARY_I1)
5174 (match_dup 2)]
5175 UNSPEC_SEL))]
5176 "TARGET_SVE"
5177 "@
5178 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5179 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
5180 [(set_attr "movprfx" "*,yes")]
5181)
5182
915d28fe 5183;; Predicated floating-point operations, merging with the second input.
;;
;; This is the SVE_RELAXED_GP form.  The merge value is operand 3
;; (match_dup 3), so both alternatives print <sve_fp_op_rev> with operand 2
;; as the second source: alternative 1 ties operand 3 to the output and
;; alternative 2 copies operand 3 there with MOVPRFX first.  Operand 4, the
;; inner predicate, is matched unconstrained; the rewrite replaces it with
;; a copy of operand 1 whenever the two are not rtx_equal_p.
0eb5e901 5184(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
f75cdd2c
RS
5185 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5186 (unspec:SVE_FULL_F
57d6f4d0 5187 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c 5188 (unspec:SVE_FULL_F
c9c5a809 5189 [(match_operand 4)
0eb5e901 5190 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
5191 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5192 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
915d28fe
RS
5193 SVE_COND_FP_BINARY)
5194 (match_dup 3)]
5195 UNSPEC_SEL))]
0eb5e901 5196 "TARGET_SVE"
43cacb12 5197 "@
915d28fe
RS
5198 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5199 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
c9c5a809
RS
5200 "&& !rtx_equal_p (operands[1], operands[4])"
5201 {
5202 operands[4] = copy_rtx (operands[1]);
5203 }
915d28fe 5204 [(set_attr "movprfx" "*,yes")]
cee99fa0
RS
5205)
5206
0eb5e901
RS
;; SVE_STRICT_GP form of a predicated floating-point operation merging with
;; the second input (match_dup 3).  The inner predicate must be operand 1
;; itself (match_dup 1), so no rewrite step is needed.  Both alternatives
;; print <sve_fp_op_rev> with operand 2 as the second source: alternative 1
;; ties operand 3 to the output and alternative 2 copies operand 3 there
;; with MOVPRFX first.
5207(define_insn "*cond_<optab><mode>_3_strict"
5208 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5209 (unspec:SVE_FULL_F
5210 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5211 (unspec:SVE_FULL_F
5212 [(match_dup 1)
5213 (const_int SVE_STRICT_GP)
5214 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5215 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5216 SVE_COND_FP_BINARY)
5217 (match_dup 3)]
5218 UNSPEC_SEL))]
5219 "TARGET_SVE"
5220 "@
5221 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5222 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5223 [(set_attr "movprfx" "*,yes")]
5224)
5225
915d28fe 5226;; Predicated floating-point operations, merging with an independent value.
;;
;; This is the SVE_RELAXED_GP form; operand 5, the inner predicate, is
;; matched unconstrained and is not printed.  The insn condition rejects a
;; merge value (operand 4) equal to operand 2 or operand 3.
;; Alternatives 1-3 have "Dz" for operand 4 and start with a zeroing
;; MOVPRFX (%1/z): from the output when operand 2 is tied to it, from the
;; output with <sve_fp_op_rev> when operand 3 is tied to it, otherwise from
;; operand 2.  Alternative 4 (operand 4 tied to the output) uses a merging
;; MOVPRFX (%1/m) and alternative 5 outputs "#".
;; The rewrite condition is "&& 1", so the C block decides: after reload,
;; if operand 4 is a register other than the output, it emits a vcond_mask
;; of operands 2 and 4 under operand 1 into operand 0 and substitutes
;; operand 0 for operands 2 and 4; otherwise, if operand 5 differs from
;; operand 1, it replaces operand 5 with a copy of operand 1; otherwise it
;; FAILs.
0eb5e901 5227(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
f75cdd2c
RS
5228 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5229 (unspec:SVE_FULL_F
915d28fe 5230 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
f75cdd2c 5231 (unspec:SVE_FULL_F
c9c5a809 5232 [(match_operand 5)
0eb5e901 5233 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
5234 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5235 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
915d28fe 5236 SVE_COND_FP_BINARY)
f75cdd2c 5237 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
915d28fe
RS
5238 UNSPEC_SEL))]
5239 "TARGET_SVE
5240 && !rtx_equal_p (operands[2], operands[4])
0eb5e901 5241 && !rtx_equal_p (operands[3], operands[4])"
cee99fa0 5242 "@
915d28fe
RS
5243 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5244 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5245 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5246 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5247 #"
c9c5a809 5248 "&& 1"
915d28fe 5249 {
c9c5a809
RS
5250 if (reload_completed
5251 && register_operand (operands[4], <MODE>mode)
5252 && !rtx_equal_p (operands[0], operands[4]))
5253 {
5254 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5255 operands[4], operands[1]));
5256 operands[4] = operands[2] = operands[0];
5257 }
5258 else if (!rtx_equal_p (operands[1], operands[5]))
5259 operands[5] = copy_rtx (operands[1]);
5260 else
5261 FAIL;
915d28fe
RS
5262 }
5263 [(set_attr "movprfx" "yes")]
cee99fa0
RS
5264)
5265
0eb5e901
RS
;; SVE_STRICT_GP form of a predicated floating-point operation merging with
;; an independent value (operand 4).  The inner predicate must be operand 1
;; itself (match_dup 1).  The insn condition rejects a merge value equal to
;; operand 2 or operand 3.  Alternatives 1-4 print a zeroing (%1/z) or
;; merging (%1/m) MOVPRFX followed by the operation; alternative 5 outputs
;; "#".  The rewrite only runs after reload when operand 4 is a register
;; other than the output: it emits a vcond_mask of operands 2 and 4 under
;; operand 1 into operand 0 and then substitutes operand 0 for operands 2
;; and 4.
5266(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5267 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5268 (unspec:SVE_FULL_F
5269 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5270 (unspec:SVE_FULL_F
5271 [(match_dup 1)
5272 (const_int SVE_STRICT_GP)
5273 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5274 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5275 SVE_COND_FP_BINARY)
5276 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5277 UNSPEC_SEL))]
5278 "TARGET_SVE
5279 && !rtx_equal_p (operands[2], operands[4])
5280 && !rtx_equal_p (operands[3], operands[4])"
5281 "@
5282 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5283 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5284 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5285 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5286 #"
5287 "&& reload_completed
5288 && register_operand (operands[4], <MODE>mode)
5289 && !rtx_equal_p (operands[0], operands[4])"
5290 {
5291 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5292 operands[4], operands[1]));
5293 operands[4] = operands[2] = operands[0];
5294 }
5295 [(set_attr "movprfx" "yes")]
5296)
5297
a19ba9e1 5298;; Same for operations that take a 1-bit constant.
;;
;; SVE_RELAXED_GP form, merging with an independent value (operand 4).
;; Operand 3 must satisfy <sve_pred_fp_rhs2_immediate> and is printed after
;; "#"; operand 5, the inner predicate, is matched unconstrained.
;; Alternatives 1 and 2 print a zeroing (%1/z) or merging (%1/m) MOVPRFX
;; from operand 2; alternative 3 outputs "#".  The rewrite condition is
;; "&& 1", so the C block decides: after reload, if operand 4 is a register
;; other than the output, it emits a vcond_mask of operands 2 and 4 under
;; operand 1 into operand 0 and substitutes operand 0 for operands 2 and 4;
;; otherwise, if operand 5 differs from operand 1, it replaces operand 5
;; with a copy of operand 1; otherwise it FAILs.
0eb5e901 5299(define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
f75cdd2c
RS
5300 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5301 (unspec:SVE_FULL_F
a19ba9e1 5302 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
f75cdd2c 5303 (unspec:SVE_FULL_F
a19ba9e1 5304 [(match_operand 5)
0eb5e901 5305 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
5306 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
5307 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
a19ba9e1 5308 SVE_COND_FP_BINARY_I1)
f75cdd2c 5309 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
a19ba9e1 5310 UNSPEC_SEL))]
0eb5e901 5311 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
a19ba9e1
RS
5312 "@
5313 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5314 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5315 #"
5316 "&& 1"
5317 {
5318 if (reload_completed
5319 && register_operand (operands[4], <MODE>mode)
5320 && !rtx_equal_p (operands[0], operands[4]))
5321 {
5322 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5323 operands[4], operands[1]));
5324 operands[4] = operands[2] = operands[0];
5325 }
5326 else if (!rtx_equal_p (operands[1], operands[5]))
5327 operands[5] = copy_rtx (operands[1]);
5328 else
5329 FAIL;
5330 }
5331 [(set_attr "movprfx" "yes")]
5332)
5333
0eb5e901
RS
;; SVE_STRICT_GP form of a predicated SVE_COND_FP_BINARY_I1 operation with
;; an immediate second operand, merging with an independent value
;; (operand 4).  The inner predicate must be operand 1 itself
;; (match_dup 1).  Alternatives 1 and 2 print a zeroing (%1/z) or merging
;; (%1/m) MOVPRFX from operand 2; alternative 3 outputs "#".  The rewrite
;; only runs after reload when operand 4 is a register other than the
;; output: it emits a vcond_mask of operands 2 and 4 under operand 1 into
;; operand 0 and then substitutes operand 0 for operands 2 and 4.
5334(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
5335 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5336 (unspec:SVE_FULL_F
5337 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5338 (unspec:SVE_FULL_F
5339 [(match_dup 1)
5340 (const_int SVE_STRICT_GP)
5341 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
5342 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5343 SVE_COND_FP_BINARY_I1)
5344 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5345 UNSPEC_SEL))]
5346 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5347 "@
5348 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5349 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5350 #"
5351 "&& reload_completed
5352 && register_operand (operands[4], <MODE>mode)
5353 && !rtx_equal_p (operands[0], operands[4])"
5354 {
5355 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5356 operands[4], operands[1]));
5357 operands[4] = operands[2] = operands[0];
5358 }
5359 [(set_attr "movprfx" "yes")]
5360)
5361
915d28fe
RS
5362;; -------------------------------------------------------------------------
5363;; ---- [FP] Addition
5364;; -------------------------------------------------------------------------
5365;; Includes:
5366;; - FADD
5367;; - FSUB
5368;; -------------------------------------------------------------------------
43cacb12 5369
c9c5a809 5370;; Predicated floating-point addition.
;;
;; Operand 4 is the aarch64_sve_gp_strictness constant and is not printed.
;; Alternatives 1 and 2 take an immediate operand 3: FADD for "vsA" and
;; FSUB of %N3 for "vsN".  Alternative 3 outputs "#".  Alternative 4 is the
;; register FADD with operand 2 tied to the output.  Alternatives 5-7 are
;; the MOVPRFX versions of alternatives 1, 2 and 4.
;; After reload, when operand 3 is a register and operand 4 equals
;; SVE_RELAXED_GP, the insn is split into an unpredicated PLUS of
;; operands 2 and 3.
624d0f07 5371(define_insn_and_split "@aarch64_pred_<optab><mode>"
f75cdd2c
RS
5372 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
5373 (unspec:SVE_FULL_F
624d0f07
RS
5374 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
5375 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
f75cdd2c
RS
5376 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
5377 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
624d0f07 5378 SVE_COND_FP_ADD))]
cee99fa0 5379 "TARGET_SVE"
915d28fe
RS
5380 "@
5381 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5382 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5e176a61 5383 #
624d0f07 5384 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5e176a61 5385 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
624d0f07
RS
5386 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5387 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
915d28fe
RS
5388 ; Split the unpredicated form after reload, so that we don't have
5389 ; the unnecessary PTRUE.
5390 "&& reload_completed
624d0f07
RS
5391 && register_operand (operands[3], <MODE>mode)
5392 && INTVAL (operands[4]) == SVE_RELAXED_GP"
f75cdd2c 5393 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5e176a61 5394 ""
624d0f07 5395 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
cee99fa0
RS
5396)
5397
a19ba9e1
RS
5398;; Predicated floating-point addition of a constant, merging with the
5399;; first input.
;;
;; This is the SVE_RELAXED_GP form.  Operand 3 is an immediate: the "vsA"
;; alternatives print FADD with #%3 and the "vsN" alternatives print FSUB
;; with #%N3.  Alternatives 1 and 2 tie operand 2 to the output;
;; alternatives 3 and 4 copy operand 2 there with MOVPRFX first.
;; Operand 4, the inner predicate, is matched unconstrained; the rewrite
;; replaces it with a copy of operand 1 whenever the two are not
;; rtx_equal_p.
0eb5e901 5400(define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
f75cdd2c
RS
5401 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
5402 (unspec:SVE_FULL_F
a19ba9e1 5403 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
f75cdd2c 5404 (unspec:SVE_FULL_F
a19ba9e1 5405 [(match_operand 4)
0eb5e901 5406 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
5407 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
5408 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
a19ba9e1
RS
5409 UNSPEC_COND_FADD)
5410 (match_dup 2)]
5411 UNSPEC_SEL))]
0eb5e901 5412 "TARGET_SVE"
a19ba9e1
RS
5413 "@
5414 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5415 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5416 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5417 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
5418 "&& !rtx_equal_p (operands[1], operands[4])"
5419 {
5420 operands[4] = copy_rtx (operands[1]);
5421 }
5422 [(set_attr "movprfx" "*,*,yes,yes")]
5423)
5424
0eb5e901
RS
;; Strict-GP counterpart of the constant addition that merges with the
;; first input.  The inner UNSPEC_COND_FADD must be governed by operand 1
;; itself (match_dup 1) and is marked SVE_STRICT_GP, so there is no
;; predicate to rewrite.  Alternatives 1-2 tie operand 2 to the
;; destination; alternatives 3-4 copy it in with MOVPRFX first.
(define_insn "*cond_add<mode>_2_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
              (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
             UNSPEC_COND_FADD)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
  [(set_attr "movprfx" "*,*,yes,yes")]
)
5445
;; Predicated floating-point addition of a constant, merging with an
;; independent value.
;;
;; Relaxed-GP form.  Operand 4 supplies the inactive lanes and must differ
;; from operand 2 (see the insn condition).  Alternatives:
;;   1, 2: operand 4 is zero (Dz): zeroing MOVPRFX of operand 2, then
;;         FADD/FSUB.
;;   3, 4: operand 4 is tied to the destination: merging MOVPRFX of
;;         operand 2, then FADD/FSUB.
;;   5, 6: operand 4 is in an unrelated register: emitted as "#" and
;;         handled by the rewrite code below.
;; The rewrite code always runs ("&& 1") and does one of:
;;   - after reload, when operand 4 is a register other than the
;;     destination, emit gen_vcond_mask_<mode><vpred> into operand 0 and
;;     then make operands 2 and 4 both refer to operand 0;
;;   - otherwise, if the inner predicate (operand 5) differs from operand 1,
;;     replace it with a copy of operand 1;
;;   - otherwise FAIL, since there is nothing to change.
;; NOTE(review): gen_vcond_mask_<mode><vpred> is defined elsewhere;
;; presumably it selects operand 2 where operand 1 is set and operand 4
;; elsewhere -- confirm against its definition.
(define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
              (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
             UNSPEC_COND_FADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
5485
0eb5e901
RS
;; Strict-GP counterpart of the constant addition that merges with an
;; independent value.  The inner UNSPEC_COND_FADD is governed by operand 1
;; itself (match_dup 1).  Alternatives 1-4 are emitted directly as
;; MOVPRFX + FADD/FSUB; alternatives 5-6 ("#") are rewritten after reload,
;; when operand 4 is a register other than the destination, by emitting
;; gen_vcond_mask_<mode><vpred> into operand 0 and then making operands 2
;; and 4 both refer to operand 0.
(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
              (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
             UNSPEC_COND_FADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
5516
a19ba9e1 5517;; Register merging forms are handled through SVE_COND_FP_BINARY.
cee99fa0 5518
624d0f07
RS
5519;; -------------------------------------------------------------------------
5520;; ---- [FP] Complex addition
5521;; -------------------------------------------------------------------------
5522;; Includes:
5523;; - FCADD
5524;; -------------------------------------------------------------------------
5525
;; Predicated FCADD.
;;
;; Operand 1 is the governing predicate and operand 4 the GP strictness
;; flag.  Alternative 1 ties operand 2 to the destination; alternative 2
;; uses an earlyclobber destination and copies operand 2 in with MOVPRFX
;; before the FCADD.  The rotation is supplied by the <rot> iterator
;; attribute.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
           (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
          SVE_COND_FCADD))]
  "TARGET_SVE"
  "@
   fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)
5541
;; Predicated FCADD with merging.
;;
;; Expander only: builds an UNSPEC_SEL of a strict-GP FCADD (governed by
;; operand 1) and the fallback value in operand 4, which may be a register
;; or zero.  It has no preparation statements.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             SVE_COND_FCADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)
5557
84747acf
TC
;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer.
;;
;; The optab takes only the two vector inputs (operands 1 and 2); the
;; governing predicate (operand 3) is filled in by the preparation code
;; with aarch64_ptrue_reg, and the operation is marked SVE_RELAXED_GP.
(define_expand "@cadd<rot><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_dup 3)
           (const_int SVE_RELAXED_GP)
           (match_operand:SVE_FULL_F 1 "register_operand")
           (match_operand:SVE_FULL_F 2 "register_operand")]
          SVE_COND_FCADD))]
  "TARGET_SVE"
{
  operands[3] = aarch64_ptrue_reg (<VPRED>mode);
})
5571
;; Predicated FCADD, merging with the first input.
;;
;; Relaxed-GP form.  Operand 4 is the predicate on the inner FCADD and
;; operand 1 the UNSPEC_SEL predicate; inactive lanes take operand 2.
;; Alternative 1 ties operand 2 to the destination; alternative 2 uses an
;; earlyclobber destination and a MOVPRFX copy of operand 2.  The rewrite
;; step replaces operand 4 with a copy of operand 1 when they differ.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
             SVE_COND_FCADD)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
5595
0eb5e901
RS
;; Strict-GP counterpart of FCADD merging with the first input.  The inner
;; FCADD is governed by operand 1 itself (match_dup 1), so no predicate
;; rewrite is needed.  Alternative 1 ties operand 2 to the destination;
;; alternative 2 copies it in with MOVPRFX.
(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
             SVE_COND_FCADD)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)
5614
;; Predicated FCADD, merging with an independent value.
;;
;; Relaxed-GP form.  Operand 4 supplies the inactive lanes and must differ
;; from operand 2 (see the insn condition).  All alternatives use an
;; earlyclobber destination:
;;   1: operand 4 is zero: zeroing MOVPRFX from operand 2.
;;   2: operand 4 is zero and operand 2 is tied to the destination:
;;      zeroing MOVPRFX of the destination onto itself.
;;   3: operand 4 is tied to the destination: merging MOVPRFX from
;;      operand 2.
;;   4: operand 4 is in an unrelated register: "#", handled by the rewrite.
;; The rewrite code always runs ("&& 1").  After reload, when operand 4 is
;; a register other than the destination, it emits
;; gen_vcond_mask_<mode><vpred> into operand 0 and makes operands 2 and 4
;; refer to operand 0.  Otherwise it replaces the inner predicate
;; (operand 5) with a copy of operand 1 if they differ, or FAILs when there
;; is nothing to change.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
             SVE_COND_FCADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
5651
0eb5e901
RS
;; Strict-GP counterpart of FCADD merging with an independent value.  The
;; inner FCADD is governed by operand 1 itself (match_dup 1).
;; Alternatives 1-3 are emitted directly as MOVPRFX + FCADD; alternative 4
;; ("#") is rewritten after reload, when operand 4 is a register other
;; than the destination, by emitting gen_vcond_mask_<mode><vpred> into
;; operand 0 and making operands 2 and 4 refer to operand 0.
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
             SVE_COND_FCADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
5680
915d28fe
RS
5681;; -------------------------------------------------------------------------
5682;; ---- [FP] Subtraction
5683;; -------------------------------------------------------------------------
5684;; Includes:
915d28fe
RS
5685;; - FSUB
5686;; - FSUBR
5687;; -------------------------------------------------------------------------
cee99fa0 5688
;; Predicated floating-point subtraction.
;;
;; Computes operand 2 - operand 3 under predicate operand 1; operand 4 is
;; the GP strictness flag.  Alternatives:
;;   1: immediate minuend (vsA), operand 3 tied: FSUBR with immediate.
;;   2: both inputs in registers with constraint "Z" on operand 4: "#",
;;      split after reload into an unpredicated MINUS (see below).
;;   3: operand 2 tied to the destination: FSUB.
;;   4: operand 3 tied to the destination: FSUBR.
;;   5, 6: MOVPRFX forms of 1 and 3 with an earlyclobber destination.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
           (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
           (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
          SVE_COND_FP_SUB))]
  "TARGET_SVE"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[2], <MODE>mode)
   && INTVAL (operands[4]) == SVE_RELAXED_GP"
  [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,*,yes,yes")]
)
5715
a19ba9e1
RS
;; Predicated floating-point subtraction from a constant, merging with the
;; second input.
;;
;; Relaxed-GP form.  Operand 2 is the immediate minuend and operand 3 the
;; register subtrahend, which also supplies the inactive lanes (match_dup
;; 3).  Alternative 1 ties operand 3 to the destination and emits FSUBR;
;; alternative 2 copies operand 3 in with MOVPRFX first.  The rewrite step
;; replaces the inner predicate (operand 4) with a copy of operand 1 when
;; they differ.
(define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
             UNSPEC_COND_FSUB)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
5740
0eb5e901
RS
;; Strict-GP counterpart of subtraction from a constant merging with the
;; second input.  The inner UNSPEC_COND_FSUB is governed by operand 1
;; itself (match_dup 1), so no predicate rewrite is needed.  Alternative 1
;; ties operand 3 to the destination; alternative 2 copies it in with
;; MOVPRFX.
(define_insn "*cond_sub<mode>_3_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
             UNSPEC_COND_FSUB)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
  [(set_attr "movprfx" "*,yes")]
)
5759
a19ba9e1
RS
;; Predicated floating-point subtraction from a constant, merging with an
;; independent value.
;;
;; Relaxed-GP form.  Operand 2 is the immediate minuend, operand 3 the
;; register subtrahend, and operand 4 supplies the inactive lanes; it must
;; differ from operand 3 (see the insn condition).  Alternatives:
;;   1: operand 4 is zero: zeroing MOVPRFX of operand 3, then FSUBR.
;;   2: operand 4 tied to the destination: merging MOVPRFX of operand 3,
;;      then FSUBR.
;;   3: operand 4 in an unrelated register: "#", handled by the rewrite.
;; The rewrite code always runs ("&& 1").  After reload, when operand 4 is
;; a register other than the destination, it emits
;; gen_vcond_mask_<mode><vpred> into operand 0 and makes operands 3 and 4
;; refer to operand 0.  Otherwise it replaces the inner predicate
;; (operand 5) with a copy of operand 1 if they differ, or FAILs when there
;; is nothing to change.
(define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
             UNSPEC_COND_FSUB)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                                 operands[4], operands[1]));
        operands[4] = operands[3] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
5796
;; Strict-GP counterpart of subtraction from a constant merging with an
;; independent value.  The inner UNSPEC_COND_FSUB is governed by operand 1
;; itself (match_dup 1).  Alternatives 1-2 are emitted directly as
;; MOVPRFX + FSUBR; alternative 3 ("#") is rewritten after reload, when
;; operand 4 is a register other than the destination, by emitting
;; gen_vcond_mask_<mode><vpred> into operand 0 and making operands 3 and 4
;; refer to operand 0.
(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
             UNSPEC_COND_FSUB)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                             operands[4], operands[1]));
    operands[4] = operands[3] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
a19ba9e1 5824;; Register merging forms are handled through SVE_COND_FP_BINARY.
43cacb12 5825
915d28fe
RS
5826;; -------------------------------------------------------------------------
5827;; ---- [FP] Absolute difference
5828;; -------------------------------------------------------------------------
5829;; Includes:
5830;; - FABD
5831;; -------------------------------------------------------------------------
5832
;; Predicated floating-point absolute difference.
;;
;; Expander only: represents the operation as UNSPEC_COND_FABS applied to
;; UNSPEC_COND_FSUB of operands 2 and 3, with both levels sharing the
;; predicate (operand 1) and the strictness flag (operand 4).  It has no
;; preparation statements.
(define_expand "@aarch64_pred_abd<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_dup 4)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             UNSPEC_COND_FSUB)]
          UNSPEC_COND_FABS))]
  "TARGET_SVE"
)
5848
;; Predicated floating-point absolute difference.
;;
;; Relaxed-GP form: the inner UNSPEC_COND_FSUB is marked SVE_RELAXED_GP
;; and may carry any predicate (operand 5).  Operand 2 is marked
;; commutative ("%"), so either input may end up tied to the destination.
;; Alternative 1 emits FABD on the tied register; alternative 2 uses an
;; earlyclobber destination and a MOVPRFX copy of operand 2.  The rewrite
;; step replaces operand 5 with a copy of operand 1 when they differ.
(define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
             UNSPEC_COND_FSUB)]
          UNSPEC_COND_FABS))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
5872
0eb5e901
RS
;; Strict-GP counterpart of the predicated absolute difference.  The inner
;; UNSPEC_COND_FSUB is marked SVE_STRICT_GP and is governed by operand 1
;; itself (match_dup 1), so no predicate rewrite is needed.  Alternative 1
;; emits FABD on the tied register; alternative 2 copies operand 2 in with
;; MOVPRFX first.
(define_insn "*aarch64_pred_abd<mode>_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
             UNSPEC_COND_FSUB)]
          UNSPEC_COND_FABS))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
5891
;; Expander for predicated floating-point absolute difference with
;; merging: an UNSPEC_SEL between a strict-GP FABS-of-FSUB (both governed
;; by operand 1) and the fallback value in operand 4 (register or zero).
;; If the fallback equals operand 3, the preparation code swaps operands 2
;; and 3 so that the fallback value becomes operand 2 instead.
(define_expand "@aarch64_cond_abd<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (unspec:SVE_FULL_F
                [(match_dup 1)
                 (const_int SVE_STRICT_GP)
                 (match_operand:SVE_FULL_F 2 "register_operand")
                 (match_operand:SVE_FULL_F 3 "register_operand")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
{
  if (rtx_equal_p (operands[3], operands[4]))
    std::swap (operands[2], operands[3]);
})
5913
;; Predicated floating-point absolute difference, merging with the first
;; input.
;;
;; Relaxed-GP form: the FABS and FSUB levels are both SVE_RELAXED_GP and
;; may carry arbitrary predicates (operands 4 and 5).  Inactive lanes take
;; operand 2.  Alternative 1 ties operand 2 to the destination;
;; alternative 2 uses an earlyclobber destination and a MOVPRFX copy of
;; operand 2.  The rewrite step fires when either inner predicate differs
;; from operand 1 and replaces both with copies of operand 1.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (unspec:SVE_FULL_F
                [(match_operand 5)
                 (const_int SVE_RELAXED_GP)
                 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
                 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& (!rtx_equal_p (operands[1], operands[4])
       || !rtx_equal_p (operands[1], operands[5]))"
  {
    operands[4] = copy_rtx (operands[1]);
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
5944
0eb5e901
RS
;; Counterpart of the merge-with-first-input absolute difference in which
;; both the FABS and FSUB levels are governed by operand 1 itself
;; (match_dup 1).  The strictness flags are matched as operands 4 and 5
;; rather than as fixed constants.  Alternative 1 ties operand 2 to the
;; destination; alternative 2 copies it in with MOVPRFX.
(define_insn "*aarch64_cond_abd<mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_operand:SI 4 "aarch64_sve_gp_strictness")
              (unspec:SVE_FULL_F
                [(match_dup 1)
                 (match_operand:SI 5 "aarch64_sve_gp_strictness")
                 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
                 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
5967
bf30864e
RS
;; Predicated floating-point absolute difference, merging with the second
;; input.
;;
;; Relaxed-GP form, mirroring the merge-with-first-input pattern with the
;; roles of operands 2 and 3 exchanged: inactive lanes take operand 3,
;; which is tied to the destination (alternative 1) or copied in with
;; MOVPRFX (alternative 2), and FABD takes operand 2 as its other input.
;; The rewrite step fires when either inner predicate (operand 4 or 5)
;; differs from operand 1 and replaces both with copies of operand 1.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (unspec:SVE_FULL_F
                [(match_operand 5)
                 (const_int SVE_RELAXED_GP)
                 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& (!rtx_equal_p (operands[1], operands[4])
       || !rtx_equal_p (operands[1], operands[5]))"
  {
    operands[4] = copy_rtx (operands[1]);
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)
5998
0eb5e901
RS
;; Counterpart of the merge-with-second-input absolute difference in which
;; both the FABS and FSUB levels are governed by operand 1 itself
;; (match_dup 1).  The strictness flags are matched as operands 4 and 5
;; rather than as fixed constants.  Alternative 1 ties operand 3 to the
;; destination; alternative 2 copies it in with MOVPRFX.
(define_insn "*aarch64_cond_abd<mode>_3_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_operand:SI 4 "aarch64_sve_gp_strictness")
              (unspec:SVE_FULL_F
                [(match_dup 1)
                 (match_operand:SI 5 "aarch64_sve_gp_strictness")
                 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
6021
bf30864e
RS
;; Predicated floating-point absolute difference, merging with an
;; independent value.
;;
;; Relaxed-GP form.  Operand 4 supplies the inactive lanes and must differ
;; from both inputs (see the insn condition).  All alternatives use an
;; earlyclobber destination:
;;   1: operand 4 zero, operand 2 tied: zeroing MOVPRFX of the destination,
;;      FABD with operand 3.
;;   2: operand 4 zero, operand 3 tied: zeroing MOVPRFX of the destination,
;;      FABD with operand 2.
;;   3: operand 4 zero, nothing tied: zeroing MOVPRFX from operand 2.
;;   4: operand 4 tied to the destination: merging MOVPRFX from operand 2.
;;   5: operand 4 in an unrelated register: "#", handled by the rewrite.
;; The rewrite code always runs ("&& 1").  After reload, when operand 4 is
;; a register other than the destination, it emits
;; gen_vcond_mask_<mode><vpred> on operand 3 and operand 4 into operand 0
;; and makes operands 3 and 4 refer to operand 0.  Otherwise, if either
;; inner predicate (operand 5 or 6) differs from operand 1, both are
;; replaced with copies of operand 1; if neither differs it FAILs.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (unspec:SVE_FULL_F
                [(match_operand 6)
                 (const_int SVE_RELAXED_GP)
                 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
                 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                                 operands[4], operands[1]));
        operands[4] = operands[3] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5])
             || !rtx_equal_p (operands[1], operands[6]))
      {
        operands[5] = copy_rtx (operands[1]);
        operands[6] = copy_rtx (operands[1]);
      }
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
6070
0eb5e901
RS
;; Counterpart of the merge-with-independent-value absolute difference in
;; which both the FABS and FSUB levels are governed by operand 1 itself
;; (match_dup 1); the strictness flags are matched as operands 5 and 6.
;; Alternatives 1-4 are emitted directly as MOVPRFX + FABD; alternative 5
;; ("#") is rewritten after reload, when operand 4 is a register other
;; than the destination, by emitting gen_vcond_mask_<mode><vpred> on
;; operand 3 and operand 4 into operand 0 and making operands 3 and 4
;; refer to operand 0.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_operand:SI 5 "aarch64_sve_gp_strictness")
              (unspec:SVE_FULL_F
                [(match_dup 1)
                 (match_operand:SI 6 "aarch64_sve_gp_strictness")
                 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
                 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                             operands[4], operands[1]));
    operands[4] = operands[3] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
6106
915d28fe
RS
6107;; -------------------------------------------------------------------------
6108;; ---- [FP] Multiplication
6109;; -------------------------------------------------------------------------
6110;; Includes:
6111;; - FMUL
6112;; -------------------------------------------------------------------------
6113
;; Predicated floating-point multiplication.
;;
;; Operand 1 is the governing predicate and operand 4 the GP strictness
;; flag.  Operand 2 is marked commutative ("%").  Alternatives:
;;   1: operand 2 tied, immediate multiplier (vsM): FMUL with immediate.
;;   2: both inputs in registers with constraint "Z" on operand 4: "#",
;;      split after reload into an unpredicated MULT (see below).
;;   3: operand 2 tied, register multiplier: FMUL.
;;   4, 5: MOVPRFX forms of 1 and 3 with an earlyclobber destination.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
           (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
           (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
          SVE_COND_FP_MUL))]
  "TARGET_SVE"
  "@
   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #
   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[3], <MODE>mode)
   && INTVAL (operands[4]) == SVE_RELAXED_GP"
  [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,yes,yes")]
)
6139
a19ba9e1
RS
6140;; Merging forms are handled through SVE_COND_FP_BINARY and
6141;; SVE_COND_FP_BINARY_I1.
915d28fe 6142
624d0f07
RS
;; Unpredicated multiplication by selected lanes.
;;
;; Multiplies operand 1 by the UNSPEC_SVE_LANE_SELECT of operand 2 at the
;; constant index in operand 3, emitted as the indexed form of FMUL.
;; Operand 2's register constraint comes from the <sve_lane_con> mode
;; attribute.
(define_insn "@aarch64_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
        (mult:SVE_FULL_F
          (unspec:SVE_FULL_F
            [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
             (match_operand:SI 3 "const_int_operand")]
            UNSPEC_SVE_LANE_SELECT)
          (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
  "TARGET_SVE"
  "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)
6155
04f307cb
RS
6156;; -------------------------------------------------------------------------
6157;; ---- [FP] Division
6158;; -------------------------------------------------------------------------
6159;; The patterns in this section are synthetic.
6160;; -------------------------------------------------------------------------
6161
;; Unpredicated floating-point division optab.
;;
;; The preparation code first calls aarch64_emit_approx_div; if that
;; returns true the expansion is finished (DONE).  Otherwise operand 1,
;; which may be any nonmemory operand, is forced into a register and the
;; division is emitted as a relaxed-GP UNSPEC_COND_FDIV governed by
;; aarch64_ptrue_reg (operand 3).
(define_expand "div<mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_dup 3)
           (const_int SVE_RELAXED_GP)
           (match_operand:SVE_FULL_F 1 "nonmemory_operand")
           (match_operand:SVE_FULL_F 2 "register_operand")]
          UNSPEC_COND_FDIV))]
  "TARGET_SVE"
  {
    if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
      DONE;

    operands[1] = force_reg (<MODE>mode, operands[1]);
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
6179
;; Expander that wraps operand 1 in UNSPEC_FRECPE.  It has no preparation
;; statements; the "@" prefix makes the generator callable with a mode
;; argument.
(define_expand "@aarch64_frecpe<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:SVE_FULL_F 1 "register_operand")]
          UNSPEC_FRECPE))]
  "TARGET_SVE"
)
6187
;; Named expander that wraps two vector inputs in UNSPEC_FRECPS.
;; It has no preparation statements, so it just emits the pattern.
(define_expand "@aarch64_frecps<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:SVE_FULL_F 1 "register_operand")
           (match_operand:SVE_FULL_F 2 "register_operand")]
          UNSPEC_FRECPS))]
  "TARGET_SVE"
)
6196
915d28fe
RS
6197;; -------------------------------------------------------------------------
6198;; ---- [FP] Binary logical operations
6199;; -------------------------------------------------------------------------
6200;; Includes:
6201;; - AND
6202;; - EOR
6203;; - ORR
6204;; -------------------------------------------------------------------------
6205
;; Bitwise logical operations applied directly to floating-point vector
;; modes.  Providing this pattern avoids subregs; the operation has to be
;; an UNSPEC because the rtx logical codes are not defined for
;; floating-point modes.  The instruction is always printed with .d
;; element suffixes, whatever the element size of <MODE>.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
        (unspec:SVE_FULL_F
          [(match_operand:SVE_FULL_F 1 "register_operand" "w")
           (match_operand:SVE_FULL_F 2 "register_operand" "w")]
          LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)
6218
915d28fe
RS
6219;; -------------------------------------------------------------------------
6220;; ---- [FP] Sign copying
6221;; -------------------------------------------------------------------------
6222;; The patterns in this section are synthetic.
6223;; -------------------------------------------------------------------------
6224
;; copysign (op1, op2): combine everything except the top bit of each
;; element of operand 1 with the top bit of each element of operand 2.
;; The work is done on the integer-equivalent mode with AND/AND/IOR and
;; the result is then moved back to the floating-point destination.
(define_expand "copysign<mode>3"
  [(match_operand:SVE_FULL_F 0 "register_operand")
   (match_operand:SVE_FULL_F 1 "register_operand")
   (match_operand:SVE_FULL_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign_part = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx rest_part = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx combined = gen_reg_rtx (<V_INT_EQUIV>mode);
    /* Index of the most significant bit of each element.  */
    int top_bit = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

    /* Integer views of the two floating-point inputs.  */
    rtx value_bits = lowpart_subreg (<V_INT_EQUIV>mode, operands[1],
                                     <MODE>mode);
    rtx signsrc_bits = lowpart_subreg (<V_INT_EQUIV>mode, operands[2],
                                       <MODE>mode);

    /* sign_part = op2 & (all-ones << top_bit).  */
    rtx keep_top
      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                           HOST_WIDE_INT_M1U << top_bit);
    emit_insn (gen_and<v_int_equiv>3 (sign_part, signsrc_bits, keep_top));

    /* rest_part = op1 & ~(all-ones << top_bit).  */
    rtx drop_top
      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                           ~(HOST_WIDE_INT_M1U << top_bit));
    emit_insn (gen_and<v_int_equiv>3 (rest_part, value_bits, drop_top));

    emit_insn (gen_ior<v_int_equiv>3 (combined, sign_part, rest_part));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, combined));
    DONE;
  }
)
6254
;; xorsign (op1, op2): XOR operand 1 with the top bit of each element of
;; operand 2.  As for copysign, the arithmetic is done on the
;; integer-equivalent mode and the result moved back afterwards.
(define_expand "xorsign<mode>3"
  [(match_operand:SVE_FULL_F 0 "register_operand")
   (match_operand:SVE_FULL_F 1 "register_operand")
   (match_operand:SVE_FULL_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign_part = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx flipped = gen_reg_rtx (<V_INT_EQUIV>mode);
    /* Index of the most significant bit of each element.  */
    int top_bit = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

    /* Integer views of the two floating-point inputs.  */
    rtx value_bits = lowpart_subreg (<V_INT_EQUIV>mode, operands[1],
                                     <MODE>mode);
    rtx signsrc_bits = lowpart_subreg (<V_INT_EQUIV>mode, operands[2],
                                       <MODE>mode);

    /* sign_part = op2 & (all-ones << top_bit).  */
    rtx keep_top
      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                           HOST_WIDE_INT_M1U << top_bit);
    emit_insn (gen_and<v_int_equiv>3 (sign_part, signsrc_bits, keep_top));

    emit_insn (gen_xor<v_int_equiv>3 (flipped, value_bits, sign_part));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, flipped));
    DONE;
  }
)
6278
915d28fe
RS
6279;; -------------------------------------------------------------------------
6280;; ---- [FP] Maximum and minimum
6281;; -------------------------------------------------------------------------
6282;; Includes:
624d0f07 6283;; - FMAX
915d28fe 6284;; - FMAXNM
624d0f07 6285;; - FMIN
915d28fe
RS
6286;; - FMINNM
6287;; -------------------------------------------------------------------------
43cacb12 6288
;; Unpredicated fmax/fmin, i.e. the libm functions.  (The optabs for the
;; smax/smin rtx codes are dealt with in the generic section above.)
;; The operation is emitted in predicated form with a relaxed governing
;; predicate supplied as operand 3.
(define_expand "<fmaxmin><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_dup 3)
           (const_int SVE_RELAXED_GP)
           (match_operand:SVE_FULL_F 1 "register_operand")
           (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
          SVE_COND_FP_MAXMIN_PUBLIC))]
  "TARGET_SVE"
  {
    /* Governing predicate for the match_dup.  */
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
6304
70613000
RS
;; Predicated fmax/fmin, i.e. the libm functions.  (The optabs for the
;; smax/smin rtx codes are dealt with in the generic section above.)
;; Operand 1 is both the governing predicate of the inner operation and
;; the selector of the outer UNSPEC_SEL; operand 4 is the other
;; UNSPEC_SEL input and may be a register or zero.
(define_expand "cond_<fmaxmin><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
             SVE_COND_FP_MAXMIN_PUBLIC)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)
6321
;; Predicated floating-point maximum/minimum.
;;
;; Alternatives 1 and 2 tie operand 2 to the destination; alternatives
;; 3 and 4 first copy operand 2 into the destination with MOVPRFX.
;; Within each pair, the first form prints operand 3 as an immediate
;; (#%3) and the second prints it as a vector register.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
           (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
          SVE_COND_FP_MAXMIN))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes,yes")]
)
6339
a19ba9e1
RS
6340;; Merging forms are handled through SVE_COND_FP_BINARY and
6341;; SVE_COND_FP_BINARY_I1.
915d28fe
RS
6342
6343;; -------------------------------------------------------------------------
6344;; ---- [PRED] Binary logical operations
6345;; -------------------------------------------------------------------------
6346;; Includes:
6347;; - AND
6348;; - ANDS
6349;; - EOR
6350;; - EORS
6351;; - ORR
6352;; - ORRS
6353;; -------------------------------------------------------------------------
6354
;; Predicate AND.  One of the two inputs (operand 1) can double as the
;; governing predicate.  Repeating the second operand is the preferred
;; implementation of the MOV alias, so the instruction is printed as
;; "%1/z, %2, %2" rather than "%1/z, %1, %2".
(define_insn "and<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (match_operand:PRED_ALL 1 "register_operand" "Upa")
          (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
  "TARGET_SVE"
  "and\t%0.b, %1/z, %2.b, %2.b"
)
9d4ac06e 6365
915d28fe
RS
;; Unpredicated predicate EOR and ORR.  The result of the logical
;; operation is ANDed with operand 3, which the preparation statement
;; sets to the value returned by aarch64_ptrue_reg.
(define_expand "<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (and:PRED_ALL
          (LOGICAL_OR:PRED_ALL
            (match_operand:PRED_ALL 1 "register_operand")
            (match_operand:PRED_ALL 2 "register_operand"))
          (match_dup 3)))]
  "TARGET_SVE"
  {
    /* Governing predicate for the match_dup.  */
    operands[3] = aarch64_ptrue_reg (<MODE>mode);
  }
)
6c4fd4a9 6379
;; Predicated predicate AND, EOR and ORR.  Operand 1 is the governing
;; predicate (printed as %1/z); operands 2 and 3 are the inputs to the
;; logical operation.
(define_insn "@aarch64_pred_<optab><mode>_z"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (LOGICAL:PRED_ALL
            (match_operand:PRED_ALL 2 "register_operand" "Upa")
            (match_operand:PRED_ALL 3 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical>\t%0.b, %1/z, %2.b, %3.b"
)
6391
;; Apply a logical operation to operands 2 and 3 under the governing
;; predicate in operand 1.  The result is stored in operand 0 and the
;; flags are set in the same way as for PTEST.  Operand 4 is matched
;; without a mode or predicate and is reused, via match_dup, as the
;; value ANDed with the logical result in both sets.
(define_insn "*<optab><mode>3_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (LOGICAL:PRED_ALL
               (match_operand:PRED_ALL 2 "register_operand" "Upa")
               (match_operand:PRED_ALL 3 "register_operand" "Upa"))
             (match_dup 4))]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
          (match_dup 4)))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)
6413
624d0f07
RS
;; Flag-setting predicate AND/EOR/ORR in which only the flags result is
;; used.  The instruction still writes a predicate register, so
;; operand 0 is a clobbered scratch.
(define_insn "*<optab><mode>3_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (LOGICAL:PRED_ALL
               (match_operand:PRED_ALL 2 "register_operand" "Upa")
               (match_operand:PRED_ALL 3 "register_operand" "Upa"))
             (match_dup 4))]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)
6431
915d28fe
RS
6432;; -------------------------------------------------------------------------
6433;; ---- [PRED] Binary logical operations (inverted second input)
6434;; -------------------------------------------------------------------------
6435;; Includes:
6436;; - BIC
6437;; - ORN
6438;; -------------------------------------------------------------------------
6439
;; Predicated predicate BIC and ORN.  Operand 3 is the inverted input
;; and operand 2 the uninverted one; operand 1 is the governing
;; predicate.  The assembly still lists the inputs as %2, %3.
(define_insn "aarch64_pred_<nlogical><mode>_z"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL
              (match_operand:PRED_ALL 3 "register_operand" "Upa"))
            (match_operand:PRED_ALL 2 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
)
6451
624d0f07
RS
;; Predicated predicate BIC and ORN that also set the flags as a
;; side-effect.  The first set describes the UNSPEC_PTEST flag result
;; and the second the predicate result in operand 0; both AND the
;; logical result with operand 4.
(define_insn "*<nlogical><mode>3_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (NLOGICAL:PRED_ALL
               (not:PRED_ALL
                 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
               (match_operand:PRED_ALL 2 "register_operand" "Upa"))
             (match_dup 4))]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_dup 3))
            (match_dup 2))
          (match_dup 4)))]
  "TARGET_SVE"
  "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
)
6474
;; Flag-setting predicate BIC and ORN in which only the flags result is
;; used; the predicate destination (operand 0) is a clobbered scratch.
(define_insn "*<nlogical><mode>3_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (NLOGICAL:PRED_ALL
               (not:PRED_ALL
                 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
               (match_operand:PRED_ALL 2 "register_operand" "Upa"))
             (match_dup 4))]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
)
6493
915d28fe
RS
6494;; -------------------------------------------------------------------------
6495;; ---- [PRED] Binary logical operations (inverted result)
6496;; -------------------------------------------------------------------------
6497;; Includes:
6498;; - NAND
6499;; - NOR
6500;; -------------------------------------------------------------------------
6501
;; Predicated predicate NAND and NOR.  Both inputs (operands 2 and 3)
;; appear inverted inside the NLOGICAL operation, and the result is
;; ANDed with the governing predicate in operand 1.
(define_insn "aarch64_pred_<logical_nn><mode>_z"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL
              (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (not:PRED_ALL
              (match_operand:PRED_ALL 3 "register_operand" "Upa")))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)
6513
624d0f07
RS
;; Predicated predicate NAND and NOR that also set the flags as a
;; side-effect.  The first set describes the UNSPEC_PTEST flag result
;; and the second the predicate result in operand 0; both AND the
;; logical result with operand 4.
(define_insn "*<logical_nn><mode>3_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (NLOGICAL:PRED_ALL
               (not:PRED_ALL
                 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
               (not:PRED_ALL
                 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
             (match_dup 4))]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_dup 2))
            (not:PRED_ALL (match_dup 3)))
          (match_dup 4)))]
  "TARGET_SVE"
  "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
)
6537
;; Flag-setting predicate NAND and NOR in which only the flags result
;; is used; the predicate destination (operand 0) is a clobbered scratch.
(define_insn "*<logical_nn><mode>3_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (NLOGICAL:PRED_ALL
               (not:PRED_ALL
                 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
               (not:PRED_ALL
                 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
             (match_dup 4))]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
)
6557
915d28fe
RS
6558;; =========================================================================
6559;; == Ternary arithmetic
6560;; =========================================================================
6561
6562;; -------------------------------------------------------------------------
6563;; ---- [INT] MLA and MAD
6564;; -------------------------------------------------------------------------
6565;; Includes:
6566;; - MAD
6567;; - MLA
6568;; -------------------------------------------------------------------------
6569
b6c3aea1
RS
;; Unpredicated integer addition of product: op0 = op1 * op2 + op3.
;; The multiplication is wrapped in UNSPEC_PRED_X with operand 4 as its
;; predicate.
(define_expand "fma<mode>4"
  [(set (match_operand:SVE_I 0 "register_operand")
        (plus:SVE_I
          (unspec:SVE_I
            [(match_dup 4)
             (mult:SVE_I
               (match_operand:SVE_I 1 "register_operand")
               (match_operand:SVE_I 2 "nonmemory_operand"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_I 3 "register_operand")))]
  "TARGET_SVE"
  {
    /* Finish here if the helper reports that it has dealt with the
       operation itself.  */
    if (aarch64_prepare_sve_int_fma (operands, PLUS))
      DONE;
    /* Predicate for the match_dup.  */
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
6588
;; Predicated integer addition of product: op0 = op2 * op3 + op4.
;;
;; Alternative 1 ties a multiplicand (operand 2) to the destination and
;; uses MAD.  Alternative 2 ties the addend (operand 4) to the
;; destination and uses MLA.  Alternative 3 ties nothing: it copies the
;; addend into the destination with MOVPRFX and then uses MLA.
(define_insn "@aarch64_pred_fma<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (plus:SVE_I
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand" "%0, w, w")
               (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
6607
b6c3aea1
RS
;; Predicated integer addition of product with merging.  Operand 1
;; selects between op2 * op3 + op4 and the fallback value in operand 5,
;; which may be a register or zero.
(define_expand "cond_fma<mode>"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand")
           (plus:SVE_I
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand")
               (match_operand:SVE_I 3 "general_operand"))
             (match_operand:SVE_I 4 "register_operand"))
           (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    /* Finish here if the helper reports that it has dealt with the
       operation itself.  */
    if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
      DONE;
    /* If the fallback value is the second multiplicand, swap the two
       multiplicands so that it becomes the first.  */
    if (rtx_equal_p (operands[3], operands[5]))
      std::swap (operands[2], operands[3]);
  }
)
6630
;; Predicated integer addition of product in which the fallback value
;; is the first multiplicand (operand 2, via match_dup).  Alternative 1
;; ties operand 2 to the destination; alternative 2 copies it there
;; with MOVPRFX first.  Both use MAD.
(define_insn "*cond_fma<mode>_2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (plus:SVE_I
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand" "0, w")
               (match_operand:SVE_I 3 "register_operand" "w, w"))
             (match_operand:SVE_I 4 "register_operand" "w, w"))
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
6649
;; Predicated integer addition of product in which the fallback value
;; is the addend (operand 4, via match_dup).  Alternative 1 ties
;; operand 4 to the destination; alternative 2 copies it there with
;; MOVPRFX first.  Both use MLA.
(define_insn "*cond_fma<mode>_4"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (plus:SVE_I
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand" "w, w")
               (match_operand:SVE_I 3 "register_operand" "w, w"))
             (match_operand:SVE_I 4 "register_operand" "0, w"))
           (match_dup 4)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
6668
;; Predicated integer addition of product in which the fallback value
;; (operand 5) is independent of the three arithmetic inputs; the insn
;; condition rejects operand 5 being equal to operand 2, 3 or 4.
;;
;; Alternatives 1-4 have a zero fallback ("Dz") and use a zeroing
;; MOVPRFX, with operand 4, 2 or 3 optionally tied to the destination.
;; Alternative 5 ties the fallback to the destination and uses a
;; merging MOVPRFX of operand 4.  Alternative 6 ("#") is rewritten
;; after reload, provided that operand 5 is a register other than the
;; destination.
(define_insn_and_rewrite "*cond_fma<mode>_any"
  [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (plus:SVE_I
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
               (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))
             (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w"))
           (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[5], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[5])"
  {
    /* Emit a vcond_mask of operands 4 and 5 under predicate operand 1
       into the destination, then make the destination stand in for
       both the addend and the fallback in the rewritten pattern.  */
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
                                             operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
6702
915d28fe
RS
6703;; -------------------------------------------------------------------------
6704;; ---- [INT] MLS and MSB
6705;; -------------------------------------------------------------------------
6706;; Includes:
6707;; - MLS
6708;; - MSB
6709;; -------------------------------------------------------------------------
6710
b6c3aea1
RS
;; Unpredicated integer subtraction of product: op0 = op3 - op1 * op2.
;; The multiplication is wrapped in UNSPEC_PRED_X with operand 4 as its
;; predicate.
(define_expand "fnma<mode>4"
  [(set (match_operand:SVE_I 0 "register_operand")
        (minus:SVE_I
          (match_operand:SVE_I 3 "register_operand")
          (unspec:SVE_I
            [(match_dup 4)
             (mult:SVE_I
               (match_operand:SVE_I 1 "register_operand")
               (match_operand:SVE_I 2 "general_operand"))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE"
  {
    /* Finish here if the helper reports that it has dealt with the
       operation itself.  */
    if (aarch64_prepare_sve_int_fma (operands, MINUS))
      DONE;
    /* Predicate for the match_dup.  */
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
6729
;; Predicated integer subtraction of product: op0 = op4 - op2 * op3.
;;
;; Alternative 1 ties a multiplicand (operand 2) to the destination and
;; uses MSB.  Alternative 2 ties the minuend (operand 4) to the
;; destination and uses MLS.  Alternative 3 ties nothing: it copies the
;; minuend into the destination with MOVPRFX and then uses MLS.
(define_insn "@aarch64_pred_fnma<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
        (minus:SVE_I
          (match_operand:SVE_I 4 "register_operand" "w, 0, w")
          (unspec:SVE_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand" "%0, w, w")
               (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
6748
b6c3aea1
RS
;; Predicated integer subtraction of product with merging.  Operand 1
;; selects between op4 - op2 * op3 and the fallback value in operand 5,
;; which may be a register or zero.
(define_expand "cond_fnma<mode>"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand")
           (minus:SVE_I
             (match_operand:SVE_I 4 "register_operand")
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand")
               (match_operand:SVE_I 3 "general_operand")))
           (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    /* Finish here if the helper reports that it has dealt with the
       operation itself.  */
    if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
      DONE;
    /* If the fallback value is the second multiplicand, swap the two
       multiplicands so that it becomes the first.  */
    if (rtx_equal_p (operands[3], operands[5]))
      std::swap (operands[2], operands[3]);
  }
)
6771
;; Predicated integer subtraction of product in which the fallback
;; value is the first multiplicand (operand 2, via match_dup).
;; Alternative 1 ties operand 2 to the destination; alternative 2
;; copies it there with MOVPRFX first.  Both use MSB.
(define_insn "*cond_fnma<mode>_2"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (minus:SVE_I
             (match_operand:SVE_I 4 "register_operand" "w, w")
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand" "0, w")
               (match_operand:SVE_I 3 "register_operand" "w, w")))
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
6790
;; Predicated integer subtraction of product in which the fallback
;; value is the minuend (operand 4, via match_dup).  Alternative 1 ties
;; operand 4 to the destination; alternative 2 copies it there with
;; MOVPRFX first.  Both use MLS.
(define_insn "*cond_fnma<mode>_4"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (minus:SVE_I
             (match_operand:SVE_I 4 "register_operand" "0, w")
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand" "w, w")
               (match_operand:SVE_I 3 "register_operand" "w, w")))
           (match_dup 4)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
6809
;; Predicated integer subtraction of product in which the fallback
;; value (operand 5) is independent of the three arithmetic inputs; the
;; insn condition rejects operand 5 being equal to operand 2, 3 or 4.
;;
;; Alternatives 1-4 have a zero fallback ("Dz") and use a zeroing
;; MOVPRFX, with operand 4, 2 or 3 optionally tied to the destination.
;; Alternative 5 ties the fallback to the destination and uses a
;; merging MOVPRFX of operand 4.  Alternative 6 ("#") is rewritten
;; after reload, provided that operand 5 is a register other than the
;; destination.
(define_insn_and_rewrite "*cond_fnma<mode>_any"
  [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (minus:SVE_I
             (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")
             (mult:SVE_I
               (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
               (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")))
           (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[5], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[5])"
  {
    /* Emit a vcond_mask of operands 4 and 5 under predicate operand 1
       into the destination, then make the destination stand in for
       both the minuend and the fallback in the rewritten pattern.  */
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
                                             operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
6844
915d28fe
RS
6845;; -------------------------------------------------------------------------
6846;; ---- [INT] Dot product
6847;; -------------------------------------------------------------------------
6848;; Includes:
6849;; - SDOT
36696774 6850;; - SUDOT (I8MM)
915d28fe 6851;; - UDOT
36696774 6852;; - USDOT (I8MM)
915d28fe
RS
6853;; -------------------------------------------------------------------------
6854
;; Four-element integer dot-product with accumulation.  Operands 1 and
;; 2 are the narrow inputs and operand 3 the accumulator.  Alternative 1
;; ties the accumulator to the destination; alternative 2 copies it
;; there with MOVPRFX first.
(define_insn "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
             (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
            DOTPROD)
          (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
   movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
  [(set_attr "movprfx" "*,yes")]
)
6870
624d0f07
RS
;; Four-element integer dot-product by selected lanes with
;; accumulation.  Operand 2 is indexed by the constant in operand 3
;; (printed as %2.<Vetype_fourth>[%3]) and operand 4 is the accumulator,
;; either tied to the destination or copied there with MOVPRFX.
(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
             (unspec:<VSI2QI>
               [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 3 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            DOTPROD)
          (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
   movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
  [(set_attr "movprfx" "*,yes")]
)
6889
;; Dot-product with accumulation for the DOTPROD_US_ONLY unspecs.
;; Only available with TARGET_SVE_I8MM and only for VNx4SI, so the
;; element suffixes are fixed as .s and .b.  Operand 3 is the
;; accumulator, either tied to the destination or copied there with
;; MOVPRFX.
(define_insn "@<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
        (plus:VNx4SI_ONLY
          (unspec:VNx4SI_ONLY
            [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
             (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
            DOTPROD_US_ONLY)
          (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))]
  "TARGET_SVE_I8MM"
  "@
   <sur>dot\\t%0.s, %1.b, %2.b
   movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b"
  [(set_attr "movprfx" "*,yes")]
)
6904
;; Dot-product by selected lanes with accumulation for the DOTPROD_I8MM
;; unspecs.  Only available with TARGET_SVE_I8MM and only for VNx4SI.
;; Operand 2 uses the "y" constraint and is indexed by the constant in
;; operand 3; operand 4 is the accumulator, either tied to the
;; destination or copied there with MOVPRFX.
(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
        (plus:VNx4SI_ONLY
          (unspec:VNx4SI_ONLY
            [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
             (unspec:<VSI2QI>
               [(match_operand:<VSI2QI> 2 "register_operand" "y, y")
                (match_operand:SI 3 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            DOTPROD_I8MM)
          (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))]
  "TARGET_SVE_I8MM"
  "@
   <sur>dot\\t%0.s, %1.b, %2.b[%3]
   movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]"
  [(set_attr "movprfx" "*,yes")]
)
6922
915d28fe
RS
6923;; -------------------------------------------------------------------------
6924;; ---- [INT] Sum of absolute differences
6925;; -------------------------------------------------------------------------
6926;; The patterns in this section are synthetic.
6927;; -------------------------------------------------------------------------
6928
;; Expand a sum-of-absolute-differences of operands 1 and 2.  The
;; differences also have to be reduced, with widening, into a vector
;; that is accumulated into operand 3, and the total is written to the
;; result operand 0.  The sequence used is:
;;   MOV      ones.b, #1
;;   [SU]ABD  diff.b, p0/m, op1.b, op2.b
;;   MOVPRFX  op0, op3          // If necessary
;;   UDOT     op0.s, diff.b, ones.b
(define_expand "<sur>sad<vsi2qi>"
  [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
   (unspec:<VSI2QI>
     [(use (match_operand:<VSI2QI> 1 "register_operand"))
      (use (match_operand:<VSI2QI> 2 "register_operand"))]
     ABAL)
   (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
  "TARGET_SVE"
  {
    /* Vector with every element equal to 1, used as the second
       dot-product input.  */
    rtx unit_vec = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
    /* Absolute differences of the two inputs.  */
    rtx abs_diff = gen_reg_rtx (<VSI2QI>mode);
    emit_insn (gen_<sur>abd<vsi2qi>_3 (abs_diff, operands[1], operands[2]));
    /* Dot product of the differences with the ones vector, with
       operand 3 as the accumulator.  */
    emit_insn (gen_udot_prod<vsi2qi> (operands[0], abs_diff, unit_vec,
                                      operands[3]));
    DONE;
  }
)
6952
36696774
RS
6953;; -------------------------------------------------------------------------
6954;; ---- [INT] Matrix multiply-accumulate
6955;; -------------------------------------------------------------------------
6956;; Includes:
6957;; - SMMLA (I8MM)
6958;; - UMMLA (I8MM)
6959;; - USMMLA (I8MM)
6960;; -------------------------------------------------------------------------
6961
;; Integer matrix multiply-accumulate for the MATMUL unspecs.  Operand 1
;; is the accumulator and operands 2 and 3 are the byte inputs.
;; Alternative 1 ties the accumulator to the destination; alternative 2
;; copies it there with MOVPRFX first.
(define_insn "@aarch64_sve_add_<optab><vsi2qi>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
        (plus:VNx4SI_ONLY
          (unspec:VNx4SI_ONLY
            [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
             (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
            MATMUL)
          (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE_I8MM"
  "@
   <sur>mmla\\t%0.s, %2.b, %3.b
   movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
  [(set_attr "movprfx" "*,yes")]
)
6976
915d28fe
RS
6977;; -------------------------------------------------------------------------
6978;; ---- [FP] General ternary arithmetic corresponding to unspecs
6979;; -------------------------------------------------------------------------
6980;; Includes merging patterns for:
6981;; - FMAD
6982;; - FMLA
6983;; - FMLS
6984;; - FMSB
6985;; - FNMAD
6986;; - FNMLA
6987;; - FNMLS
6988;; - FNMSB
6989;; -------------------------------------------------------------------------
6990
0d80d083
RS
6991;; Unpredicated floating-point ternary operations.
6992(define_expand "<optab><mode>4"
f75cdd2c
RS
6993 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6994 (unspec:SVE_FULL_F
0d80d083 6995 [(match_dup 4)
c9c5a809 6996 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
6997 (match_operand:SVE_FULL_F 1 "register_operand")
6998 (match_operand:SVE_FULL_F 2 "register_operand")
6999 (match_operand:SVE_FULL_F 3 "register_operand")]
0d80d083
RS
7000 SVE_COND_FP_TERNARY))]
7001 "TARGET_SVE"
7002 {
7003 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7004 }
7005)
7006
7007;; Predicated floating-point ternary operations.
624d0f07 7008(define_insn "@aarch64_pred_<optab><mode>"
f75cdd2c
RS
7009 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
7010 (unspec:SVE_FULL_F
0d80d083 7011 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
c9c5a809 7012 (match_operand:SI 5 "aarch64_sve_gp_strictness")
f75cdd2c
RS
7013 (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
7014 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
7015 (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
0d80d083
RS
7016 SVE_COND_FP_TERNARY))]
7017 "TARGET_SVE"
7018 "@
7019 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7020 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7021 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
7022 [(set_attr "movprfx" "*,*,yes")]
7023)
7024
915d28fe 7025;; Predicated floating-point ternary operations with merging.
624d0f07 7026(define_expand "@cond_<optab><mode>"
f75cdd2c
RS
7027 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7028 (unspec:SVE_FULL_F
915d28fe 7029 [(match_operand:<VPRED> 1 "register_operand")
f75cdd2c 7030 (unspec:SVE_FULL_F
0d80d083 7031 [(match_dup 1)
c9c5a809 7032 (const_int SVE_STRICT_GP)
f75cdd2c
RS
7033 (match_operand:SVE_FULL_F 2 "register_operand")
7034 (match_operand:SVE_FULL_F 3 "register_operand")
7035 (match_operand:SVE_FULL_F 4 "register_operand")]
915d28fe 7036 SVE_COND_FP_TERNARY)
f75cdd2c 7037 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
915d28fe
RS
7038 UNSPEC_SEL))]
7039 "TARGET_SVE"
7040{
7041 /* Swap the multiplication operands if the fallback value is the
7042 second of the two. */
7043 if (rtx_equal_p (operands[3], operands[5]))
7044 std::swap (operands[2], operands[3]);
7045})
7046
7047;; Predicated floating-point ternary operations, merging with the
7048;; first input.
0eb5e901 7049(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
f75cdd2c
RS
7050 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7051 (unspec:SVE_FULL_F
a08acce8 7052 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c 7053 (unspec:SVE_FULL_F
c9c5a809 7054 [(match_operand 5)
0eb5e901 7055 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
7056 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
7057 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7058 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
915d28fe
RS
7059 SVE_COND_FP_TERNARY)
7060 (match_dup 2)]
a08acce8 7061 UNSPEC_SEL))]
0eb5e901 7062 "TARGET_SVE"
a08acce8 7063 "@
915d28fe
RS
7064 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7065 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
c9c5a809
RS
7066 "&& !rtx_equal_p (operands[1], operands[5])"
7067 {
7068 operands[5] = copy_rtx (operands[1]);
7069 }
a08acce8
RH
7070 [(set_attr "movprfx" "*,yes")]
7071)
7072
0eb5e901
RS
7073(define_insn "*cond_<optab><mode>_2_strict"
7074 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7075 (unspec:SVE_FULL_F
7076 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7077 (unspec:SVE_FULL_F
7078 [(match_dup 1)
7079 (const_int SVE_STRICT_GP)
7080 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
7081 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7082 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
7083 SVE_COND_FP_TERNARY)
7084 (match_dup 2)]
7085 UNSPEC_SEL))]
7086 "TARGET_SVE"
7087 "@
7088 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7089 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
7090 [(set_attr "movprfx" "*,yes")]
7091)
7092
915d28fe
RS
7093;; Predicated floating-point ternary operations, merging with the
7094;; third input.
0eb5e901 7095(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
f75cdd2c
RS
7096 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7097 (unspec:SVE_FULL_F
a08acce8 7098 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c 7099 (unspec:SVE_FULL_F
c9c5a809 7100 [(match_operand 5)
0eb5e901 7101 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
7102 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7103 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7104 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
915d28fe
RS
7105 SVE_COND_FP_TERNARY)
7106 (match_dup 4)]
a08acce8 7107 UNSPEC_SEL))]
0eb5e901 7108 "TARGET_SVE"
a08acce8 7109 "@
915d28fe
RS
7110 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7111 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
c9c5a809
RS
7112 "&& !rtx_equal_p (operands[1], operands[5])"
7113 {
7114 operands[5] = copy_rtx (operands[1]);
7115 }
a08acce8
RH
7116 [(set_attr "movprfx" "*,yes")]
7117)
7118
0eb5e901
RS
7119(define_insn "*cond_<optab><mode>_4_strict"
7120 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7121 (unspec:SVE_FULL_F
7122 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7123 (unspec:SVE_FULL_F
7124 [(match_dup 1)
7125 (const_int SVE_STRICT_GP)
7126 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7127 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7128 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7129 SVE_COND_FP_TERNARY)
7130 (match_dup 4)]
7131 UNSPEC_SEL))]
7132 "TARGET_SVE"
7133 "@
7134 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7135 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
7136 [(set_attr "movprfx" "*,yes")]
7137)
7138
915d28fe
RS
7139;; Predicated floating-point ternary operations, merging with an
7140;; independent value.
0eb5e901 7141(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
f75cdd2c
RS
7142 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
7143 (unspec:SVE_FULL_F
432b29c1 7144 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
f75cdd2c 7145 (unspec:SVE_FULL_F
c9c5a809 7146 [(match_operand 6)
0eb5e901 7147 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
7148 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
7149 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
7150 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
915d28fe 7151 SVE_COND_FP_TERNARY)
f75cdd2c 7152 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
0d2b3bca 7153 UNSPEC_SEL))]
f4fde1b3 7154 "TARGET_SVE
915d28fe
RS
7155 && !rtx_equal_p (operands[2], operands[5])
7156 && !rtx_equal_p (operands[3], operands[5])
0eb5e901 7157 && !rtx_equal_p (operands[4], operands[5])"
32cf949c 7158 "@
915d28fe 7159 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
432b29c1
RS
7160 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7161 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7162 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
915d28fe 7163 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
32cf949c 7164 #"
c9c5a809 7165 "&& 1"
f4fde1b3 7166 {
c9c5a809
RS
7167 if (reload_completed
7168 && register_operand (operands[5], <MODE>mode)
7169 && !rtx_equal_p (operands[0], operands[5]))
7170 {
7171 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7172 operands[5], operands[1]));
7173 operands[5] = operands[4] = operands[0];
7174 }
7175 else if (!rtx_equal_p (operands[1], operands[6]))
7176 operands[6] = copy_rtx (operands[1]);
7177 else
7178 FAIL;
f4fde1b3 7179 }
32cf949c 7180 [(set_attr "movprfx" "yes")]
0d2b3bca
RS
7181)
7182
0eb5e901
RS
7183(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7184 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
7185 (unspec:SVE_FULL_F
7186 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
7187 (unspec:SVE_FULL_F
7188 [(match_dup 1)
7189 (const_int SVE_STRICT_GP)
7190 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
7191 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
7192 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
7193 SVE_COND_FP_TERNARY)
7194 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
7195 UNSPEC_SEL))]
7196 "TARGET_SVE
7197 && !rtx_equal_p (operands[2], operands[5])
7198 && !rtx_equal_p (operands[3], operands[5])
7199 && !rtx_equal_p (operands[4], operands[5])"
7200 "@
7201 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7202 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7203 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7204 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7205 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7206 #"
7207 "&& reload_completed
7208 && register_operand (operands[5], <MODE>mode)
7209 && !rtx_equal_p (operands[0], operands[5])"
7210 {
7211 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7212 operands[5], operands[1]));
7213 operands[5] = operands[4] = operands[0];
7214 }
7215 [(set_attr "movprfx" "yes")]
7216)
7217
624d0f07
RS
7218;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
7219;; (fma ...) since target-independent code won't understand the indexing.
7220(define_insn "@aarch64_<optab>_lane_<mode>"
f75cdd2c
RS
7221 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7222 (unspec:SVE_FULL_F
7223 [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
7224 (unspec:SVE_FULL_F
7225 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
624d0f07
RS
7226 (match_operand:SI 3 "const_int_operand")]
7227 UNSPEC_SVE_LANE_SELECT)
f75cdd2c 7228 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
624d0f07
RS
7229 SVE_FP_TERNARY_LANE))]
7230 "TARGET_SVE"
7231 "@
7232 <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7233 movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
7234 [(set_attr "movprfx" "*,yes")]
7235)
7236
7237;; -------------------------------------------------------------------------
7238;; ---- [FP] Complex multiply-add
7239;; -------------------------------------------------------------------------
7240;; Includes merging patterns for:
7241;; - FCMLA
7242;; -------------------------------------------------------------------------
7243
7244;; Predicated FCMLA.
7245(define_insn "@aarch64_pred_<optab><mode>"
f75cdd2c
RS
7246 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7247 (unspec:SVE_FULL_F
624d0f07
RS
7248 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7249 (match_operand:SI 5 "aarch64_sve_gp_strictness")
f75cdd2c
RS
7250 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7251 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7252 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
624d0f07
RS
7253 SVE_COND_FCMLA))]
7254 "TARGET_SVE"
7255 "@
7256 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7257 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
7258 [(set_attr "movprfx" "*,yes")]
7259)
7260
ad260343
TC
7261;; Unpredicated optab pattern for the auto-vectorizer.
7262;; The complex mla/mls operations always need to expand to two instructions.
7263;; The first operation does half the computation and the second does the
7264;; remainder. Because of this, expand early.
7265(define_expand "cml<fcmac1><conj_op><mode>4"
7266 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7267 (unspec:SVE_FULL_F
7268 [(match_dup 4)
7269 (match_dup 5)
7270 (match_operand:SVE_FULL_F 1 "register_operand")
7271 (match_operand:SVE_FULL_F 2 "register_operand")
7272 (match_operand:SVE_FULL_F 3 "register_operand")]
7273 FCMLA_OP))]
7274 "TARGET_SVE"
7275{
7276 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7277 operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode);
7278 rtx tmp = gen_reg_rtx (<MODE>mode);
7279 emit_insn
7280 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4],
7281 operands[3], operands[2],
7282 operands[1], operands[5]));
7283 emit_insn
7284 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4],
7285 operands[3], operands[2],
7286 tmp, operands[5]));
7287 DONE;
7288})
7289
7290;; Unpredicated optab pattern for the auto-vectorizer.
7291;; The complex mul operations always need to expand to two instructions.
7292;; The first operation does half the computation and the second does the
7293;; remainder. Because of this, expand early.
7294(define_expand "cmul<conj_op><mode>3"
7295 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7296 (unspec:SVE_FULL_F
7297 [(match_operand:SVE_FULL_F 1 "register_operand")
7298 (match_operand:SVE_FULL_F 2 "register_operand")]
7299 FCMUL_OP))]
7300 "TARGET_SVE"
7301{
7302 rtx pred_reg = aarch64_ptrue_reg (<VPRED>mode);
7303 rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode);
7304 rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
7305 rtx tmp = gen_reg_rtx (<MODE>mode);
7306 emit_insn
7307 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg,
7308 operands[2], operands[1],
7309 accum, gp_mode));
7310 emit_insn
7311 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg,
7312 operands[2], operands[1],
7313 tmp, gp_mode));
7314 DONE;
7315})
7316
624d0f07
RS
7317;; Predicated FCMLA with merging.
7318(define_expand "@cond_<optab><mode>"
f75cdd2c
RS
7319 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7320 (unspec:SVE_FULL_F
624d0f07 7321 [(match_operand:<VPRED> 1 "register_operand")
f75cdd2c 7322 (unspec:SVE_FULL_F
624d0f07
RS
7323 [(match_dup 1)
7324 (const_int SVE_STRICT_GP)
f75cdd2c
RS
7325 (match_operand:SVE_FULL_F 2 "register_operand")
7326 (match_operand:SVE_FULL_F 3 "register_operand")
7327 (match_operand:SVE_FULL_F 4 "register_operand")]
624d0f07 7328 SVE_COND_FCMLA)
f75cdd2c 7329 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
624d0f07
RS
7330 UNSPEC_SEL))]
7331 "TARGET_SVE"
7332)
7333
7334;; Predicated FCMLA, merging with the third input.
0eb5e901 7335(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
f75cdd2c
RS
7336 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7337 (unspec:SVE_FULL_F
624d0f07 7338 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
f75cdd2c 7339 (unspec:SVE_FULL_F
624d0f07 7340 [(match_operand 5)
0eb5e901 7341 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
7342 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7343 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7344 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
624d0f07
RS
7345 SVE_COND_FCMLA)
7346 (match_dup 4)]
7347 UNSPEC_SEL))]
0eb5e901 7348 "TARGET_SVE"
624d0f07
RS
7349 "@
7350 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7351 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
7352 "&& !rtx_equal_p (operands[1], operands[5])"
7353 {
7354 operands[5] = copy_rtx (operands[1]);
7355 }
7356 [(set_attr "movprfx" "*,yes")]
7357)
7358
0eb5e901
RS
7359(define_insn "*cond_<optab><mode>_4_strict"
7360 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7361 (unspec:SVE_FULL_F
7362 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7363 (unspec:SVE_FULL_F
7364 [(match_dup 1)
7365 (const_int SVE_STRICT_GP)
7366 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7367 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7368 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7369 SVE_COND_FCMLA)
7370 (match_dup 4)]
7371 UNSPEC_SEL))]
7372 "TARGET_SVE"
7373 "@
7374 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7375 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
7376 [(set_attr "movprfx" "*,yes")]
7377)
7378
624d0f07 7379;; Predicated FCMLA, merging with an independent value.
0eb5e901 7380(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
f75cdd2c
RS
7381 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
7382 (unspec:SVE_FULL_F
624d0f07 7383 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
f75cdd2c 7384 (unspec:SVE_FULL_F
624d0f07 7385 [(match_operand 6)
0eb5e901 7386 (const_int SVE_RELAXED_GP)
f75cdd2c
RS
7387 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
7388 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
7389 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
624d0f07 7390 SVE_COND_FCMLA)
f75cdd2c 7391 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
624d0f07 7392 UNSPEC_SEL))]
0eb5e901 7393 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
624d0f07
RS
7394 "@
7395 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7396 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7397 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7398 #"
7399 "&& 1"
7400 {
7401 if (reload_completed
7402 && register_operand (operands[5], <MODE>mode)
7403 && !rtx_equal_p (operands[0], operands[5]))
7404 {
7405 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7406 operands[5], operands[1]));
7407 operands[5] = operands[4] = operands[0];
7408 }
7409 else if (!rtx_equal_p (operands[1], operands[6]))
7410 operands[6] = copy_rtx (operands[1]);
7411 else
7412 FAIL;
7413 }
7414 [(set_attr "movprfx" "yes")]
7415)
7416
0eb5e901
RS
7417(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7418 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
7419 (unspec:SVE_FULL_F
7420 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
7421 (unspec:SVE_FULL_F
7422 [(match_dup 1)
7423 (const_int SVE_STRICT_GP)
7424 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
7425 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
7426 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
7427 SVE_COND_FCMLA)
7428 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
7429 UNSPEC_SEL))]
7430 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7431 "@
7432 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7433 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7434 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7435 #"
7436 "&& reload_completed
7437 && register_operand (operands[5], <MODE>mode)
7438 && !rtx_equal_p (operands[0], operands[5])"
7439 {
7440 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7441 operands[5], operands[1]));
7442 operands[5] = operands[4] = operands[0];
7443 }
7444 [(set_attr "movprfx" "yes")]
7445)
7446
624d0f07
RS
7447;; Unpredicated FCMLA with indexing.
7448(define_insn "@aarch64_<optab>_lane_<mode>"
f75cdd2c
RS
7449 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
7450 (unspec:SVE_FULL_HSF
7451 [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
7452 (unspec:SVE_FULL_HSF
7453 [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
624d0f07
RS
7454 (match_operand:SI 3 "const_int_operand")]
7455 UNSPEC_SVE_LANE_SELECT)
f75cdd2c 7456 (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
624d0f07
RS
7457 FCMLA))]
7458 "TARGET_SVE"
7459 "@
7460 fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7461 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
7462 [(set_attr "movprfx" "*,yes")]
7463)
7464
7465;; -------------------------------------------------------------------------
7466;; ---- [FP] Trigonometric multiply-add
7467;; -------------------------------------------------------------------------
7468;; Includes:
7469;; - FTMAD
7470;; -------------------------------------------------------------------------
7471
7472(define_insn "@aarch64_sve_tmad<mode>"
f75cdd2c
RS
7473 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7474 (unspec:SVE_FULL_F
7475 [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
7476 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7477 (match_operand:DI 3 "const_int_operand")]
7478 UNSPEC_FTMAD))]
624d0f07
RS
7479 "TARGET_SVE"
7480 "@
7481 ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7482 movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
7483 [(set_attr "movprfx" "*,yes")]
7484)
7485
896dff99
RS
7486;; -------------------------------------------------------------------------
7487;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
7488;; -------------------------------------------------------------------------
7489;; Includes:
7490;; - BFDOT (BF16)
7491;; - BFMLALB (BF16)
7492;; - BFMLALT (BF16)
7493;; - BFMMLA (BF16)
7494;; -------------------------------------------------------------------------
7495
7496(define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
7497 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
7498 (unspec:VNx4SF
7499 [(match_operand:VNx4SF 1 "register_operand" "0, w")
7500 (match_operand:VNx8BF 2 "register_operand" "w, w")
7501 (match_operand:VNx8BF 3 "register_operand" "w, w")]
7502 SVE_BFLOAT_TERNARY_LONG))]
7503 "TARGET_SVE_BF16"
7504 "@
7505 <sve_fp_op>\t%0.s, %2.h, %3.h
7506 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
7507 [(set_attr "movprfx" "*,yes")]
7508)
7509
7510;; The immediate range is enforced before generating the instruction.
7511(define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
7512 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
7513 (unspec:VNx4SF
7514 [(match_operand:VNx4SF 1 "register_operand" "0, w")
7515 (match_operand:VNx8BF 2 "register_operand" "w, w")
7516 (match_operand:VNx8BF 3 "register_operand" "y, y")
7517 (match_operand:SI 4 "const_int_operand")]
7518 SVE_BFLOAT_TERNARY_LONG_LANE))]
7519 "TARGET_SVE_BF16"
7520 "@
7521 <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7522 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
7523 [(set_attr "movprfx" "*,yes")]
7524)
7525
36696774
RS
7526;; -------------------------------------------------------------------------
7527;; ---- [FP] Matrix multiply-accumulate
7528;; -------------------------------------------------------------------------
7529;; Includes:
7530;; - FMMLA (F32MM,F64MM)
7531;; -------------------------------------------------------------------------
7532
7533;; The mode iterator enforces the target requirements.
7534(define_insn "@aarch64_sve_<sve_fp_op><mode>"
7535 [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w")
7536 (unspec:SVE_MATMULF
7537 [(match_operand:SVE_MATMULF 2 "register_operand" "w, w")
7538 (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
7539 (match_operand:SVE_MATMULF 1 "register_operand" "0, w")]
7540 FMMLA))]
7541 "TARGET_SVE"
7542 "@
7543 <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
7544 movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
7545 [(set_attr "movprfx" "*,yes")]
7546)
7547
915d28fe
RS
7548;; =========================================================================
7549;; == Comparisons and selects
7550;; =========================================================================
7551
7552;; -------------------------------------------------------------------------
7553;; ---- [INT,FP] Select based on predicates
7554;; -------------------------------------------------------------------------
7555;; Includes merging patterns for:
d29f7dd5 7556;; - FMOV
915d28fe
RS
7557;; - MOV
7558;; - SEL
7559;; -------------------------------------------------------------------------
7560
7561;; vcond_mask operand order: true, false, mask
7562;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
7563;; SEL operand order: mask, true, false
b1c9ec72 7564(define_expand "@vcond_mask_<mode><vpred>"
46c705e7
RS
7565 [(set (match_operand:SVE_ALL 0 "register_operand")
7566 (unspec:SVE_ALL
d29f7dd5 7567 [(match_operand:<VPRED> 3 "register_operand")
46c705e7
RS
7568 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
7569 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
915d28fe
RS
7570 UNSPEC_SEL))]
7571 "TARGET_SVE"
d29f7dd5
RS
7572 {
7573 if (register_operand (operands[1], <MODE>mode))
7574 operands[2] = force_reg (<MODE>mode, operands[2]);
7575 }
915d28fe
RS
7576)
7577
d29f7dd5
RS
7578;; Selects between:
7579;; - two registers
7580;; - a duplicated immediate and a register
7581;; - a duplicated immediate and zero
46c705e7
RS
7582;;
7583;; For unpacked vectors, it doesn't really matter whether SEL uses the
7584;; container size or the element size. If SEL used the container size,
7585;; it would ignore undefined bits of the predicate but would copy the
7586;; upper (undefined) bits of each container along with the defined bits.
7587;; If SEL used the element size, it would use undefined bits of the predicate
7588;; to select between undefined elements in each input vector. Thus the only
7589;; difference is whether the undefined bits in a container always come from
7590;; the same input as the defined bits, or whether the choice can vary
7591;; independently of the defined bits.
7592;;
7593;; For the other instructions, using the element size is more natural,
7594;; so we do that for SEL as well.
d29f7dd5 7595(define_insn "*vcond_mask_<mode><vpred>"
46c705e7
RS
7596 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
7597 (unspec:SVE_ALL
d29f7dd5 7598 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
46c705e7
RS
7599 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
7600 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
915d28fe 7601 UNSPEC_SEL))]
d29f7dd5
RS
7602 "TARGET_SVE
7603 && (!register_operand (operands[1], <MODE>mode)
7604 || register_operand (operands[2], <MODE>mode))"
7605 "@
7606 sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
7607 mov\t%0.<Vetype>, %3/m, #%I1
7608 mov\t%0.<Vetype>, %3/z, #%I1
7609 fmov\t%0.<Vetype>, %3/m, #%1
7610 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
7611 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
7612 movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
7613 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
43cacb12
RS
7614)
7615
88a37c4d
RS
7616;; Optimize selects between a duplicated scalar variable and another vector,
7617;; the latter of which can be a zero constant or a variable. Treat duplicates
7618;; of GPRs as being more expensive than duplicates of FPRs, since they
7619;; involve a cross-file move.
624d0f07 7620(define_insn "@aarch64_sel_dup<mode>"
46c705e7
RS
7621 [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
7622 (unspec:SVE_ALL
3c2707f3 7623 [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
46c705e7 7624 (vec_duplicate:SVE_ALL
88a37c4d 7625 (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
46c705e7 7626 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
88a37c4d
RS
7627 UNSPEC_SEL))]
7628 "TARGET_SVE"
7629 "@
7630 mov\t%0.<Vetype>, %3/m, %<vwcore>1
7631 mov\t%0.<Vetype>, %3/m, %<Vetype>1
7632 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7633 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
7634 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7635 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
7636 [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
7637)
7638
915d28fe
RS
7639;; -------------------------------------------------------------------------
7640;; ---- [INT,FP] Compare and select
7641;; -------------------------------------------------------------------------
7642;; The patterns in this section are synthetic.
7643;; -------------------------------------------------------------------------
43cacb12 7644
915d28fe
RS
7645;; Integer (signed) vcond. Don't enforce an immediate range here, since it
7646;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
46c705e7
RS
7647(define_expand "vcond<SVE_ALL:mode><SVE_I:mode>"
7648 [(set (match_operand:SVE_ALL 0 "register_operand")
7649 (if_then_else:SVE_ALL
915d28fe 7650 (match_operator 3 "comparison_operator"
46c705e7
RS
7651 [(match_operand:SVE_I 4 "register_operand")
7652 (match_operand:SVE_I 5 "nonmemory_operand")])
7653 (match_operand:SVE_ALL 1 "nonmemory_operand")
7654 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
7655 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
898f07b0 7656 {
46c705e7 7657 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
915d28fe 7658 DONE;
898f07b0
RS
7659 }
7660)
7661
915d28fe
RS
7662;; Integer vcondu. Don't enforce an immediate range here, since it
7663;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
46c705e7
RS
7664(define_expand "vcondu<SVE_ALL:mode><SVE_I:mode>"
7665 [(set (match_operand:SVE_ALL 0 "register_operand")
7666 (if_then_else:SVE_ALL
915d28fe 7667 (match_operator 3 "comparison_operator"
46c705e7
RS
7668 [(match_operand:SVE_I 4 "register_operand")
7669 (match_operand:SVE_I 5 "nonmemory_operand")])
7670 (match_operand:SVE_ALL 1 "nonmemory_operand")
7671 (match_operand:SVE_ALL 2 "nonmemory_operand")))]
7672 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
915d28fe 7673 {
46c705e7 7674 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
915d28fe
RS
7675 DONE;
7676 }
898f07b0
RS
7677)
7678
915d28fe
RS
7679;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
7680;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
7681(define_expand "vcond<mode><v_fp_equiv>"
f75cdd2c
RS
7682 [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
7683 (if_then_else:SVE_FULL_HSD
915d28fe
RS
7684 (match_operator 3 "comparison_operator"
7685 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
7686 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
f75cdd2c
RS
7687 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
7688 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
b781a135
RS
7689 "TARGET_SVE"
7690 {
915d28fe
RS
7691 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
7692 DONE;
b781a135
RS
7693 }
7694)
7695
915d28fe
RS
7696;; -------------------------------------------------------------------------
7697;; ---- [INT] Comparisons
7698;; -------------------------------------------------------------------------
624d0f07 7699;; Includes:
915d28fe
RS
7700;; - CMPEQ
7701;; - CMPGE
7702;; - CMPGT
7703;; - CMPHI
7704;; - CMPHS
7705;; - CMPLE
7706;; - CMPLO
7707;; - CMPLS
7708;; - CMPLT
7709;; - CMPNE
7710;; -------------------------------------------------------------------------
b781a135 7711
915d28fe
RS
7712;; Signed integer comparisons. Don't enforce an immediate range here, since
7713;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
7714;; instead.
7715(define_expand "vec_cmp<mode><vpred>"
7716 [(parallel
7717 [(set (match_operand:<VPRED> 0 "register_operand")
7718 (match_operator:<VPRED> 1 "comparison_operator"
46c705e7
RS
7719 [(match_operand:SVE_I 2 "register_operand")
7720 (match_operand:SVE_I 3 "nonmemory_operand")]))
915d28fe 7721 (clobber (reg:CC_NZC CC_REGNUM))])]
b781a135 7722 "TARGET_SVE"
915d28fe
RS
7723 {
7724 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
7725 operands[2], operands[3]);
7726 DONE;
7727 }
b781a135
RS
7728)
7729
915d28fe
RS
7730;; Unsigned integer comparisons. Don't enforce an immediate range here, since
7731;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
7732;; instead.
7733(define_expand "vec_cmpu<mode><vpred>"
7734 [(parallel
7735 [(set (match_operand:<VPRED> 0 "register_operand")
7736 (match_operator:<VPRED> 1 "comparison_operator"
46c705e7
RS
7737 [(match_operand:SVE_I 2 "register_operand")
7738 (match_operand:SVE_I 3 "nonmemory_operand")]))
915d28fe 7739 (clobber (reg:CC_NZC CC_REGNUM))])]
43cacb12
RS
7740 "TARGET_SVE"
7741 {
915d28fe
RS
7742 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
7743 operands[2], operands[3]);
7744 DONE;
43cacb12
RS
7745 }
7746)
7747
00fa90d9 7748;; Predicated integer comparisons.
46c705e7
RS
7749;;
7750;; For unpacked vectors, only the lowpart element in each input container
7751;; has a defined value, and only the predicate bits associated with
7752;; those elements are defined. For example, when comparing two VNx2SIs:
7753;;
7754;; - The VNx2SIs can be seen as VNx2DIs in which the low halves of each
7755;; DI container store an SI element. The upper bits of each DI container
7756;; are undefined.
7757;;
7758;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the
7759;; even elements are defined and the odd elements are undefined.
7760;;
7761;; - The associated predicate mode is VNx2BI. This means that only the
7762;; low bit in each predicate byte is defined (on input and on output).
7763;;
7764;; - We use a .s comparison to compare VNx2SIs, under the control of a
7765;; VNx2BI governing predicate, to produce a VNx2BI result. If we view
7766;; the .s operation as operating on VNx4SIs then for odd lanes:
7767;;
7768;; - the input governing predicate bit is undefined
7769;; - the SI elements being compared are undefined
7770;; - the predicate result bit is therefore undefined, but
7771;; - the predicate result bit is in the undefined part of a VNx2BI,
7772;; so its value doesn't matter anyway.
00fa90d9 7773(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
915d28fe
RS
7774 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7775 (unspec:<VPRED>
7776 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
00fa90d9 7777 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
915d28fe 7778 (SVE_INT_CMP:<VPRED>
46c705e7
RS
7779 (match_operand:SVE_I 3 "register_operand" "w, w")
7780 (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
00fa90d9 7781 UNSPEC_PRED_Z))
915d28fe 7782 (clobber (reg:CC_NZC CC_REGNUM))]
43cacb12
RS
7783 "TARGET_SVE"
7784 "@
00fa90d9
RS
7785 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
7786 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
43cacb12
RS
7787)
7788
00fa90d9
RS
7789;; Predicated integer comparisons in which both the flag and predicate
7790;; results are interesting.
7791(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
915d28fe
RS
7792 [(set (reg:CC_NZC CC_REGNUM)
7793 (unspec:CC_NZC
34467289
RS
7794 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
7795 (match_operand 4)
7796 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
915d28fe 7797 (unspec:<VPRED>
00fa90d9
RS
7798 [(match_operand 6)
7799 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
915d28fe 7800 (SVE_INT_CMP:<VPRED>
46c705e7
RS
7801 (match_operand:SVE_I 2 "register_operand" "w, w")
7802 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
00fa90d9 7803 UNSPEC_PRED_Z)]
34467289 7804 UNSPEC_PTEST))
915d28fe
RS
7805 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7806 (unspec:<VPRED>
00fa90d9
RS
7807 [(match_dup 6)
7808 (match_dup 7)
915d28fe
RS
7809 (SVE_INT_CMP:<VPRED>
7810 (match_dup 2)
7811 (match_dup 3))]
00fa90d9
RS
7812 UNSPEC_PRED_Z))]
7813 "TARGET_SVE
7814 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
915d28fe
RS
7815 "@
7816 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
7817 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
00fa90d9
RS
7818 "&& !rtx_equal_p (operands[4], operands[6])"
7819 {
7820 operands[6] = copy_rtx (operands[4]);
7821 operands[7] = operands[5];
7822 }
43cacb12
RS
7823)
7824
00fa90d9
RS
7825;; Predicated integer comparisons in which only the flags result is
7826;; interesting.
7827(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
915d28fe
RS
7828 [(set (reg:CC_NZC CC_REGNUM)
7829 (unspec:CC_NZC
34467289
RS
7830 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
7831 (match_operand 4)
7832 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
915d28fe 7833 (unspec:<VPRED>
00fa90d9
RS
7834 [(match_operand 6)
7835 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
915d28fe 7836 (SVE_INT_CMP:<VPRED>
46c705e7
RS
7837 (match_operand:SVE_I 2 "register_operand" "w, w")
7838 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
00fa90d9 7839 UNSPEC_PRED_Z)]
34467289 7840 UNSPEC_PTEST))
915d28fe 7841 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
00fa90d9
RS
7842 "TARGET_SVE
7843 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
43cacb12 7844 "@
915d28fe
RS
7845 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
7846 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
00fa90d9
RS
7847 "&& !rtx_equal_p (operands[4], operands[6])"
7848 {
7849 operands[6] = copy_rtx (operands[4]);
7850 operands[7] = operands[5];
7851 }
43cacb12
RS
7852)
7853
915d28fe
RS
7854;; Predicated integer comparisons, formed by combining a PTRUE-predicated
7855;; comparison with an AND. Split the instruction into its preferred form
00fa90d9
RS
7856;; at the earliest opportunity, in order to get rid of the redundant
7857;; operand 4.
7858(define_insn_and_split "*cmp<cmp_op><mode>_and"
915d28fe 7859 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
00fa90d9
RS
7860 (and:<VPRED>
7861 (unspec:<VPRED>
7862 [(match_operand 4)
7863 (const_int SVE_KNOWN_PTRUE)
7864 (SVE_INT_CMP:<VPRED>
46c705e7
RS
7865 (match_operand:SVE_I 2 "register_operand" "w, w")
7866 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
00fa90d9
RS
7867 UNSPEC_PRED_Z)
7868 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
915d28fe
RS
7869 (clobber (reg:CC_NZC CC_REGNUM))]
7870 "TARGET_SVE"
7871 "#"
7872 "&& 1"
7873 [(parallel
7874 [(set (match_dup 0)
00fa90d9
RS
7875 (unspec:<VPRED>
7876 [(match_dup 1)
7877 (const_int SVE_MAYBE_NOT_PTRUE)
7878 (SVE_INT_CMP:<VPRED>
7879 (match_dup 2)
7880 (match_dup 3))]
7881 UNSPEC_PRED_Z))
915d28fe 7882 (clobber (reg:CC_NZC CC_REGNUM))])]
43cacb12
RS
7883)
7884
624d0f07
RS
7885;; Predicated integer wide comparisons.
7886(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
7887 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7888 (unspec:<VPRED>
7889 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7890 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7891 (unspec:<VPRED>
f75cdd2c 7892 [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
624d0f07
RS
7893 (match_operand:VNx2DI 4 "register_operand" "w")]
7894 SVE_COND_INT_CMP_WIDE)]
7895 UNSPEC_PRED_Z))
915d28fe 7896 (clobber (reg:CC_NZC CC_REGNUM))]
43cacb12 7897 "TARGET_SVE"
624d0f07 7898 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
43cacb12
RS
7899)
7900
624d0f07
RS
7901;; Predicated integer wide comparisons in which both the flag and
7902;; predicate results are interesting.
7903(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
915d28fe
RS
7904 [(set (reg:CC_NZC CC_REGNUM)
7905 (unspec:CC_NZC
624d0f07 7906 [(match_operand:VNx16BI 1 "register_operand" "Upl")
34467289 7907 (match_operand 4)
624d0f07
RS
7908 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7909 (unspec:<VPRED>
7910 [(match_operand:VNx16BI 6 "register_operand" "Upl")
7911 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7912 (unspec:<VPRED>
f75cdd2c 7913 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
624d0f07
RS
7914 (match_operand:VNx2DI 3 "register_operand" "w")]
7915 SVE_COND_INT_CMP_WIDE)]
7916 UNSPEC_PRED_Z)]
34467289 7917 UNSPEC_PTEST))
624d0f07
RS
7918 (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7919 (unspec:<VPRED>
7920 [(match_dup 6)
7921 (match_dup 7)
7922 (unspec:<VPRED>
7923 [(match_dup 2)
7924 (match_dup 3)]
7925 SVE_COND_INT_CMP_WIDE)]
7926 UNSPEC_PRED_Z))]
7927 "TARGET_SVE
7928 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7929 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7930)
7931
7932;; Predicated integer wide comparisons in which only the flags result
7933;; is interesting.
7934(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
7935 [(set (reg:CC_NZC CC_REGNUM)
7936 (unspec:CC_NZC
7937 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7938 (match_operand 4)
7939 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7940 (unspec:<VPRED>
7941 [(match_operand:VNx16BI 6 "register_operand" "Upl")
7942 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7943 (unspec:<VPRED>
f75cdd2c 7944 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
624d0f07
RS
7945 (match_operand:VNx2DI 3 "register_operand" "w")]
7946 SVE_COND_INT_CMP_WIDE)]
7947 UNSPEC_PRED_Z)]
7948 UNSPEC_PTEST))
7949 (clobber (match_scratch:<VPRED> 0 "=Upa"))]
7950 "TARGET_SVE
7951 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7952 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7953)
7954
7955;; -------------------------------------------------------------------------
7956;; ---- [INT] While tests
7957;; -------------------------------------------------------------------------
7958;; Includes:
0a09a948
RS
7959;; - WHILEGE (SVE2)
7960;; - WHILEGT (SVE2)
7961;; - WHILEHI (SVE2)
7962;; - WHILEHS (SVE2)
624d0f07
RS
7963;; - WHILELE
7964;; - WHILELO
7965;; - WHILELS
7966;; - WHILELT
bad5e58a
RS
7967;; - WHILERW (SVE2)
7968;; - WHILEWR (SVE2)
624d0f07
RS
7969;; -------------------------------------------------------------------------
7970
7971;; Set element I of the result if (cmp (plus operand1 J) operand2) is
7972;; true for all J in [0, I].
7973(define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
7974 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7975 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7976 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7977 SVE_WHILE))
7978 (clobber (reg:CC_NZC CC_REGNUM))]
7979 "TARGET_SVE"
7980 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7981)
7982
7983;; The WHILE instructions set the flags in the same way as a PTEST with
7984;; a PTRUE GP. Handle the case in which both results are useful. The GP
7985;; operands to the PTEST aren't needed, so we allow them to be anything.
7986(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
7987 [(set (reg:CC_NZC CC_REGNUM)
7988 (unspec:CC_NZC
7989 [(match_operand 3)
7990 (match_operand 4)
7991 (const_int SVE_KNOWN_PTRUE)
7992 (unspec:PRED_ALL
7993 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7994 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7995 SVE_WHILE)]
7996 UNSPEC_PTEST))
7997 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7998 (unspec:PRED_ALL [(match_dup 1)
7999 (match_dup 2)]
8000 SVE_WHILE))]
8001 "TARGET_SVE"
8002 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8003 ;; Force the compiler to drop the unused predicate operand, so that we
8004 ;; don't have an unnecessary PTRUE.
8005 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8006 {
8007 operands[3] = CONSTM1_RTX (VNx16BImode);
8008 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8009 }
8010)
8011
8012;; Same, but handle the case in which only the flags result is useful.
bad5e58a 8013(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
624d0f07
RS
8014 [(set (reg:CC_NZC CC_REGNUM)
8015 (unspec:CC_NZC
8016 [(match_operand 3)
8017 (match_operand 4)
8018 (const_int SVE_KNOWN_PTRUE)
8019 (unspec:PRED_ALL
8020 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8021 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8022 SVE_WHILE)]
8023 UNSPEC_PTEST))
8024 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8025 "TARGET_SVE"
8026 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8027 ;; Force the compiler to drop the unused predicate operand, so that we
8028 ;; don't have an unnecessary PTRUE.
8029 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8030 {
8031 operands[3] = CONSTM1_RTX (VNx16BImode);
8032 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8033 }
8034)
8035
915d28fe 8036;; -------------------------------------------------------------------------
42b4e87d 8037;; ---- [FP] Direct comparisons
915d28fe
RS
8038;; -------------------------------------------------------------------------
8039;; Includes:
8040;; - FCMEQ
8041;; - FCMGE
8042;; - FCMGT
8043;; - FCMLE
8044;; - FCMLT
8045;; - FCMNE
8046;; - FCMUO
8047;; -------------------------------------------------------------------------
8048
8049;; Floating-point comparisons. All comparisons except FCMUO allow a zero
8050;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
8051;; with zero.
8052(define_expand "vec_cmp<mode><vpred>"
8053 [(set (match_operand:<VPRED> 0 "register_operand")
8054 (match_operator:<VPRED> 1 "comparison_operator"
f75cdd2c
RS
8055 [(match_operand:SVE_FULL_F 2 "register_operand")
8056 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
43cacb12
RS
8057 "TARGET_SVE"
8058 {
915d28fe
RS
8059 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
8060 operands[2], operands[3], false);
8061 DONE;
43cacb12
RS
8062 }
8063)
8064
4a942af6 8065;; Predicated floating-point comparisons.
624d0f07 8066(define_insn "@aarch64_pred_fcm<cmp_op><mode>"
915d28fe
RS
8067 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8068 (unspec:<VPRED>
8069 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
624d0f07 8070 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
f75cdd2c
RS
8071 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
8072 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
4a942af6 8073 SVE_COND_FP_CMP_I0))]
43cacb12
RS
8074 "TARGET_SVE"
8075 "@
624d0f07
RS
8076 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
8077 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
43cacb12
RS
8078)
8079
915d28fe 8080;; Same for unordered comparisons.
624d0f07 8081(define_insn "@aarch64_pred_fcmuo<mode>"
915d28fe
RS
8082 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8083 (unspec:<VPRED>
8084 [(match_operand:<VPRED> 1 "register_operand" "Upl")
624d0f07 8085 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
f75cdd2c
RS
8086 (match_operand:SVE_FULL_F 3 "register_operand" "w")
8087 (match_operand:SVE_FULL_F 4 "register_operand" "w")]
4a942af6 8088 UNSPEC_COND_FCMUO))]
43cacb12 8089 "TARGET_SVE"
624d0f07 8090 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
43cacb12
RS
8091)
8092
915d28fe
RS
8093;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
8094;; with another predicate P. This does not have the same trapping behavior
8095;; as predicating the comparison itself on P, but it's a legitimate fold,
8096;; since we can drop any potentially-trapping operations whose results
8097;; are not needed.
8098;;
8099;; Split the instruction into its preferred form (below) at the earliest
8100;; opportunity, in order to get rid of the redundant operand 1.
8101(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
8102 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8103 (and:<VPRED>
8104 (unspec:<VPRED>
8105 [(match_operand:<VPRED> 1)
4a942af6 8106 (const_int SVE_KNOWN_PTRUE)
f75cdd2c
RS
8107 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
8108 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
4a942af6 8109 SVE_COND_FP_CMP_I0)
915d28fe 8110 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
43cacb12 8111 "TARGET_SVE"
915d28fe
RS
8112 "#"
8113 "&& 1"
8114 [(set (match_dup 0)
4a942af6
RS
8115 (unspec:<VPRED>
8116 [(match_dup 4)
8117 (const_int SVE_MAYBE_NOT_PTRUE)
8118 (match_dup 2)
8119 (match_dup 3)]
8120 SVE_COND_FP_CMP_I0))]
43cacb12
RS
8121)
8122
915d28fe
RS
8123;; Same for unordered comparisons.
8124(define_insn_and_split "*fcmuo<mode>_and_combine"
8125 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8126 (and:<VPRED>
8127 (unspec:<VPRED>
8128 [(match_operand:<VPRED> 1)
4a942af6 8129 (const_int SVE_KNOWN_PTRUE)
f75cdd2c
RS
8130 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8131 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
4a942af6 8132 UNSPEC_COND_FCMUO)
915d28fe 8133 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
43cacb12 8134 "TARGET_SVE"
915d28fe
RS
8135 "#"
8136 "&& 1"
8137 [(set (match_dup 0)
915d28fe 8138 (unspec:<VPRED>
4a942af6
RS
8139 [(match_dup 4)
8140 (const_int SVE_MAYBE_NOT_PTRUE)
8141 (match_dup 2)
8142 (match_dup 3)]
8143 UNSPEC_COND_FCMUO))]
43cacb12
RS
8144)
8145
e36206c9
TC
8146;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
8147;; In this case, we still need a separate NOT/BIC operation, but predicating
8148;; the comparison on the BIC operand removes the need for a PTRUE.
8149(define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
8150 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8151 (and:<VPRED>
8152 (and:<VPRED>
8153 (not:<VPRED>
8154 (unspec:<VPRED>
8155 [(match_operand:<VPRED> 1)
8156 (const_int SVE_KNOWN_PTRUE)
8157 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8158 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8159 SVE_COND_FP_CMP_I0))
8160 (match_operand:<VPRED> 4 "register_operand" "Upa"))
8161 (match_dup:<VPRED> 1)))
8162 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8163 "TARGET_SVE"
8164 "#"
8165 "&& 1"
8166 [(set (match_dup 5)
8167 (unspec:<VPRED>
8168 [(match_dup 4)
8169 (const_int SVE_MAYBE_NOT_PTRUE)
8170 (match_dup 2)
8171 (match_dup 3)]
8172 SVE_COND_FP_CMP_I0))
8173 (set (match_dup 0)
8174 (and:<VPRED>
8175 (not:<VPRED>
8176 (match_dup 5))
8177 (match_dup 4)))]
8178{
8179 if (can_create_pseudo_p ())
8180 operands[5] = gen_reg_rtx (<VPRED>mode);
8181}
8182)
8183
8184;; Make sure that we expand to a NOR when operand 4 of
8185;; *fcm<cmp_op><mode>_bic_combine is itself a NOT.
8186(define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
8187 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8188 (and:<VPRED>
8189 (and:<VPRED>
8190 (not:<VPRED>
8191 (unspec:<VPRED>
8192 [(match_operand:<VPRED> 1)
8193 (const_int SVE_KNOWN_PTRUE)
8194 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8195 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8196 SVE_COND_FP_CMP_I0))
8197 (not:<VPRED>
8198 (match_operand:<VPRED> 4 "register_operand" "Upa")))
8199 (match_dup:<VPRED> 1)))
8200 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8201 "TARGET_SVE"
8202 "#"
8203 "&& 1"
8204 [(set (match_dup 5)
8205 (unspec:<VPRED>
8206 [(match_dup 1)
8207 (const_int SVE_KNOWN_PTRUE)
8208 (match_dup 2)
8209 (match_dup 3)]
8210 SVE_COND_FP_CMP_I0))
8211 (set (match_dup 0)
8212 (and:<VPRED>
8213 (and:<VPRED>
8214 (not:<VPRED>
8215 (match_dup 5))
8216 (not:<VPRED>
8217 (match_dup 4)))
8218 (match_dup 1)))]
8219{
8220 if (can_create_pseudo_p ())
8221 operands[5] = gen_reg_rtx (<VPRED>mode);
8222}
8223)
8224
;; Like *fcm<cmp_op><mode>_bic_combine, but for unordered comparisons:
;; a PTRUE-predicated FCMUO whose inverted result is ANDed with operand 4.
;; The split predicates the FCMUO on operand 4 instead, writes it to the
;; temporary in operand 5, and then computes (~operand 5) & operand 4.
;;
;; Operand 3 must be a register.  FCMUO is the one comparison that does
;; not allow a zero operand, and the FCMUO generated by the split has to
;; match @aarch64_pred_fcmuo<mode>, which only accepts registers.
8225(define_insn_and_split "*fcmuo<mode>_bic_combine"
8226 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8227 (and:<VPRED>
8228 (and:<VPRED>
8229 (not:<VPRED>
8230 (unspec:<VPRED>
8231 [(match_operand:<VPRED> 1)
8232 (const_int SVE_KNOWN_PTRUE)
8233 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8234 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8235 UNSPEC_COND_FCMUO))
8236 (match_operand:<VPRED> 4 "register_operand" "Upa"))
8237 (match_dup:<VPRED> 1)))
8238 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8239 "TARGET_SVE"
8240 "#"
8241 "&& 1"
8242 [(set (match_dup 5)
8243 (unspec:<VPRED>
8244 [(match_dup 4)
8245 (const_int SVE_MAYBE_NOT_PTRUE)
8246 (match_dup 2)
8247 (match_dup 3)]
8248 UNSPEC_COND_FCMUO))
8249 (set (match_dup 0)
8250 (and:<VPRED>
8251 (not:<VPRED>
8252 (match_dup 5))
8253 (match_dup 4)))]
8254{
8255 if (can_create_pseudo_p ())
8256 operands[5] = gen_reg_rtx (<VPRED>mode);
8257}
8258)
8259
8260;; Like *fcm<cmp_op><mode>_nor_combine, but for unordered comparisons.
;; Here operand 4 is itself inverted, so the comparison stays predicated
;; on the PTRUE in operand 1; the split only separates the FCMUO (into the
;; temporary in operand 5) from the (~operand 5 & ~operand 4) & operand 1
;; predicate operation.
;;
;; Operand 3 must be a register.  FCMUO is the one comparison that does
;; not allow a zero operand, and the FCMUO generated by the split has to
;; match @aarch64_pred_fcmuo<mode>, which only accepts registers.
8261(define_insn_and_split "*fcmuo<mode>_nor_combine"
8262 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8263 (and:<VPRED>
8264 (and:<VPRED>
8265 (not:<VPRED>
8266 (unspec:<VPRED>
8267 [(match_operand:<VPRED> 1)
8268 (const_int SVE_KNOWN_PTRUE)
8269 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8270 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8271 UNSPEC_COND_FCMUO))
8272 (not:<VPRED>
8273 (match_operand:<VPRED> 4 "register_operand" "Upa")))
8274 (match_dup:<VPRED> 1)))
8275 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8276 "TARGET_SVE"
8277 "#"
8278 "&& 1"
8279 [(set (match_dup 5)
8280 (unspec:<VPRED>
8281 [(match_dup 1)
8282 (const_int SVE_KNOWN_PTRUE)
8283 (match_dup 2)
8284 (match_dup 3)]
8285 UNSPEC_COND_FCMUO))
8286 (set (match_dup 0)
8287 (and:<VPRED>
8288 (and:<VPRED>
8289 (not:<VPRED>
8290 (match_dup 5))
8291 (not:<VPRED>
8292 (match_dup 4)))
8293 (match_dup 1)))]
8294{
8295 if (can_create_pseudo_p ())
8296 operands[5] = gen_reg_rtx (<VPRED>mode);
8297}
8298)
8299
42b4e87d
RS
8300;; -------------------------------------------------------------------------
8301;; ---- [FP] Absolute comparisons
8302;; -------------------------------------------------------------------------
8303;; Includes:
8304;; - FACGE
8305;; - FACGT
8306;; - FACLE
8307;; - FACLT
8308;; -------------------------------------------------------------------------
8309
8310;; Predicated floating-point absolute comparisons.
624d0f07
RS
8311(define_expand "@aarch64_pred_fac<cmp_op><mode>"
8312 [(set (match_operand:<VPRED> 0 "register_operand")
8313 (unspec:<VPRED>
8314 [(match_operand:<VPRED> 1 "register_operand")
8315 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
f75cdd2c 8316 (unspec:SVE_FULL_F
624d0f07
RS
8317 [(match_dup 1)
8318 (match_dup 2)
f75cdd2c 8319 (match_operand:SVE_FULL_F 3 "register_operand")]
624d0f07 8320 UNSPEC_COND_FABS)
f75cdd2c 8321 (unspec:SVE_FULL_F
624d0f07
RS
8322 [(match_dup 1)
8323 (match_dup 2)
f75cdd2c 8324 (match_operand:SVE_FULL_F 4 "register_operand")]
624d0f07
RS
8325 UNSPEC_COND_FABS)]
8326 SVE_COND_FP_ABS_CMP))]
8327 "TARGET_SVE"
8328)
8329
;; Predicated floating-point absolute comparisons in which both FABS
;; operations are marked SVE_RELAXED_GP.  The governing predicates of the
;; FABS operations (operands 5 and 6) are matched without any predicate or
;; mode check.  When either differs from the comparison's own predicate
;; (operand 1), the rewrite replaces both with copies of operand 1.
0eb5e901 8330(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
42b4e87d
RS
8331 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8332 (unspec:<VPRED>
8333 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8334 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
f75cdd2c 8335 (unspec:SVE_FULL_F
42b4e87d 8336 [(match_operand 5)
0eb5e901 8337 (const_int SVE_RELAXED_GP)
f75cdd2c 8338 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
42b4e87d 8339 UNSPEC_COND_FABS)
f75cdd2c 8340 (unspec:SVE_FULL_F
0eb5e901
RS
8341 [(match_operand 6)
8342 (const_int SVE_RELAXED_GP)
f75cdd2c 8343 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
42b4e87d
RS
8344 UNSPEC_COND_FABS)]
8345 SVE_COND_FP_ABS_CMP))]
0eb5e901 8346 "TARGET_SVE"
42b4e87d
RS
8347 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8348 "&& (!rtx_equal_p (operands[1], operands[5])
0eb5e901 8349 || !rtx_equal_p (operands[1], operands[6]))"
42b4e87d
RS
8350 {
8351 operands[5] = copy_rtx (operands[1]);
0eb5e901 8352 operands[6] = copy_rtx (operands[1]);
42b4e87d
RS
8353 }
8354)
8355
0eb5e901
RS
;; Predicated floating-point absolute comparisons in which both FABS
;; operations are governed by the same predicate as the comparison itself
;; (operand 1, enforced through match_dup).  The GP strictness flags of the
;; FABS operations (operands 5 and 6) are matched but do not appear in the
;; output template.
8356(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict"
8357 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8358 (unspec:<VPRED>
8359 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8360 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8361 (unspec:SVE_FULL_F
8362 [(match_dup 1)
8363 (match_operand:SI 5 "aarch64_sve_gp_strictness")
8364 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8365 UNSPEC_COND_FABS)
8366 (unspec:SVE_FULL_F
8367 [(match_dup 1)
8368 (match_operand:SI 6 "aarch64_sve_gp_strictness")
8369 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8370 UNSPEC_COND_FABS)]
8371 SVE_COND_FP_ABS_CMP))]
8372 "TARGET_SVE"
8373 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8374)
8375
624d0f07
RS
8376;; -------------------------------------------------------------------------
8377;; ---- [PRED] Select
8378;; -------------------------------------------------------------------------
8379;; Includes:
8380;; - SEL
8381;; -------------------------------------------------------------------------
8382
;; Select between two predicates:
;;
;;   operand 0 = (operand 3 & operand 1) | (~operand 3 & operand 2)
;;
;; i.e. each result bit comes from operand 1 where the selector (operand 3)
;; is set and from operand 2 elsewhere.  The output template uses .b
;; suffixes for every PRED_ALL mode.
8383(define_insn "@vcond_mask_<mode><mode>"
8384 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8385 (ior:PRED_ALL
8386 (and:PRED_ALL
8387 (match_operand:PRED_ALL 3 "register_operand" "Upa")
8388 (match_operand:PRED_ALL 1 "register_operand" "Upa"))
8389 (and:PRED_ALL
8390 (not (match_dup 3))
8391 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
8392 "TARGET_SVE"
8393 "sel\t%0.b, %3, %1.b, %2.b"
8394)
8395
915d28fe
RS
8396;; -------------------------------------------------------------------------
8397;; ---- [PRED] Test bits
8398;; -------------------------------------------------------------------------
8399;; Includes:
8400;; - PTEST
8401;; -------------------------------------------------------------------------
8402
8403;; Branch based on predicate equality or inequality.
;; The expander reduces the predicate comparison to a PTEST:
;;
;; - if operand 2 is zero, operand 1 is tested directly;
;; - otherwise operands 1 and 2 are XORed under an all-true governing
;;   predicate into a fresh register, and that result is tested.
;;
;; Operands 1 and 2 are then replaced by the CC register and zero, so the
;; branch that is finally emitted applies operator 0 to the flags set by
;; the PTEST.
8404(define_expand "cbranch<mode>4"
8405 [(set (pc)
8406 (if_then_else
8407 (match_operator 0 "aarch64_equality_operator"
8408 [(match_operand:PRED_ALL 1 "register_operand")
8409 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
8410 (label_ref (match_operand 3 ""))
8411 (pc)))]
8412 ""
43cacb12 8413 {
34467289
RS
8414 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
8415 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
8416 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
915d28fe
RS
8417 rtx pred;
8418 if (operands[2] == CONST0_RTX (<MODE>mode))
8419 pred = operands[1];
8420 else
8421 {
8422 pred = gen_reg_rtx (<MODE>mode);
34467289
RS
8423 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
8424 operands[2]));
915d28fe 8425 }
34467289 8426 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
915d28fe
RS
8427 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
8428 operands[2] = const0_rtx;
43cacb12
RS
8429 }
8430)
8431
34467289
RS
8432;; See "Description of UNSPEC_PTEST" above for details.
8433(define_insn "aarch64_ptest<mode>"
915d28fe 8434 [(set (reg:CC_NZC CC_REGNUM)
34467289
RS
8435 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
8436 (match_operand 1)
8437 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8438 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
8439 UNSPEC_PTEST))]
43cacb12 8440 "TARGET_SVE"
34467289 8441 "ptest\t%0, %3.b"
43cacb12
RS
8442)
8443
915d28fe
RS
8444;; =========================================================================
8445;; == Reductions
8446;; =========================================================================
8447
8448;; -------------------------------------------------------------------------
8449;; ---- [INT,FP] Conditional reductions
8450;; -------------------------------------------------------------------------
8451;; Includes:
624d0f07 8452;; - CLASTA
915d28fe
RS
8453;; - CLASTB
8454;; -------------------------------------------------------------------------
8455
8456;; Set operand 0 to the last active element in operand 3, or to tied
8457;; operand 1 if no elements are active.
624d0f07 8458(define_insn "@fold_extract_<last_op>_<mode>"
801790b3 8459 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
915d28fe
RS
8460 (unspec:<VEL>
8461 [(match_operand:<VEL> 1 "register_operand" "0, 0")
8462 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
f75cdd2c 8463 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
624d0f07
RS
8464 CLAST))]
8465 "TARGET_SVE"
8466 "@
8467 clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
8468 clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
8469)
8470
;; Vector form of CLASTA/CLASTB: operands 0, 1 and 3 are all full vectors
;; and operand 2 is the governing predicate.  In the first alternative
;; operand 1 is tied to the destination.  In the second alternative the
;; destination is an earlyclobber register and a MOVPRFX first copies
;; operand 1 into it.
8471(define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
f75cdd2c
RS
8472 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
8473 (unspec:SVE_FULL
8474 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
624d0f07 8475 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
f75cdd2c 8476 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
624d0f07 8477 CLAST))]
3db85990 8478 "TARGET_SVE"
915d28fe 8479 "@
624d0f07
RS
8480 clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
8481 movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
3db85990
ST
8482)
8483
915d28fe
RS
8484;; -------------------------------------------------------------------------
8485;; ---- [INT] Tree reductions
8486;; -------------------------------------------------------------------------
8487;; Includes:
8488;; - ANDV
8489;; - EORV
8490;; - ORV
624d0f07 8491;; - SADDV
915d28fe
RS
8492;; - SMAXV
8493;; - SMINV
8494;; - UADDV
8495;; - UMAXV
8496;; - UMINV
8497;; -------------------------------------------------------------------------
8498
8499;; Unpredicated integer add reduction.
;; The reduction is emitted through gen_aarch64_pred_reduc_uadd_<mode>
;; with an all-true predicate.  The value passed as its destination is
;; always DImode: operand 0 itself when <VEL> is DImode, otherwise a fresh
;; DImode temporary whose low part is then moved into operand 0.
8500(define_expand "reduc_plus_scal_<mode>"
624d0f07 8501 [(match_operand:<VEL> 0 "register_operand")
f75cdd2c 8502 (match_operand:SVE_FULL_I 1 "register_operand")]
43cacb12
RS
8503 "TARGET_SVE"
8504 {
624d0f07
RS
8505 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
8506 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
8507 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
8508 if (tmp != operands[0])
8509 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
8510 DONE;
43cacb12
RS
8511 }
8512)
8513
915d28fe 8514;; Predicated integer add reduction. The result is always 64 bits.
624d0f07
RS
8515(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8516 [(set (match_operand:DI 0 "register_operand" "=w")
8517 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
f75cdd2c 8518 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
624d0f07
RS
8519 SVE_INT_ADDV))]
8520 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8521 "<su>addv\t%d0, %1, %2.<Vetype>"
43cacb12
RS
8522)
8523
b0760a40 8524;; Unpredicated integer reductions.
915d28fe
RS
8525(define_expand "reduc_<optab>_scal_<mode>"
8526 [(set (match_operand:<VEL> 0 "register_operand")
8527 (unspec:<VEL> [(match_dup 2)
f75cdd2c 8528 (match_operand:SVE_FULL_I 1 "register_operand")]
b0760a40 8529 SVE_INT_REDUCTION))]
43cacb12 8530 "TARGET_SVE"
915d28fe
RS
8531 {
8532 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8533 }
43cacb12
RS
8534)
8535
b0760a40 8536;; Predicated integer reductions.
624d0f07 8537(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
915d28fe
RS
8538 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8539 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
f75cdd2c 8540 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
b0760a40 8541 SVE_INT_REDUCTION))]
43cacb12 8542 "TARGET_SVE"
b0760a40 8543 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
43cacb12
RS
8544)
8545
915d28fe
RS
8546;; -------------------------------------------------------------------------
8547;; ---- [FP] Tree reductions
8548;; -------------------------------------------------------------------------
8549;; Includes:
8550;; - FADDV
8551;; - FMAXNMV
8552;; - FMAXV
8553;; - FMINNMV
8554;; - FMINV
8555;; -------------------------------------------------------------------------
8556
b0760a40
RS
8557;; Unpredicated floating-point tree reductions.
;; Operand 1 is the SVE_FULL_F input vector and operand 0 the scalar <VEL>
;; result.  The expand body supplies operand 2 as
;; aarch64_ptrue_reg (<VPRED>mode), which becomes the predicate element of
;; the SVE_FP_REDUCTION unspec emitted from the template.
8558(define_expand "reduc_<optab>_scal_<mode>"
915d28fe
RS
8559 [(set (match_operand:<VEL> 0 "register_operand")
8560 (unspec:<VEL> [(match_dup 2)
f75cdd2c 8561 (match_operand:SVE_FULL_F 1 "register_operand")]
b0760a40 8562 SVE_FP_REDUCTION))]
43cacb12 8563 "TARGET_SVE"
915d28fe
RS
8564 {
8565 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8566 }
43cacb12
RS
8567)
8568
e32b9eb3
RS
;; Expander named after the <fmaxmin> attribute of the FMAXMINNMV iterator.
;; The body forwards operands 0 and 1 unchanged to
;; gen_reduc_<optab>_scal_<mode> and finishes with DONE, so the RTL
;; template here only declares the operands and is never emitted itself.
8569(define_expand "reduc_<fmaxmin>_scal_<mode>"
8570 [(match_operand:<VEL> 0 "register_operand")
8571 (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
8572 FMAXMINNMV)]
8573 "TARGET_SVE"
8574 {
8575 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
8576 DONE;
8577 }
8578)
8579
b0760a40 8580;; Predicated floating-point tree reductions.
;; Operand 1 is the governing predicate and operand 2 the SVE_FULL_F input.
;; The scalar <VEL> result is written to operand 0, printed as %<Vetype>0.
;; The mnemonic comes from the <sve_fp_op> attribute of the
;; SVE_FP_REDUCTION iterator.
624d0f07 8581(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
915d28fe
RS
8582 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8583 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
f75cdd2c 8584 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
b0760a40 8585 SVE_FP_REDUCTION))]
43cacb12 8586 "TARGET_SVE"
b0760a40 8587 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
43cacb12
RS
8588)
8589
915d28fe
RS
8590;; -------------------------------------------------------------------------
8591;; ---- [FP] Left-to-right reductions
8592;; -------------------------------------------------------------------------
8593;; Includes:
8594;; - FADDA
8595;; -------------------------------------------------------------------------
8596
8597;; Unpredicated in-order FP reductions.
;; Operand 1 is a scalar <VEL> input and operand 2 the SVE_FULL_F vector;
;; the scalar <VEL> result goes to operand 0.  Operand 3 is filled in by
;; the expand body with aarch64_ptrue_reg (<VPRED>mode) and becomes the
;; predicate element of the UNSPEC_FADDA unspec.
8598(define_expand "fold_left_plus_<mode>"
8599 [(set (match_operand:<VEL> 0 "register_operand")
8600 (unspec:<VEL> [(match_dup 3)
8601 (match_operand:<VEL> 1 "register_operand")
f75cdd2c 8602 (match_operand:SVE_FULL_F 2 "register_operand")]
915d28fe 8603 UNSPEC_FADDA))]
43cacb12 8604 "TARGET_SVE"
915d28fe
RS
8605 {
8606 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
8607 }
43cacb12
RS
8608)
8609
915d28fe
RS
8610;; Predicated in-order FP reductions.
;; Operand 3 is the governing predicate and operand 2 the SVE_FULL_F
;; vector.  Operand 1 (the scalar <VEL> input) is tied to the destination
;; by its "0" constraint, which is why the template prints %<Vetype>0 for
;; both the destination and the scalar source of FADDA.
8611(define_insn "mask_fold_left_plus_<mode>"
8612 [(set (match_operand:<VEL> 0 "register_operand" "=w")
8613 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
8614 (match_operand:<VEL> 1 "register_operand" "0")
f75cdd2c 8615 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
915d28fe 8616 UNSPEC_FADDA))]
43cacb12 8617 "TARGET_SVE"
915d28fe 8618 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
43cacb12
RS
8619)
8620
915d28fe
RS
8621;; =========================================================================
8622;; == Permutes
8623;; =========================================================================
8624
8625;; -------------------------------------------------------------------------
8626;; ---- [INT,FP] General permutes
8627;; -------------------------------------------------------------------------
8628;; Includes:
8629;; - TBL
8630;; -------------------------------------------------------------------------
8631
;; General two-input permute.  Operands 1 and 2 are the SVE_FULL inputs,
;; operand 3 is the selector in the integer-equivalent mode (accepted by
;; aarch64_sve_vec_perm_operand) and operand 0 is the result.  The pattern
;; is only enabled when GET_MODE_NUNITS (<MODE>mode) is a compile-time
;; constant.  All of the work is delegated to aarch64_expand_sve_vec_perm;
;; because the body ends with DONE, the template itself is not emitted.
8632(define_expand "vec_perm<mode>"
f75cdd2c
RS
8633 [(match_operand:SVE_FULL 0 "register_operand")
8634 (match_operand:SVE_FULL 1 "register_operand")
8635 (match_operand:SVE_FULL 2 "register_operand")
915d28fe
RS
8636 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
8637 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
9bfb28ed 8638 {
915d28fe
RS
8639 aarch64_expand_sve_vec_perm (operands[0], operands[1],
8640 operands[2], operands[3]);
9bfb28ed
RS
8641 DONE;
8642 }
8643)
8644
;; TBL.  Operand 1 is an SVE_FULL vector and operand 2 a vector in the
;; integer-equivalent mode (<V_INT_EQUIV>); both feed UNSPEC_TBL.  All
;; three registers are printed with the same <Vetype> element suffix.
624d0f07 8645(define_insn "@aarch64_sve_tbl<mode>"
f75cdd2c
RS
8646 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
8647 (unspec:SVE_FULL
8648 [(match_operand:SVE_FULL 1 "register_operand" "w")
915d28fe
RS
8649 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
8650 UNSPEC_TBL))]
43cacb12 8651 "TARGET_SVE"
915d28fe 8652 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
43cacb12
RS
8653)
8654
915d28fe
RS
8655;; -------------------------------------------------------------------------
8656;; ---- [INT,FP] Special-purpose unary permutes
8657;; -------------------------------------------------------------------------
8658;; Includes:
624d0f07 8659;; - COMPACT
915d28fe
RS
8660;; - DUP
8661;; - REV
915d28fe
RS
8662;; -------------------------------------------------------------------------
8663
624d0f07
RS
8664;; Compact active elements and pad with zeros.
;; Only defined for the SVE_FULL_SD modes.  Operand 1 is the governing
;; predicate and operand 2 the input vector; the result in operand 0 is
;; represented as UNSPEC_SVE_COMPACT.
8665(define_insn "@aarch64_sve_compact<mode>"
f75cdd2c
RS
8666 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
8667 (unspec:SVE_FULL_SD
8668 [(match_operand:<VPRED> 1 "register_operand" "Upl")
8669 (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
8670 UNSPEC_SVE_COMPACT))]
624d0f07
RS
8671 "TARGET_SVE"
8672 "compact\t%0.<Vetype>, %1, %2.<Vetype>"
8673)
8674
915d28fe 8675;; Duplicate one element of a vector.
;; Operand 2 is a constant lane index into operand 1; the selected <VEL>
;; element is broadcast to every lane of operand 0.  The insn condition
;; converts the lane index to a byte offset
;; (INTVAL (operands[2]) * <container_bits> / 8) and requires it to be in
;; the range [0, 63].  Registers are printed with the container suffix
;; <Vctype>, and the lane index is printed unchanged as [%2].
624d0f07 8676(define_insn "@aarch64_sve_dup_lane<mode>"
6c3ce63b
RS
8677 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
8678 (vec_duplicate:SVE_ALL
915d28fe 8679 (vec_select:<VEL>
6c3ce63b 8680 (match_operand:SVE_ALL 1 "register_operand" "w")
915d28fe
RS
8681 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
8682 "TARGET_SVE
6c3ce63b
RS
8683 && IN_RANGE (INTVAL (operands[2]) * <container_bits> / 8, 0, 63)"
8684 "dup\t%0.<Vctype>, %1.<Vctype>[%2]"
915d28fe
RS
8685)
8686
624d0f07
RS
8687;; Use DUP.Q to duplicate a 128-bit segment of a register.
8688;;
8689;; The vec_select:<V128> sets memory lane number N of the V128 to lane
8690;; number op2 + N of op1. (We don't need to distinguish between memory
8691;; and architectural register lane numbering for op1 or op0, since the
8692;; two numbering schemes are the same for SVE.)
8693;;
f75cdd2c 8694;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
624d0f07
RS
8695;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
8696;; of op0. We therefore get the correct result for both endiannesses.
8697;;
8698;; The wrinkle is that for big-endian V128 registers, memory lane numbering
8699;; is in the opposite order to architectural register lane numbering.
8700;; Thus if we were to do this operation via a V128 temporary register,
8701;; the vec_select and vec_duplicate would both involve a reverse operation
8702;; for big-endian targets. In this fused pattern the two reverses cancel
8703;; each other out.
;; Operand 2 is a parallel of ascending lane indices
;; (ascending_int_parallel).  The insn condition looks only at its first
;; element: the byte offset of that lane
;; (index * GET_MODE_SIZE (<VEL>mode)) must be a multiple of 16 and lie in
;; [0, 63].  The output code then overwrites operands[2] with the
;; quadword index (byte / 16) so that it can be printed as the .q lane
;; number.
8704(define_insn "@aarch64_sve_dupq_lane<mode>"
f75cdd2c
RS
8705 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
8706 (vec_duplicate:SVE_FULL
624d0f07 8707 (vec_select:<V128>
f75cdd2c 8708 (match_operand:SVE_FULL 1 "register_operand" "w")
624d0f07
RS
8709 (match_operand 2 "ascending_int_parallel"))))]
8710 "TARGET_SVE
8711 && (INTVAL (XVECEXP (operands[2], 0, 0))
8712 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
8713 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
8714 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
8715 {
8716 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
8717 * GET_MODE_SIZE (<VEL>mode));
8718 operands[2] = gen_int_mode (byte / 16, DImode);
8719 return "dup\t%0.q, %1.q[%2]";
8720 }
8721)
8722
915d28fe
RS
8723;; Reverse the order of elements within a full vector.
;; Defined for every SVE_ALL mode.  The operation is represented as
;; UNSPEC_REV on operand 1, and both registers are printed with the
;; container suffix <Vctype>.
8724(define_insn "@aarch64_sve_rev<mode>"
6c3ce63b
RS
8725 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
8726 (unspec:SVE_ALL
8727 [(match_operand:SVE_ALL 1 "register_operand" "w")]
f75cdd2c 8728 UNSPEC_REV))]
9bfb28ed 8729 "TARGET_SVE"
6c3ce63b 8730 "rev\t%0.<Vctype>, %1.<Vctype>")
915d28fe 8731
915d28fe
RS
8732;; -------------------------------------------------------------------------
8733;; ---- [INT,FP] Special-purpose binary permutes
8734;; -------------------------------------------------------------------------
8735;; Includes:
6c3ce63b 8736;; - EXT
624d0f07 8737;; - SPLICE
915d28fe
RS
8738;; - TRN1
8739;; - TRN2
8740;; - UZP1
8741;; - UZP2
8742;; - ZIP1
8743;; - ZIP2
8744;; -------------------------------------------------------------------------
8745
624d0f07
RS
8746;; Like EXT, but start at the first active element.
;; Operand 1 is the governing predicate; operands 2 and 3 are the two
;; SVE_FULL inputs.  The SPLICE template always uses %0 as its first
;; vector source, so:
;;   - alternative 0 ties operand 2 to the destination ("0");
;;   - alternative 1 first copies operand 2 into operand 0 with MOVPRFX
;;     and is marked with the "movprfx" attribute.
8747(define_insn "@aarch64_sve_splice<mode>"
f75cdd2c
RS
8748 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
8749 (unspec:SVE_FULL
8750 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
8751 (match_operand:SVE_FULL 2 "register_operand" "0, w")
8752 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
8753 UNSPEC_SVE_SPLICE))]
624d0f07
RS
8754 "TARGET_SVE"
8755 "@
8756 splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
8757 movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
8758 [(set_attr "movprfx" "*, yes")]
8759)
8760
915d28fe
RS
8761;; Permutes that take half the elements from one vector and half the
8762;; elements from the other.
;; One insn per member of the PERMUTE iterator and per SVE_ALL mode.  The
;; mnemonic is the iterator's <perm_insn> attribute, and all three
;; registers are printed with the container suffix <Vctype>.
624d0f07 8763(define_insn "@aarch64_sve_<perm_insn><mode>"
6c3ce63b
RS
8764 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
8765 (unspec:SVE_ALL
8766 [(match_operand:SVE_ALL 1 "register_operand" "w")
8767 (match_operand:SVE_ALL 2 "register_operand" "w")]
f75cdd2c 8768 PERMUTE))]
9bfb28ed 8769 "TARGET_SVE"
6c3ce63b 8770 "<perm_insn>\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
915d28fe
RS
8771)
8772
36696774
RS
8773;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns
8774;; doesn't depend on the mode.
;; One insn per member of the PERMUTEQ iterator and per SVE_FULL mode.
;; Enabled only under TARGET_SVE_F64MM.  The registers are always printed
;; with the .q suffix, independent of <mode>.
8775(define_insn "@aarch64_sve_<optab><mode>"
8776 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
8777 (unspec:SVE_FULL
8778 [(match_operand:SVE_FULL 1 "register_operand" "w")
8779 (match_operand:SVE_FULL 2 "register_operand" "w")]
8780 PERMUTEQ))]
8781 "TARGET_SVE_F64MM"
8782 "<perm_insn>\t%0.q, %1.q, %2.q"
8783)
8784
915d28fe
RS
8785;; Concatenate two vectors and extract a subvector. Note that the
8786;; immediate (third) operand is the lane index, not the byte index.
;; Operand 3 arrives as a lane index.  The insn condition requires the
;; corresponding byte offset (index * <container_bits> / 8) to be in
;; [0, 255], and the output code rewrites operands[3] to that byte offset
;; before printing, because the instruction is always emitted on .b
;; elements.
;;   - Alternative 0 ties operand 1 to the destination.
;;   - Alternative 1 first copies operand 1 into operand 0 with MOVPRFX.
624d0f07 8787(define_insn "@aarch64_sve_ext<mode>"
6c3ce63b
RS
8788 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
8789 (unspec:SVE_ALL
8790 [(match_operand:SVE_ALL 1 "register_operand" "0, w")
8791 (match_operand:SVE_ALL 2 "register_operand" "w, w")
f75cdd2c
RS
8792 (match_operand:SI 3 "const_int_operand")]
8793 UNSPEC_EXT))]
915d28fe 8794 "TARGET_SVE
6c3ce63b 8795 && IN_RANGE (INTVAL (operands[3]) * <container_bits> / 8, 0, 255)"
9bfb28ed 8796 {
6c3ce63b 8797 operands[3] = GEN_INT (INTVAL (operands[3]) * <container_bits> / 8);
06b3ba23
RS
8798 return (which_alternative == 0
8799 ? "ext\\t%0.b, %0.b, %2.b, #%3"
8800 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
43cacb12 8801 }
06b3ba23 8802 [(set_attr "movprfx" "*,yes")]
43cacb12
RS
8803)
8804
28350fd1
RS
8805;; -------------------------------------------------------------------------
8806;; ---- [PRED] Special-purpose unary permutes
8807;; -------------------------------------------------------------------------
8808;; Includes:
8809;; - REV
8810;; -------------------------------------------------------------------------
8811
;; Predicate form of REV: UNSPEC_REV applied to a PRED_ALL operand.  Both
;; operands use the "Upa" predicate-register constraint and are printed
;; with the <Vetype> suffix of the predicate mode.
8812(define_insn "@aarch64_sve_rev<mode>"
8813 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8814 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
8815 UNSPEC_REV))]
8816 "TARGET_SVE"
8817 "rev\t%0.<Vetype>, %1.<Vetype>")
8818
915d28fe
RS
8819;; -------------------------------------------------------------------------
8820;; ---- [PRED] Special-purpose binary permutes
8821;; -------------------------------------------------------------------------
8822;; Includes:
8823;; - TRN1
8824;; - TRN2
8825;; - UZP1
8826;; - UZP2
8827;; - ZIP1
8828;; - ZIP2
8829;; -------------------------------------------------------------------------
8830
8831;; Permutes that take half the elements from one vector and half the
8832;; elements from the other.
;; Predicate form of the PERMUTE patterns: one insn per member of the
;; PERMUTE iterator and per PRED_ALL mode.  All operands use the "Upa"
;; predicate-register constraint and are printed with the <Vetype> suffix.
2803bc3b 8833(define_insn "@aarch64_sve_<perm_insn><mode>"
915d28fe
RS
8834 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8835 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
8836 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8837 PERMUTE))]
43cacb12 8838 "TARGET_SVE"
3e2751ce 8839 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
43cacb12
RS
8840)
8841
8535755a
TC
8842;; Special purpose permute used by the predicate generation instructions.
8843;; Unlike the normal permute patterns, these instructions operate on VNx16BI
8844;; regardless of the element size, so that all input and output bits are
8845;; well-defined. Operand 3 then indicates the size of the permute.
;; Operand 3 must satisfy aarch64_simd_imm_zero and is never printed.
;; Only its PRED_ALL mode is used, to pick the <PRED_ALL:Vetype> element
;; suffix in the output template.
8846(define_insn "@aarch64_sve_trn1_conv<mode>"
8847 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8848 (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
8849 (match_operand:VNx16BI 2 "register_operand" "Upa")
8850 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
8851 UNSPEC_TRN1_CONV))]
8852 "TARGET_SVE"
8853 "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
8854)
8855
915d28fe
RS
8856;; =========================================================================
8857;; == Conversions
8858;; =========================================================================
8859
8860;; -------------------------------------------------------------------------
8861;; ---- [INT<-INT] Packs
8862;; -------------------------------------------------------------------------
8863;; Includes:
8864;; - UZP1
8865;; -------------------------------------------------------------------------
8866
43cacb12
RS
8867;; Integer pack. Use UZP1 on the narrower type, which discards
8868;; the high part of each wide element.
;; The pattern name uses the wide mode (<Vwide>), while the mode iterator
;; is the narrow result mode SVE_FULL_BHSI.  Operands 1 and 2 are the two
;; <VWIDE> inputs.  All three registers are printed with the narrow
;; <Vetype> suffix.
8869(define_insn "vec_pack_trunc_<Vwide>"
f75cdd2c
RS
8870 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
8871 (unspec:SVE_FULL_BHSI
43cacb12
RS
8872 [(match_operand:<VWIDE> 1 "register_operand" "w")
8873 (match_operand:<VWIDE> 2 "register_operand" "w")]
8874 UNSPEC_PACK))]
8875 "TARGET_SVE"
8876 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8877)
8878
915d28fe
RS
8879;; -------------------------------------------------------------------------
8880;; ---- [INT<-INT] Unpacks
8881;; -------------------------------------------------------------------------
8882;; Includes:
8883;; - SUNPKHI
8884;; - SUNPKLO
8885;; - UUNPKHI
8886;; - UUNPKLO
8887;; -------------------------------------------------------------------------
8888
8889;; Unpack the low or high half of a vector, where "high" refers to
8890;; the low-numbered lanes for big-endian and the high-numbered lanes
8891;; for little-endian.
;; Operand 1 is the narrow SVE_FULL_BHSI input and operand 0 the <VWIDE>
;; result.  The body picks gen_aarch64_sve_<su>unpkhi_* when
;; <hi_lanes_optab> is true and gen_aarch64_sve_<su>unpklo_* otherwise,
;; then finishes with DONE, so the template itself is not emitted.
f75cdd2c 8892(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
915d28fe 8893 [(match_operand:<VWIDE> 0 "register_operand")
f75cdd2c
RS
8894 (unspec:<VWIDE>
8895 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
43cacb12
RS
8896 "TARGET_SVE"
8897 {
915d28fe 8898 emit_insn ((<hi_lanes_optab>
f75cdd2c
RS
8899 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
8900 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
915d28fe
RS
8901 (operands[0], operands[1]));
8902 DONE;
8903 }
8904)
8905
;; The <su>UNPK{HI,LO} instruction itself.  The narrow SVE_FULL_BHSI
;; source is printed with <Vetype> and the <VWIDE> destination with the
;; wide element suffix <Vewtype>.
f75cdd2c 8906(define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
915d28fe 8907 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
f75cdd2c
RS
8908 (unspec:<VWIDE>
8909 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
8910 UNPACK))]
915d28fe
RS
8911 "TARGET_SVE"
8912 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
8913)
8914
8915;; -------------------------------------------------------------------------
8916;; ---- [INT<-FP] Conversions
8917;; -------------------------------------------------------------------------
8918;; Includes:
8919;; - FCVTZS
8920;; - FCVTZU
8921;; -------------------------------------------------------------------------
8922
8923;; Unpredicated conversion of floats to integers of the same size (HF to HI,
8924;; SF to SI or DF to DI).
;; Operand 1 is the SVE_FULL_F input and operand 0 the <V_INT_EQUIV>
;; result.  The body supplies operand 2 as
;; aarch64_ptrue_reg (<VPRED>mode), and the template emits the
;; SVE_COND_FCVTI unspec with that predicate and SVE_RELAXED_GP.
99361551 8925(define_expand "<optab><mode><v_int_equiv>2"
915d28fe
RS
8926 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
8927 (unspec:<V_INT_EQUIV>
8928 [(match_dup 2)
99361551 8929 (const_int SVE_RELAXED_GP)
f75cdd2c 8930 (match_operand:SVE_FULL_F 1 "register_operand")]
99361551 8931 SVE_COND_FCVTI))]
915d28fe
RS
8932 "TARGET_SVE"
8933 {
8934 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
43cacb12
RS
8935 }
8936)
8937
95eb5537 8938;; Predicated float-to-integer conversion, either to the same width or wider.
;; Operand 1 is the governing predicate (in the predicate mode of the
;; integer result), operand 2 the SVE_FULL_F input and operand 3 the GP
;; strictness constant.  The insn condition restricts the mode pairs to
;; those whose integer element is at least as wide as the FP element.
;;   - Alternative 0 ties the input to the destination.
;;   - Alternative 1 first copies operand 2 into operand 0 with MOVPRFX.
f75cdd2c 8939(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
a4d9837e 8940 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
f75cdd2c 8941 (unspec:SVE_FULL_HSDI
a4d9837e 8942 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
99361551 8943 (match_operand:SI 3 "aarch64_sve_gp_strictness")
a4d9837e 8944 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
99361551 8945 SVE_COND_FCVTI))]
f75cdd2c 8946 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
a4d9837e
RS
8947 "@
8948 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
8949 movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
8950 [(set_attr "movprfx" "*,yes")]
915d28fe
RS
8951)
8952
95eb5537 8953;; Predicated narrowing float-to-integer conversion.
;; VNx2DF input (operand 2) to VNx4SI result (operand 0).  The governing
;; predicate (operand 1) is VNx2BI, matching the FP source rather than the
;; integer destination.  Operand 3 is the GP strictness constant.
;;   - Alternative 0 ties the input to the destination.
;;   - Alternative 1 first copies operand 2 into operand 0 with MOVPRFX.
624d0f07 8954(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
a4d9837e 8955 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
95eb5537 8956 (unspec:VNx4SI_ONLY
a4d9837e 8957 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
99361551 8958 (match_operand:SI 3 "aarch64_sve_gp_strictness")
a4d9837e 8959 (match_operand:VNx2DF_ONLY 2 "register_operand" "0, w")]
99361551 8960 SVE_COND_FCVTI))]
915d28fe 8961 "TARGET_SVE"
a4d9837e
RS
8962 "@
8963 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
8964 movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
8965 [(set_attr "movprfx" "*,yes")]
915d28fe
RS
8966)
8967
c5e16983
RS
8968;; Predicated float-to-integer conversion with merging, either to the same
8969;; width or wider.
f75cdd2c
RS
;; Operand 1 is the predicate, operand 2 the SVE_FULL_F input and operand
;; 3 the fallback value (a register or zero).  UNSPEC_SEL combines the
;; conversion result with operand 3 under operand 1.  The inner conversion
;; reuses operand 1 as its predicate and is marked SVE_STRICT_GP.  There
;; is no expand body, so the template is emitted as written.
8970(define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
8971 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
8972 (unspec:SVE_FULL_HSDI
8973 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
8974 (unspec:SVE_FULL_HSDI
624d0f07
RS
8975 [(match_dup 1)
8976 (const_int SVE_STRICT_GP)
f75cdd2c 8977 (match_operand:SVE_FULL_F 2 "register_operand")]
624d0f07 8978 SVE_COND_FCVTI)
f75cdd2c 8979 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
624d0f07 8980 UNSPEC_SEL))]
f75cdd2c 8981 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
624d0f07
RS
8982)
8983
c5e16983
RS
8984;; The first alternative doesn't need the earlyclobber, but the only case
8985;; it would help is the uninteresting one in which operands 2 and 3 are
8986;; the same register (despite having different modes). Making all the
8987;; alternatives earlyclobber makes things more consistent for the
8988;; register allocator.
;; SVE_RELAXED_GP form.  The inner conversion's predicate is the separate,
;; unconstrained operand 4.  The rewrite condition fires when operand 4 is
;; not rtx_equal_p to operand 1 and replaces it with a copy of operand 1.
;;   - Alternative 0: operand 3 is tied to the destination ("0"); a single
;;     merging FCVTZ<su>.
;;   - Alternative 1: operand 3 is "Dz"; a zeroing (/z) MOVPRFX of operand
;;     2 precedes the conversion.
;;   - Alternative 2: operand 3 is another register; an unpredicated
;;     MOVPRFX of operand 3 precedes the conversion.
0eb5e901 8989(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed"
f75cdd2c
RS
8990 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
8991 (unspec:SVE_FULL_HSDI
8992 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8993 (unspec:SVE_FULL_HSDI
c5e16983 8994 [(match_operand 4)
0eb5e901 8995 (const_int SVE_RELAXED_GP)
f75cdd2c 8996 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
c5e16983 8997 SVE_COND_FCVTI)
f75cdd2c 8998 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
c5e16983 8999 UNSPEC_SEL))]
0eb5e901 9000 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
c5e16983 9001 "@
f75cdd2c
RS
9002 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9003 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9004 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
c5e16983
RS
9005 "&& !rtx_equal_p (operands[1], operands[4])"
9006 {
9007 operands[4] = copy_rtx (operands[1]);
9008 }
9009 [(set_attr "movprfx" "*,yes,yes")]
9010)
9011
0eb5e901
RS
;; SVE_STRICT_GP counterpart of the _relaxed pattern.  Here the inner
;; conversion's predicate must be operand 1 itself (match_dup 1), so no
;; rewrite step is needed.  The three alternatives and their output
;; templates are the same: tied merge, zeroing MOVPRFX, and MOVPRFX from
;; operand 3.
9012(define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
9013 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
9014 (unspec:SVE_FULL_HSDI
9015 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9016 (unspec:SVE_FULL_HSDI
9017 [(match_dup 1)
9018 (const_int SVE_STRICT_GP)
9019 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
9020 SVE_COND_FCVTI)
9021 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9022 UNSPEC_SEL))]
9023 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9024 "@
9025 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9026 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9027 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
9028 [(set_attr "movprfx" "*,yes,yes")]
9029)
9030
624d0f07
RS
9031;; Predicated narrowing float-to-integer conversion with merging.
;; Operand 1 is a VNx2BI predicate, operand 2 the VNx2DF input and operand
;; 3 the VNx4SI fallback value (a register or zero).  UNSPEC_SEL combines
;; the conversion result with operand 3 under operand 1.  The inner
;; conversion reuses operand 1 and is marked SVE_STRICT_GP.  There is no
;; expand body, so the template is emitted as written.
9032(define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9033 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9034 (unspec:VNx4SI_ONLY
9035 [(match_operand:VNx2BI 1 "register_operand")
9036 (unspec:VNx4SI_ONLY
9037 [(match_dup 1)
9038 (const_int SVE_STRICT_GP)
9039 (match_operand:VNx2DF_ONLY 2 "register_operand")]
9040 SVE_COND_FCVTI)
9041 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
9042 UNSPEC_SEL))]
9043 "TARGET_SVE"
9044)
9045
;; Matching insn for the merging narrowing conversion.  The inner
;; predicate must be operand 1 (match_dup 1).  Operand 4 accepts any value
;; allowed by aarch64_sve_gp_strictness, so one pattern covers both GP
;; strictness settings.
;;   - Alternative 0: operand 3 is tied to the destination.
;;   - Alternative 1: operand 3 is "Dz"; a zeroing (/z) MOVPRFX of operand
;;     2, using the VNx2DF element suffix, precedes the conversion.
;;   - Alternative 2: an unpredicated MOVPRFX of operand 3 precedes the
;;     conversion.
9046(define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9047 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
9048 (unspec:VNx4SI_ONLY
9049 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
9050 (unspec:VNx4SI_ONLY
9051 [(match_dup 1)
9052 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9053 (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
9054 SVE_COND_FCVTI)
9055 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9056 UNSPEC_SEL))]
9057 "TARGET_SVE"
9058 "@
9059 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9060 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9061 movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
9062 [(set_attr "movprfx" "*,yes,yes")]
9063)
9064
915d28fe
RS
9065;; -------------------------------------------------------------------------
9066;; ---- [INT<-FP] Packs
9067;; -------------------------------------------------------------------------
9068;; The patterns in this section are synthetic.
9069;; -------------------------------------------------------------------------
9070
43cacb12
RS
9071;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Operands 1 and 2 are the two VNx2DF inputs and operand 0 the VNx4SI
;; result.  The body creates the remaining operands:
;;   - operand 3 is an all-true VNx2BI predicate;
;;   - operands 4 and 5 are fresh VNx4SI temporaries.
;; The template emits three sets in order: convert operand 1 into operand
;; 4, convert operand 2 into operand 5 (both SVE_RELAXED_GP), then combine
;; the two temporaries with UNSPEC_UZP1 into operand 0.
9072(define_expand "vec_pack_<su>fix_trunc_vnx2df"
9073 [(set (match_dup 4)
9074 (unspec:VNx4SI
9075 [(match_dup 3)
99361551
RS
9076 (const_int SVE_RELAXED_GP)
9077 (match_operand:VNx2DF 1 "register_operand")]
9078 SVE_COND_FCVTI))
43cacb12
RS
9079 (set (match_dup 5)
9080 (unspec:VNx4SI
9081 [(match_dup 3)
99361551
RS
9082 (const_int SVE_RELAXED_GP)
9083 (match_operand:VNx2DF 2 "register_operand")]
9084 SVE_COND_FCVTI))
43cacb12
RS
9085 (set (match_operand:VNx4SI 0 "register_operand")
9086 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9087 "TARGET_SVE"
9088 {
16de3637 9089 operands[3] = aarch64_ptrue_reg (VNx2BImode);
43cacb12
RS
9090 operands[4] = gen_reg_rtx (VNx4SImode);
9091 operands[5] = gen_reg_rtx (VNx4SImode);
9092 }
9093)
f1739b48 9094
915d28fe
RS
9095;; -------------------------------------------------------------------------
9096;; ---- [INT<-FP] Unpacks
9097;; -------------------------------------------------------------------------
9098;; No patterns here yet!
9099;; -------------------------------------------------------------------------
9d4ac06e 9100
915d28fe
RS
9101;; -------------------------------------------------------------------------
9102;; ---- [FP<-INT] Conversions
9103;; -------------------------------------------------------------------------
9104;; Includes:
9105;; - SCVTF
9106;; - UCVTF
9107;; -------------------------------------------------------------------------
a08acce8 9108
915d28fe
RS
9109;; Unpredicated conversion of integers to floats of the same size
9110;; (HI to HF, SI to SF or DI to DF).
;; Operand 1 is the <V_INT_EQUIV> input and operand 0 the SVE_FULL_F
;; result.  The body supplies operand 2 as
;; aarch64_ptrue_reg (<VPRED>mode), and the template emits the
;; SVE_COND_ICVTF unspec with that predicate and SVE_RELAXED_GP.
9111(define_expand "<optab><v_int_equiv><mode>2"
f75cdd2c
RS
9112 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9113 (unspec:SVE_FULL_F
915d28fe 9114 [(match_dup 2)
99361551
RS
9115 (const_int SVE_RELAXED_GP)
9116 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
9117 SVE_COND_ICVTF))]
a08acce8 9118 "TARGET_SVE"
f4fde1b3 9119 {
915d28fe 9120 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
f4fde1b3 9121 }
b41d1f6e
RS
9122)
9123
95eb5537
RS
9124;; Predicated integer-to-float conversion, either to the same width or
9125;; narrower.
;; Operand 1 is the governing predicate (in the predicate mode of the
;; integer source), operand 2 the SVE_FULL_HSDI input and operand 3 the GP
;; strictness constant.  The insn condition restricts the mode pairs to
;; those whose integer element is at least as wide as the FP element.
;;   - Alternative 0 ties the input to the destination.
;;   - Alternative 1 first copies operand 2 into operand 0 with MOVPRFX.
f75cdd2c 9126(define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
a4d9837e 9127 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
f75cdd2c 9128 (unspec:SVE_FULL_F
a4d9837e 9129 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
99361551 9130 (match_operand:SI 3 "aarch64_sve_gp_strictness")
a4d9837e 9131 (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")]
99361551 9132 SVE_COND_ICVTF))]
f75cdd2c 9133 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
a4d9837e
RS
9134 "@
9135 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9136 movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
9137 [(set_attr "movprfx" "*,yes")]
f1739b48 9138)
6c9c7b73 9139
95eb5537 9140;; Predicated widening integer-to-float conversion.
;; VNx4SI input (operand 2) to VNx2DF result (operand 0).  The governing
;; predicate (operand 1) is VNx2BI, matching the FP destination.  Operand
;; 3 is the GP strictness constant.
;;   - Alternative 0 ties the input to the destination.
;;   - Alternative 1 first copies operand 2 into operand 0 with MOVPRFX.
624d0f07 9141(define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
a4d9837e 9142 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w")
95eb5537 9143 (unspec:VNx2DF_ONLY
a4d9837e 9144 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
99361551 9145 (match_operand:SI 3 "aarch64_sve_gp_strictness")
a4d9837e 9146 (match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
99361551 9147 SVE_COND_ICVTF))]
6c9c7b73 9148 "TARGET_SVE"
a4d9837e
RS
9149 "@
9150 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9151 movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
9152 [(set_attr "movprfx" "*,yes")]
915d28fe 9153)
6c9c7b73 9154
c5e16983
RS
9155;; Predicated integer-to-float conversion with merging, either to the same
9156;; width or narrower.
f75cdd2c
RS
;; Operand 1 is the predicate, operand 2 the SVE_FULL_HSDI input and
;; operand 3 the fallback value (a register or zero).  UNSPEC_SEL combines
;; the conversion result with operand 3 under operand 1.  The inner
;; conversion reuses operand 1 and is marked SVE_STRICT_GP.  There is no
;; expand body, so the template is emitted as written.
9157(define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9158 [(set (match_operand:SVE_FULL_F 0 "register_operand")
9159 (unspec:SVE_FULL_F
9160 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9161 (unspec:SVE_FULL_F
624d0f07
RS
9162 [(match_dup 1)
9163 (const_int SVE_STRICT_GP)
f75cdd2c 9164 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
624d0f07 9165 SVE_COND_ICVTF)
f75cdd2c 9166 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
624d0f07 9167 UNSPEC_SEL))]
f75cdd2c 9168 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
624d0f07
RS
9169)
9170
c5e16983
RS
9171;; The first alternative doesn't need the earlyclobber, but the only case
9172;; it would help is the uninteresting one in which operands 2 and 3 are
9173;; the same register (despite having different modes). Making all the
9174;; alternatives earlyclobber makes things more consistent for the
9175;; register allocator.
;; SVE_RELAXED_GP form.  The inner conversion's predicate is the separate,
;; unconstrained operand 4.  The rewrite condition fires when operand 4 is
;; not rtx_equal_p to operand 1 and replaces it with a copy of operand 1.
;;   - Alternative 0: operand 3 is tied to the destination ("0"); a single
;;     merging <su>CVTF.
;;   - Alternative 1: operand 3 is "Dz"; a zeroing (/z) MOVPRFX of operand
;;     2 precedes the conversion.
;;   - Alternative 2: operand 3 is another register; an unpredicated
;;     MOVPRFX of operand 3 precedes the conversion.
0eb5e901 9176(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed"
f75cdd2c
RS
9177 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
9178 (unspec:SVE_FULL_F
9179 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9180 (unspec:SVE_FULL_F
c5e16983 9181 [(match_operand 4)
0eb5e901 9182 (const_int SVE_RELAXED_GP)
f75cdd2c 9183 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
c5e16983 9184 SVE_COND_ICVTF)
f75cdd2c 9185 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
c5e16983 9186 UNSPEC_SEL))]
0eb5e901 9187 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
c5e16983 9188 "@
f75cdd2c
RS
9189 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9190 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9191 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
c5e16983
RS
9192 "&& !rtx_equal_p (operands[1], operands[4])"
9193 {
9194 operands[4] = copy_rtx (operands[1]);
9195 }
9196 [(set_attr "movprfx" "*,yes,yes")]
0eb5e901
RS
9197)
9198
;; SVE_STRICT_GP counterpart of the _relaxed pattern.  Here the inner
;; conversion's predicate must be operand 1 itself (match_dup 1), so no
;; rewrite step is needed.  The three alternatives and their output
;; templates are the same: tied merge, zeroing MOVPRFX, and MOVPRFX from
;; operand 3.
9199(define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
9200 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
9201 (unspec:SVE_FULL_F
9202 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9203 (unspec:SVE_FULL_F
9204 [(match_dup 1)
9205 (const_int SVE_STRICT_GP)
9206 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
9207 SVE_COND_ICVTF)
9208 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9209 UNSPEC_SEL))]
9210 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9211 "@
9212 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9213 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9214 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
9215 [(set_attr "movprfx" "*,yes,yes")]
c5e16983
RS
9216)
9217
624d0f07
RS
9218;; Predicated widening integer-to-float conversion with merging.
;; Operand 1 is a VNx2BI predicate, operand 2 the VNx4SI input and operand
;; 3 the VNx2DF fallback value (a register or zero).  UNSPEC_SEL combines
;; the conversion result with operand 3 under operand 1.  The inner
;; conversion reuses operand 1 and is marked SVE_STRICT_GP.  There is no
;; expand body, so the template is emitted as written.
9219(define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9220 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9221 (unspec:VNx2DF_ONLY
9222 [(match_operand:VNx2BI 1 "register_operand")
9223 (unspec:VNx2DF_ONLY
9224 [(match_dup 1)
9225 (const_int SVE_STRICT_GP)
9226 (match_operand:VNx4SI_ONLY 2 "register_operand")]
9227 SVE_COND_ICVTF)
9228 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
9229 UNSPEC_SEL))]
9230 "TARGET_SVE"
9231)
9232
;; Matching insn for the merging widening conversion.  The inner predicate
;; must be operand 1 (match_dup 1).  Operand 4 accepts any value allowed
;; by aarch64_sve_gp_strictness, so one pattern covers both GP strictness
;; settings.
;;   - Alternative 0: operand 3 is tied to the destination.  This
;;     alternative's destination constraint is "=w", with no earlyclobber.
;;   - Alternative 1: operand 3 is "Dz"; a zeroing (/z) MOVPRFX of operand
;;     2 precedes the conversion.
;;   - Alternative 2: an unpredicated MOVPRFX of operand 3 precedes the
;;     conversion.
9233(define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9234 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
9235 (unspec:VNx2DF_ONLY
9236 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
9237 (unspec:VNx2DF_ONLY
9238 [(match_dup 1)
9239 (match_operand:SI 4 "aarch64_sve_gp_strictness")
9240 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
9241 SVE_COND_ICVTF)
9242 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9243 UNSPEC_SEL))]
9244 "TARGET_SVE"
9245 "@
9246 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9247 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9248 movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
9249 [(set_attr "movprfx" "*,yes,yes")]
9250)
9251
915d28fe
RS
9252;; -------------------------------------------------------------------------
9253;; ---- [FP<-INT] Packs
9254;; -------------------------------------------------------------------------
9255;; No patterns here yet!
9256;; -------------------------------------------------------------------------
6c9c7b73 9257
915d28fe
RS
9258;; -------------------------------------------------------------------------
9259;; ---- [FP<-INT] Unpacks
9260;; -------------------------------------------------------------------------
9261;; The patterns in this section are synthetic.
9262;; -------------------------------------------------------------------------
9263
9264;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
9265;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
9266;; unpacked VNx4SI to VNx2DF.
;; Implementation summary: the body zips operand 1 with itself into a
;; fresh VNx4SI temporary, using ZIP2 when <hi_lanes_optab> is true and
;; ZIP1 otherwise.  It then emits the predicated
;; <FLOATUORS:optab>_extendvnx4sivnx2df conversion of that temporary into
;; operand 0, with an all-true VNx2BI predicate and SVE_RELAXED_GP.  The
;; body ends with DONE, so the template itself is not emitted.
9267(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
9268 [(match_operand:VNx2DF 0 "register_operand")
9269 (FLOATUORS:VNx2DF
9270 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
9271 UNPACK_UNSIGNED))]
9272 "TARGET_SVE"
9273 {
9274 /* Use ZIP to do the unpack, since we don't care about the upper halves
9275 and since it has the nice property of not needing any subregs.
9276 If using UUNPK* turns out to be preferable, we could model it as
9277 a ZIP whose first operand is zero. */
9278 rtx temp = gen_reg_rtx (VNx4SImode);
9279 emit_insn ((<hi_lanes_optab>
9280 ? gen_aarch64_sve_zip2vnx4si
9281 : gen_aarch64_sve_zip1vnx4si)
9282 (temp, operands[1], operands[1]));
9283 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
99361551 9284 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
95eb5537 9285 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
99361551 9286 (operands[0], ptrue, temp, strictness));
6c9c7b73
AM
9287 DONE;
9288 }
9289)
9290
915d28fe
RS
9291;; -------------------------------------------------------------------------
9292;; ---- [FP<-FP] Packs
9293;; -------------------------------------------------------------------------
9294;; Includes:
9295;; - FCVT
9296;; -------------------------------------------------------------------------
9297
;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
;; the results into a single vector.
;;
;; Operands 1 and 2 are the two <VWIDE> inputs.  Each is converted with
;; UNSPEC_COND_FCVT (SVE_RELAXED_GP) into the temporaries created as
;; operands 4 and 5, which UNSPEC_UZP1 then combines into operand 0.
;; Operand 3 is the <VWIDE_PRED> predicate from aarch64_ptrue_reg.
(define_expand "vec_pack_trunc_<Vwide>"
  [(set (match_dup 4)
        (unspec:SVE_FULL_HSF
          [(match_dup 3)
           (const_int SVE_RELAXED_GP)
           (match_operand:<VWIDE> 1 "register_operand")]
          UNSPEC_COND_FCVT))
   (set (match_dup 5)
        (unspec:SVE_FULL_HSF
          [(match_dup 3)
           (const_int SVE_RELAXED_GP)
           (match_operand:<VWIDE> 2 "register_operand")]
          UNSPEC_COND_FCVT))
   (set (match_operand:SVE_FULL_HSF 0 "register_operand")
        (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    operands[4] = gen_reg_rtx (<MODE>mode);
    operands[5] = gen_reg_rtx (<MODE>mode);
  }
)
9feeafd7 9322
;; Predicated float-to-float truncation.
;;
;; Operand 1 is the governing predicate (in the wider mode's predicate
;; mode), operand 2 the wider source and operand 3 the GP strictness.
;; The insn condition restricts the mode pairs to those where the source
;; elements are wider than the destination elements.  The second
;; alternative uses MOVPRFX to copy operand 2 into an untied destination.
(define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSF
          [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_SDF 2 "register_operand" "0, w")]
          SVE_COND_FCVT))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
   movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
a9fad8fe 9337
;; Predicated float-to-float truncation with merging.
;;
;; Builds an UNSPEC_SEL in which operand 1 selects between the
;; SVE_STRICT_GP conversion of operand 2 and the fallback value in
;; operand 3 (a register or zero).  There is no C body, so the template
;; is emitted as written.
(define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
        (unspec:SVE_FULL_HSF
          [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
           (unspec:SVE_FULL_HSF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_SDF 2 "register_operand")]
             SVE_COND_FCVT)
           (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
)
9352
;; Insn matching the merging float-to-float truncation.  Operand 1 selects
;; between the conversion of operand 2 and the fallback in operand 3.
;; The alternatives are:
;;   1. operand 3 tied to the destination: plain merging FCVT;
;;   2. operand 3 is zero (Dz): zeroing MOVPRFX, then FCVT;
;;   3. operand 3 in another register: MOVPRFX from operand 3, then FCVT.
(define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
        (unspec:SVE_FULL_HSF
          [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_FULL_HSF
             [(match_dup 1)
              (match_operand:SI 4 "aarch64_sve_gp_strictness")
              (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
             SVE_COND_FCVT)
           (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
   movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
   movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
9371
896dff99
RS
9372;; -------------------------------------------------------------------------
9373;; ---- [FP<-FP] Packs (bfloat16)
9374;; -------------------------------------------------------------------------
9375;; Includes:
9376;; - BFCVT (BF16)
9377;; - BFCVTNT (BF16)
9378;; -------------------------------------------------------------------------
9379
;; Predicated BFCVT.
;;
;; Operand 1 is the VNx4BI governing predicate, operand 2 the VNx4SF_ONLY
;; source and operand 3 the GP strictness.  The second alternative uses
;; MOVPRFX to copy operand 2 into an untied destination.
(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx8BF_ONLY
          [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:VNx4SF_ONLY 2 "register_operand" "0, w")]
          SVE_COND_FCVT))]
  "TARGET_SVE_BF16"
  "@
   bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s"
  [(set_attr "movprfx" "*,yes")]
)
9394
;; Predicated BFCVT with merging.
;;
;; Builds an UNSPEC_SEL in which operand 1 selects between the
;; SVE_STRICT_GP conversion of operand 2 and the fallback value in
;; operand 3 (a register or zero).
(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
        (unspec:VNx8BF_ONLY
          [(match_operand:VNx4BI 1 "register_operand")
           (unspec:VNx8BF_ONLY
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:VNx4SF_ONLY 2 "register_operand")]
             SVE_COND_FCVT)
           (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE_BF16"
)
9409
;; Insn matching the merging BFCVT.  Operand 1 selects between the
;; conversion of operand 2 and the fallback in operand 3.
;; The alternatives are:
;;   1. operand 3 tied to the destination: plain merging BFCVT;
;;   2. operand 3 is zero (Dz): zeroing MOVPRFX, then BFCVT;
;;   3. operand 3 in another register: MOVPRFX from operand 3, then BFCVT.
(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
        (unspec:VNx8BF_ONLY
          [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
           (unspec:VNx8BF_ONLY
             [(match_dup 1)
              (match_operand:SI 4 "aarch64_sve_gp_strictness")
              (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
             SVE_COND_FCVT)
           (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE_BF16"
  "@
   bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
  [(set_attr "movprfx" "*,yes,yes")]
)
9428
;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; This instruction does not take MOVPRFX.
;;
;; Operand 1 is the existing VNx8BF_ONLY value and is tied to the
;; destination, operand 2 is the VNx4BI governing predicate and operand 3
;; is the VNx4SF source.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
        (unspec:VNx8BF_ONLY
          [(match_operand:VNx4BI 2 "register_operand" "Upl")
           (const_int SVE_STRICT_GP)
           (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
           (match_operand:VNx4SF 3 "register_operand" "w")]
          UNSPEC_COND_FCVTNT))]
  "TARGET_SVE_BF16"
  "bfcvtnt\t%0.h, %2/m, %3.s"
)
9445
915d28fe
RS
9446;; -------------------------------------------------------------------------
9447;; ---- [FP<-FP] Unpacks
9448;; -------------------------------------------------------------------------
9449;; Includes:
9450;; - FCVT
9451;; -------------------------------------------------------------------------
9452
;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion, then float-convert the
;; unpacked source.
;;
;; Operand 0 is the <VWIDE> result and operand 1 the SVE_FULL_HSF input.
;; The RTL template is only descriptive: the C body emits the real insns
;; and finishes with DONE.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:SVE_FULL_HSF
     [(match_operand:SVE_FULL_HSF 1 "register_operand")]
     UNPACK_UNSIGNED)]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (<MODE>mode);
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_zip2<mode>
                : gen_aarch64_sve_zip1<mode>)
               (temp, operands[1], operands[1]));
    /* Extend the zipped value under the <VWIDE_PRED> predicate returned
       by aarch64_ptrue_reg, using SVE_RELAXED_GP as the strictness.  */
    rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
    emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
               (operands[0], ptrue, temp, strictness));
    DONE;
  }
)
9479
;; Predicated float-to-float extension.
;;
;; Operand 1 is the governing predicate (in the destination mode's
;; predicate mode), operand 2 the narrower source and operand 3 the GP
;; strictness.  The insn condition restricts the mode pairs to those
;; where the destination elements are wider than the source elements.
;; The second alternative uses MOVPRFX to copy operand 2 into an untied
;; destination.
(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_SDF
          [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_HSF 2 "register_operand" "0, w")]
          SVE_COND_FCVT))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
9494
;; Predicated float-to-float extension with merging.
;;
;; Builds an UNSPEC_SEL in which operand 1 selects between the
;; SVE_STRICT_GP conversion of operand 2 and the fallback value in
;; operand 3 (a register or zero).  There is no C body, so the template
;; is emitted as written.
(define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
        (unspec:SVE_FULL_SDF
          [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
           (unspec:SVE_FULL_SDF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_HSF 2 "register_operand")]
             SVE_COND_FCVT)
           (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
)
9509
;; Insn matching the merging float-to-float extension.  Operand 1 selects
;; between the conversion of operand 2 and the fallback in operand 3.
;; The alternatives are:
;;   1. operand 3 tied to the destination: plain merging FCVT;
;;   2. operand 3 is zero (Dz): zeroing MOVPRFX, then FCVT;
;;   3. operand 3 in another register: MOVPRFX from operand 3, then FCVT.
(define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
        (unspec:SVE_FULL_SDF
          [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_FULL_SDF
             [(match_dup 1)
              (match_operand:SI 4 "aarch64_sve_gp_strictness")
              (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
             SVE_COND_FCVT)
           (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
9528
915d28fe
RS
9529;; -------------------------------------------------------------------------
9530;; ---- [PRED<-PRED] Packs
9531;; -------------------------------------------------------------------------
9532;; Includes:
9533;; - UZP1
9534;; -------------------------------------------------------------------------
a9fad8fe 9535
;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
;;
;; Operands 1 and 2 are the two <VWIDE> predicate inputs and operand 0
;; is the PRED_BHS result.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
        (unspec:PRED_BHS
          [(match_operand:<VWIDE> 1 "register_operand" "Upa")
           (match_operand:<VWIDE> 2 "register_operand" "Upa")]
          UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
3a0afad0 9547
915d28fe
RS
9548;; -------------------------------------------------------------------------
9549;; ---- [PRED<-PRED] Unpacks
9550;; -------------------------------------------------------------------------
9551;; Includes:
9552;; - PUNPKHI
9553;; - PUNPKLO
9554;; -------------------------------------------------------------------------
9555
;; Unpack the low or high half of a predicate, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
;;
;; The C body picks the PUNPKHI or PUNPKLO generator according to
;; <hi_lanes_optab> and emits it from operand 1 into operand 0.
(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
                   UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
                : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
               (operands[0], operands[1]));
    DONE;
  }
)
915d28fe 9572
;; PUNPKHI/PUNPKLO: unpack one half of the PRED_BHS predicate in operand 1
;; into the <VWIDE> predicate in operand 0.  The output always uses the
;; .h destination and .b source suffixes, whatever the predicate mode.
(define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
        (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
                        UNPACK_UNSIGNED))]
  "TARGET_SVE"
  "punpk<perm_hilo>\t%0.h, %1.b"
)
624d0f07
RS
9580
9581;; =========================================================================
9582;; == Vector partitioning
9583;; =========================================================================
9584
9585;; -------------------------------------------------------------------------
9586;; ---- [PRED] Unary partitioning
9587;; -------------------------------------------------------------------------
9588;; Includes:
9589;; - BRKA
9590;; - BRKAS
9591;; - BRKB
9592;; - BRKBS
9593;; -------------------------------------------------------------------------
9594
;; Note that unlike most other instructions that have both merging and
;; zeroing forms, these instructions don't operate elementwise and so
;; don't fit the IFN_COND model.
;;
;; Operand 1 is the governing predicate and operand 2 the source
;; predicate.  Operand 3 selects the form: zero (Dz) gives the zeroing
;; (/z) instruction, and a register tied to the destination gives the
;; merging (/m) instruction.
(define_insn "@aarch64_brk<brk_op>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
        (unspec:VNx16BI
          [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
           (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
           (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
          SVE_BRK_UNARY))]
  "TARGET_SVE"
  "@
   brk<brk_op>\t%0.b, %1/z, %2.b
   brk<brk_op>\t%0.b, %1/m, %2.b"
)
9610
;; Same, but also producing a flags result.
;;
;; The first set describes the UNSPEC_PTEST of the break result, governed
;; by operand 1 with operand 4 as the ptrue flag.  The second set writes
;; the same break result to operand 0.  Both alternatives emit the
;; flag-setting "s" form of the instruction.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
           (match_dup 1)
           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
           (unspec:VNx16BI
             [(match_dup 1)
              (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
              (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
             SVE_BRK_UNARY)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
        (unspec:VNx16BI
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          SVE_BRK_UNARY))]
  "TARGET_SVE"
  "@
   brk<brk_op>s\t%0.b, %1/z, %2.b
   brk<brk_op>s\t%0.b, %1/m, %2.b"
)
9635
;; Same, but with only the flags result being interesting.
;;
;; The predicate result is not used, so operand 0 is just a clobbered
;; scratch register that receives the instruction's output.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
           (match_dup 1)
           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
           (unspec:VNx16BI
             [(match_dup 1)
              (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
              (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
             SVE_BRK_UNARY)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
  "TARGET_SVE"
  "@
   brk<brk_op>s\t%0.b, %1/z, %2.b
   brk<brk_op>s\t%0.b, %1/m, %2.b"
)
9655
9656;; -------------------------------------------------------------------------
9657;; ---- [PRED] Binary partitioning
9658;; -------------------------------------------------------------------------
9659;; Includes:
9660;; - BRKN
9661;; - BRKNS
9662;; - BRKPA
9663;; - BRKPAS
9664;; - BRKPB
9665;; - BRKPBS
9666;; -------------------------------------------------------------------------
9667
;; Binary BRKs (BRKN, BRKPA, BRKPB).
;;
;; Operand 1 is the governing predicate (always used as /z) and operands
;; 2 and 3 are the two source predicates.  The constraint on operand 3
;; and the operand printed last both come from the SVE_BRK_BINARY
;; iterator attributes <brk_reg_con> and <brk_reg_opno>.
(define_insn "@aarch64_brk<brk_op>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (unspec:VNx16BI
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand:VNx16BI 2 "register_operand" "Upa")
           (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
          SVE_BRK_BINARY))]
  "TARGET_SVE"
  "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
)
9679
;; Same, but also producing a flags result.
;;
;; The first set describes the UNSPEC_PTEST of the break result, governed
;; by operand 1 with operand 4 as the ptrue flag.  The second set writes
;; the same break result to operand 0.  The flag-setting "s" form of the
;; instruction is emitted.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
           (unspec:VNx16BI
             [(match_dup 1)
              (match_operand:VNx16BI 2 "register_operand" "Upa")
              (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
             SVE_BRK_BINARY)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (unspec:VNx16BI
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          SVE_BRK_BINARY))]
  "TARGET_SVE"
  "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
)
9702
;; Same, but with only the flags result being interesting.
;;
;; The predicate result is not used, so operand 0 is just a clobbered
;; scratch register that receives the instruction's output.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
           (unspec:VNx16BI
             [(match_dup 1)
              (match_operand:VNx16BI 2 "register_operand" "Upa")
              (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
             SVE_BRK_BINARY)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
)
9720
9721;; -------------------------------------------------------------------------
9722;; ---- [PRED] Scalarization
9723;; -------------------------------------------------------------------------
9724;; Includes:
9725;; - PFIRST
9726;; - PNEXT
9727;; -------------------------------------------------------------------------
9728
;; PFIRST/PNEXT on a predicate register.  Operand 1 is the governing
;; predicate, operand 2 a ptrue flag and operand 3 the input predicate,
;; which is tied to the destination.  The flags register is clobbered
;; rather than modelled as a result.  The insn condition limits each
;; SVE_PITER operation to element sizes up to its <max_elem_bits>.
(define_insn "@aarch64_sve_<sve_pred_op><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL
          [(match_operand:PRED_ALL 1 "register_operand" "Upa")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (match_operand:PRED_ALL 3 "register_operand" "0")]
          SVE_PITER))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
)
9740
;; Same, but also producing a flags result.
;;
;; Operands 2/3 are the predicate and ptrue flag used by the PTEST, and
;; operands 4/5 are those used by the PFIRST/PNEXT itself.  The insn only
;; matches when aarch64_sve_same_pred_for_ptest_p accepts operands 2 and
;; 4.  If the two are not already rtx_equal_p, the rewrite replaces
;; operands 4 and 5 with operands 2 and 3.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 2)
           (match_operand:SI 3 "aarch64_sve_ptrue_flag")
           (unspec:PRED_ALL
             [(match_operand 4)
              (match_operand:SI 5 "aarch64_sve_ptrue_flag")
              (match_operand:PRED_ALL 6 "register_operand" "0")]
             SVE_PITER)]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL
          [(match_dup 4)
           (match_dup 5)
           (match_dup 6)]
          SVE_PITER))]
  "TARGET_SVE
   && <max_elem_bits> >= <elem_bits>
   && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  {
    operands[4] = operands[2];
    operands[5] = operands[3];
  }
)
9770
;; Same, but with only the flags result being interesting.
;;
;; The predicate result is not used, so operand 0 is just a clobbered
;; scratch register.  The insn only matches when
;; aarch64_sve_same_pred_for_ptest_p accepts operands 2 and 4.  If the
;; two are not already rtx_equal_p, the rewrite replaces operands 4 and 5
;; with operands 2 and 3.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 2)
           (match_operand:SI 3 "aarch64_sve_ptrue_flag")
           (unspec:PRED_ALL
             [(match_operand 4)
              (match_operand:SI 5 "aarch64_sve_ptrue_flag")
              (match_operand:PRED_ALL 6 "register_operand" "0")]
             SVE_PITER)]
          UNSPEC_PTEST))
   (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
  "TARGET_SVE
   && <max_elem_bits> >= <elem_bits>
   && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  {
    operands[4] = operands[2];
    operands[5] = operands[3];
  }
)
9795
9796;; =========================================================================
9797;; == Counting elements
9798;; =========================================================================
9799
9800;; -------------------------------------------------------------------------
9801;; ---- [INT] Count elements in a pattern (scalar)
9802;; -------------------------------------------------------------------------
9803;; Includes:
9804;; - CNTB
9805;; - CNTD
9806;; - CNTH
9807;; - CNTW
9808;; -------------------------------------------------------------------------
9809
;; Count the number of elements in an svpattern.  Operand 1 is the pattern,
;; operand 2 is the number of elements that fit in a 128-bit block, and
;; operand 3 is a multiplier in the range [1, 16].
;;
;; Note that this pattern isn't used for SV_ALL (but would work for that too).
;;
;; The SImode count is zero-extended into the DImode operand 0.  The
;; assembly text is produced by aarch64_output_sve_cnt_pat_immediate from
;; the "cnt" prefix, the X-register name of operand 0 and operands 1-3.
(define_insn "aarch64_sve_cnt_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (unspec:SI [(match_operand:DI 1 "const_int_operand")
                      (match_operand:DI 2 "const_int_operand")
                      (match_operand:DI 3 "const_int_operand")]
                     UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
  }
)
9827
9828;; -------------------------------------------------------------------------
9829;; ---- [INT] Increment by the number of elements in a pattern (scalar)
9830;; -------------------------------------------------------------------------
9831;; Includes:
9832;; - INC
9833;; - SQINC
9834;; - UQINC
9835;; -------------------------------------------------------------------------
9836
;; Increment a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Operand 1 is the value being incremented and is tied to the
;; destination.  Operands 2-4 are the UNSPEC_SVE_CNT_PAT arguments and
;; are handed to the output routine via operands + 2.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ANY_PLUS:DI (zero_extend:DI
                       (unspec:SI [(match_operand:DI 2 "const_int_operand")
                                   (match_operand:DI 3 "const_int_operand")
                                   (match_operand:DI 4 "const_int_operand")]
                                  UNSPEC_SVE_CNT_PAT))
                     (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
                                                 operands + 2);
  }
)
9853
;; Increment an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
;;
;; Operand 1 is tied to the destination.  The instruction is printed with
;; the X-register name of operand 0 even though the RTL result is SImode.
(define_insn "*aarch64_sve_incsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
                             (match_operand:DI 3 "const_int_operand")
                             (match_operand:DI 4 "const_int_operand")]
                            UNSPEC_SVE_CNT_PAT)
                 (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
  }
)
9869
;; Increment an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Operand 1 is tied to the destination.  For SS_PLUS the instruction is
;; printed with both the X and W names of operand 0; for the other
;; SAT_PLUS code only the W name is printed.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (<paired_extend>:DI
          (SAT_PLUS:SI
            (unspec:SI [(match_operand:DI 2 "const_int_operand")
                        (match_operand:DI 3 "const_int_operand")
                        (match_operand:DI 4 "const_int_operand")]
                       UNSPEC_SVE_CNT_PAT)
            (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
                                                 operands + 2);
  }
)
9890
9891;; -------------------------------------------------------------------------
9892;; ---- [INT] Increment by the number of elements in a pattern (vector)
9893;; -------------------------------------------------------------------------
9894;; Includes:
9895;; - INC
9896;; - SQINC
9897;; - UQINC
9898;; -------------------------------------------------------------------------
9899
;; Increment a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Alternative 1 ties operand 1 to the destination.  Alternative 2 first
;; emits a MOVPRFX that copies operand 1 into the destination.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
        (ANY_PLUS:VNx2DI
          (vec_duplicate:VNx2DI
            (zero_extend:DI
              (unspec:SI [(match_operand:DI 2 "const_int_operand")
                          (match_operand:DI 3 "const_int_operand")
                          (match_operand:DI 4 "const_int_operand")]
                         UNSPEC_SVE_CNT_PAT)))
          (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)
9921
;; Increment a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Alternative 1 ties operand 1 to the destination.  Alternative 2 first
;; emits a MOVPRFX that copies operand 1 into the destination.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
        (ANY_PLUS:VNx4SI
          (vec_duplicate:VNx4SI
            (unspec:SI [(match_operand:DI 2 "const_int_operand")
                        (match_operand:DI 3 "const_int_operand")
                        (match_operand:DI 4 "const_int_operand")]
                       UNSPEC_SVE_CNT_PAT))
          (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)
9942
;; Increment a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; The expander has no C body.  It describes the SImode count as being
;; truncated to HImode before it is duplicated across the vector.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
        (ANY_PLUS:VNx8HI
          (vec_duplicate:VNx8HI
            (truncate:HI
              (unspec:SI [(match_operand:DI 2 "const_int_operand")
                          (match_operand:DI 3 "const_int_operand")
                          (match_operand:DI 4 "const_int_operand")]
                         UNSPEC_SVE_CNT_PAT)))
          (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
)
9957
;; Insn for incrementing a vector of HIs by the number of elements in an
;; svpattern.  The HImode count is matched through operand 5, any
;; operator accepted by subreg_lowpart_operator, applied to the SImode
;; UNSPEC_SVE_CNT_PAT.  Alternative 1 ties operand 1 to the destination.
;; Alternative 2 first emits a MOVPRFX that copies operand 1 into the
;; destination.
(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
        (ANY_PLUS:VNx8HI
          (vec_duplicate:VNx8HI
            (match_operator:HI 5 "subreg_lowpart_operator"
              [(unspec:SI [(match_operand:DI 2 "const_int_operand")
                           (match_operand:DI 3 "const_int_operand")
                           (match_operand:DI 4 "const_int_operand")]
                          UNSPEC_SVE_CNT_PAT)]))
          (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)
9977
9978;; -------------------------------------------------------------------------
9979;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
9980;; -------------------------------------------------------------------------
9981;; Includes:
9982;; - DEC
9983;; - SQDEC
9984;; - UQDEC
9985;; -------------------------------------------------------------------------
9986
;; Decrement a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Operand 1 is the value being decremented and is tied to the
;; destination.  Operands 2-4 are the UNSPEC_SVE_CNT_PAT arguments and
;; are handed to the output routine via operands + 2.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
                      (zero_extend:DI
                        (unspec:SI [(match_operand:DI 2 "const_int_operand")
                                    (match_operand:DI 3 "const_int_operand")
                                    (match_operand:DI 4 "const_int_operand")]
                                   UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
                                                 operands + 2);
  }
)
10003
;; Decrement an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
;;
;; Operand 1 is tied to the destination.  The instruction is printed with
;; the X-register name of operand 0 even though the RTL result is SImode.
(define_insn "*aarch64_sve_decsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (minus:SI (match_operand:SI 1 "register_operand" "0")
                  (unspec:SI [(match_operand:DI 2 "const_int_operand")
                              (match_operand:DI 3 "const_int_operand")
                              (match_operand:DI 4 "const_int_operand")]
                             UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
  }
)
10019
;; Decrement an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Operand 1 is tied to the destination.  For SS_MINUS the instruction is
;; printed with both the X and W names of operand 0; for the other
;; SAT_MINUS code only the W name is printed.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (<paired_extend>:DI
          (SAT_MINUS:SI
            (match_operand:SI_ONLY 1 "register_operand" "0")
            (unspec:SI [(match_operand:DI 2 "const_int_operand")
                        (match_operand:DI 3 "const_int_operand")
                        (match_operand:DI 4 "const_int_operand")]
                       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
                                                 operands + 2);
  }
)
10040
10041;; -------------------------------------------------------------------------
10042;; ---- [INT] Decrement by the number of elements in a pattern (vector)
10043;; -------------------------------------------------------------------------
10044;; Includes:
10045;; - DEC
10046;; - SQDEC
10047;; - UQDEC
10048;; -------------------------------------------------------------------------
10049
;; Decrement a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; Alternative 1 ties operand 1 to the destination.  Alternative 2 first
;; emits a MOVPRFX that copies operand 1 into the destination.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
        (ANY_MINUS:VNx2DI
          (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
          (vec_duplicate:VNx2DI
            (zero_extend:DI
              (unspec:SI [(match_operand:DI 2 "const_int_operand")
                          (match_operand:DI 3 "const_int_operand")
                          (match_operand:DI 4 "const_int_operand")]
                         UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)
10071
10072;; Decrement a vector of SIs by the number of elements in an svpattern.
10073;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; The SImode count is duplicated across the vector directly, with no
;; extension or truncation.  Alternative 0 ties operand 1 to the
;; destination.  Alternative 1 uses an earlyclobber destination and emits
;; a MOVPRFX from operand 1 before the decrement itself.
10074(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10075 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10076 (ANY_MINUS:VNx4SI
10077 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
10078 (vec_duplicate:VNx4SI
10079 (unspec:SI [(match_operand:DI 2 "const_int_operand")
10080 (match_operand:DI 3 "const_int_operand")
10081 (match_operand:DI 4 "const_int_operand")]
10082 UNSPEC_SVE_CNT_PAT))))]
10083 "TARGET_SVE"
10084 {
10085 if (which_alternative == 1)
10086 output_asm_insn ("movprfx\t%0, %1", operands);
10087 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10088 operands + 2);
10089 }
10090 [(set_attr "movprfx" "*,yes")]
10091)
10092
10093;; Decrement a vector of HIs by the number of elements in an svpattern.
10094;; See aarch64_sve_cnt_pat for the counting behavior.
;;
;; This is an expander with no preparation code.  The SImode count is
;; wrapped in truncate:HI before being duplicated across the vector.
10095(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
10096 [(set (match_operand:VNx8HI 0 "register_operand")
10097 (ANY_MINUS:VNx8HI
10098 (match_operand:VNx8HI_ONLY 1 "register_operand")
10099 (vec_duplicate:VNx8HI
10100 (truncate:HI
10101 (unspec:SI [(match_operand:DI 2 "const_int_operand")
10102 (match_operand:DI 3 "const_int_operand")
10103 (match_operand:DI 4 "const_int_operand")]
10104 UNSPEC_SVE_CNT_PAT)))))]
10105 "TARGET_SVE"
10106)
10107
;; Instruction form of the VNx8HI decrement by an svpattern count.
;; The narrowing of the SImode count to HImode is matched through
;; operand 5, which must satisfy subreg_lowpart_operator; operand 5 is
;; not printed.  Alternative 0 ties operand 1 to the destination.
;; Alternative 1 uses an earlyclobber destination and emits a MOVPRFX
;; from operand 1 before the decrement itself.
10108(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
10109 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10110 (ANY_MINUS:VNx8HI
10111 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
10112 (vec_duplicate:VNx8HI
10113 (match_operator:HI 5 "subreg_lowpart_operator"
10114 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
10115 (match_operand:DI 3 "const_int_operand")
10116 (match_operand:DI 4 "const_int_operand")]
10117 UNSPEC_SVE_CNT_PAT)]))))]
10118 "TARGET_SVE"
10119 {
10120 if (which_alternative == 1)
10121 output_asm_insn ("movprfx\t%0, %1", operands);
10122 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10123 operands + 2);
10124 }
10125 [(set_attr "movprfx" "*,yes")]
10126)
10127
10128;; -------------------------------------------------------------------------
10129;; ---- [INT] Count elements in a predicate (scalar)
10130;; -------------------------------------------------------------------------
10131;; Includes:
10132;; - CNTP
10133;; -------------------------------------------------------------------------
10134
10135;; Count the number of set bits in a predicate. Operand 2 is true if
10136;; operand 1 is known to be all-true.
;;
;; Operand 2 is the aarch64_sve_ptrue_flag operand; operands 1 and 3 are
;; both predicate registers.  The count is an SImode unspec that is
;; zero-extended into the DImode destination.  The instruction is printed
;; as CNTP with operand 1 followed by operand 3 carrying a <Vetype> suffix.
10137(define_insn "@aarch64_pred_cntp<mode>"
10138 [(set (match_operand:DI 0 "register_operand" "=r")
10139 (zero_extend:DI
10140 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
10141 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
10142 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
10143 UNSPEC_CNTP)))]
10144 "TARGET_SVE"
10145 "cntp\t%x0, %1, %3.<Vetype>")
10146
10147;; -------------------------------------------------------------------------
10148;; ---- [INT] Increment by the number of elements in a predicate (scalar)
10149;; -------------------------------------------------------------------------
10150;; Includes:
10151;; - INCP
10152;; - SQINCP
10153;; - UQINCP
10154;; -------------------------------------------------------------------------
10155
10156;; Increment a DImode register by the number of set bits in a predicate.
10157;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of the predicate mode and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The SImode count is
;; zero-extended to DImode before the addition.
10158(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10159 [(set (match_operand:DI 0 "register_operand")
10160 (ANY_PLUS:DI
10161 (zero_extend:DI
10162 (unspec:SI [(match_dup 3)
10163 (const_int SVE_KNOWN_PTRUE)
10164 (match_operand:PRED_ALL 2 "register_operand")]
10165 UNSPEC_CNTP))
10166 (match_operand:DI_ONLY 1 "register_operand")))]
10167 "TARGET_SVE"
10168 {
10169 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10170 }
10171)
10172
;; Instruction form of the DImode increment by a predicate count.
;; Operand 1 is tied to the destination.  Operand 3 is matched without a
;; mode or predicate and is not printed; when it is not already a
;; constant, the rewrite replaces it with the all-ones constant of the
;; predicate mode.
10173(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10174 [(set (match_operand:DI 0 "register_operand" "=r")
10175 (ANY_PLUS:DI
10176 (zero_extend:DI
10177 (unspec:SI [(match_operand 3)
10178 (const_int SVE_KNOWN_PTRUE)
10179 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10180 UNSPEC_CNTP))
10181 (match_operand:DI_ONLY 1 "register_operand" "0")))]
10182 "TARGET_SVE"
10183 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
10184 "&& !CONSTANT_P (operands[3])"
10185 {
10186 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10187 }
10188)
10189
10190;; Increment an SImode register by the number of set bits in a predicate
10191;; using modular arithmetic. See aarch64_sve_cntp for a description of
10192;; the operands.
;;
;; The destination is SImode, but the instruction is printed with the X
;; view of operand 0 ("%x0").  Operand 3 is matched without a mode or
;; predicate and is not printed; when it is not already a constant, the
;; rewrite replaces it with the all-ones constant of the predicate mode.
10193(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
10194 [(set (match_operand:SI 0 "register_operand" "=r")
10195 (plus:SI
10196 (unspec:SI [(match_operand 3)
10197 (const_int SVE_KNOWN_PTRUE)
10198 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10199 UNSPEC_CNTP)
10200 (match_operand:SI 1 "register_operand" "0")))]
10201 "TARGET_SVE"
10202 "incp\t%x0, %2.<Vetype>"
10203 "&& !CONSTANT_P (operands[3])"
10204 {
10205 operands[3] = CONSTM1_RTX (<MODE>mode);
10206 }
10207)
10208
10209;; Increment an SImode register by the number of set bits in a predicate
10210;; using saturating arithmetic, extending the result to 64 bits.
10211;;
10212;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of the predicate mode and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The saturating
;; addition is done in SImode and wrapped in <paired_extend>:DI.
10213(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10214 [(set (match_operand:DI 0 "register_operand")
10215 (<paired_extend>:DI
10216 (SAT_PLUS:SI
10217 (unspec:SI [(match_dup 3)
10218 (const_int SVE_KNOWN_PTRUE)
10219 (match_operand:PRED_ALL 2 "register_operand")]
10220 UNSPEC_CNTP)
10221 (match_operand:SI_ONLY 1 "register_operand"))))]
10222 "TARGET_SVE"
10223 {
10224 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10225 }
10226)
10227
;; Instruction form of the saturating SImode increment by a predicate
;; count.  For the signed-saturating code (SS_PLUS) the instruction is
;; printed with "%x0" as destination and a trailing "%w0" operand; for any
;; other code only "%w0" is printed.  Operand 3 is matched without a mode
;; or predicate and is not printed; when it is not already a constant,
;; the rewrite replaces it with the all-ones constant of the predicate mode.
10228(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10229 [(set (match_operand:DI 0 "register_operand" "=r")
10230 (<paired_extend>:DI
10231 (SAT_PLUS:SI
10232 (unspec:SI [(match_operand 3)
10233 (const_int SVE_KNOWN_PTRUE)
10234 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10235 UNSPEC_CNTP)
10236 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
10237 "TARGET_SVE"
10238 {
10239 if (<CODE> == SS_PLUS)
10240 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
10241 else
10242 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
10243 }
10244 "&& !CONSTANT_P (operands[3])"
10245 {
10246 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10247 }
10248)
10249
10250;; -------------------------------------------------------------------------
10251;; ---- [INT] Increment by the number of elements in a predicate (vector)
10252;; -------------------------------------------------------------------------
10253;; Includes:
10254;; - INCP
10255;; - SQINCP
10256;; - UQINCP
10257;; -------------------------------------------------------------------------
10258
10259;; Increment a vector of DIs by the number of set bits in a predicate.
10260;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of mode <VPRED> and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The SImode count is
;; zero-extended to DImode and then duplicated across the vector.
10261(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10262 [(set (match_operand:VNx2DI 0 "register_operand")
10263 (ANY_PLUS:VNx2DI
10264 (vec_duplicate:VNx2DI
10265 (zero_extend:DI
10266 (unspec:SI
10267 [(match_dup 3)
10268 (const_int SVE_KNOWN_PTRUE)
10269 (match_operand:<VPRED> 2 "register_operand")]
10270 UNSPEC_CNTP)))
10271 (match_operand:VNx2DI_ONLY 1 "register_operand")))]
10272 "TARGET_SVE"
10273 {
10274 operands[3] = CONSTM1_RTX (<VPRED>mode);
10275 }
10276)
10277
;; Instruction form of the VNx2DI increment by a predicate count.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1 uses an
;; earlyclobber destination and prefixes the instruction with a MOVPRFX
;; from operand 1.  Operand 3 is matched without a mode or predicate and
;; is not printed; when it is not already a constant, the rewrite replaces
;; it with the all-ones constant of mode <VPRED>.
10278(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10279 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10280 (ANY_PLUS:VNx2DI
10281 (vec_duplicate:VNx2DI
10282 (zero_extend:DI
10283 (unspec:SI
10284 [(match_operand 3)
10285 (const_int SVE_KNOWN_PTRUE)
10286 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10287 UNSPEC_CNTP)))
10288 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
10289 "TARGET_SVE"
10290 "@
10291 <inc_dec>p\t%0.d, %2
10292 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
10293 "&& !CONSTANT_P (operands[3])"
10294 {
10295 operands[3] = CONSTM1_RTX (<VPRED>mode);
10296 }
10297 [(set_attr "movprfx" "*,yes")]
10298)
10299
10300;; Increment a vector of SIs by the number of set bits in a predicate.
10301;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of mode <VPRED> and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The SImode count is
;; duplicated across the vector directly.
10302(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10303 [(set (match_operand:VNx4SI 0 "register_operand")
10304 (ANY_PLUS:VNx4SI
10305 (vec_duplicate:VNx4SI
10306 (unspec:SI
10307 [(match_dup 3)
10308 (const_int SVE_KNOWN_PTRUE)
10309 (match_operand:<VPRED> 2 "register_operand")]
10310 UNSPEC_CNTP))
10311 (match_operand:VNx4SI_ONLY 1 "register_operand")))]
10312 "TARGET_SVE"
10313 {
10314 operands[3] = CONSTM1_RTX (<VPRED>mode);
10315 }
10316)
10317
;; Instruction form of the VNx4SI increment by a predicate count.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1 uses an
;; earlyclobber destination and prefixes the instruction with a MOVPRFX
;; from operand 1.  Operand 3 is matched without a mode or predicate and
;; is not printed; when it is not already a constant, the rewrite replaces
;; it with the all-ones constant of mode <VPRED>.
10318(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10319 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10320 (ANY_PLUS:VNx4SI
10321 (vec_duplicate:VNx4SI
10322 (unspec:SI
10323 [(match_operand 3)
10324 (const_int SVE_KNOWN_PTRUE)
10325 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10326 UNSPEC_CNTP))
10327 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
10328 "TARGET_SVE"
10329 "@
10330 <inc_dec>p\t%0.s, %2
10331 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
10332 "&& !CONSTANT_P (operands[3])"
10333 {
10334 operands[3] = CONSTM1_RTX (<VPRED>mode);
10335 }
10336 [(set_attr "movprfx" "*,yes")]
10337)
10338
10339;; Increment a vector of HIs by the number of set bits in a predicate.
10340;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of mode <VPRED> and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The SImode count is
;; wrapped in truncate:HI before being duplicated across the vector.
10341(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10342 [(set (match_operand:VNx8HI 0 "register_operand")
10343 (ANY_PLUS:VNx8HI
10344 (vec_duplicate:VNx8HI
10345 (truncate:HI
10346 (unspec:SI
10347 [(match_dup 3)
10348 (const_int SVE_KNOWN_PTRUE)
10349 (match_operand:<VPRED> 2 "register_operand")]
10350 UNSPEC_CNTP)))
10351 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
10352 "TARGET_SVE"
10353 {
10354 operands[3] = CONSTM1_RTX (<VPRED>mode);
10355 }
10356)
10357
;; Instruction form of the VNx8HI increment by a predicate count.
;; Operand numbering differs from the DI and SI vector forms: operand 3 is
;; the HImode operator (it must satisfy subreg_lowpart_operator) that
;; narrows the SImode count, and operand 4 is the first element of the
;; UNSPEC_CNTP vector.  Neither is printed.  When operand 4 is not already
;; a constant, the rewrite replaces it with the all-ones constant of mode
;; <VPRED>.  Alternative 0 ties operand 1 to the destination; alternative 1
;; uses an earlyclobber destination and a MOVPRFX from operand 1.
10358(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10359 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10360 (ANY_PLUS:VNx8HI
10361 (vec_duplicate:VNx8HI
10362 (match_operator:HI 3 "subreg_lowpart_operator"
10363 [(unspec:SI
10364 [(match_operand 4)
10365 (const_int SVE_KNOWN_PTRUE)
10366 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10367 UNSPEC_CNTP)]))
10368 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
10369 "TARGET_SVE"
10370 "@
10371 <inc_dec>p\t%0.h, %2
10372 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
10373 "&& !CONSTANT_P (operands[4])"
10374 {
10375 operands[4] = CONSTM1_RTX (<VPRED>mode);
10376 }
10377 [(set_attr "movprfx" "*,yes")]
10378)
10379
10380;; -------------------------------------------------------------------------
10381;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
10382;; -------------------------------------------------------------------------
10383;; Includes:
10384;; - DECP
10385;; - SQDECP
10386;; - UQDECP
10387;; -------------------------------------------------------------------------
10388
10389;; Decrement a DImode register by the number of set bits in a predicate.
10390;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of the predicate mode and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The SImode count is
;; zero-extended to DImode and subtracted from operand 1.
10391(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10392 [(set (match_operand:DI 0 "register_operand")
10393 (ANY_MINUS:DI
10394 (match_operand:DI_ONLY 1 "register_operand")
10395 (zero_extend:DI
10396 (unspec:SI [(match_dup 3)
10397 (const_int SVE_KNOWN_PTRUE)
10398 (match_operand:PRED_ALL 2 "register_operand")]
10399 UNSPEC_CNTP))))]
10400 "TARGET_SVE"
10401 {
10402 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10403 }
10404)
10405
;; Instruction form of the DImode decrement by a predicate count.
;; Operand 1 is tied to the destination.  Operand 3 is matched without a
;; mode or predicate and is not printed; when it is not already a
;; constant, the rewrite replaces it with the all-ones constant of the
;; predicate mode.
10406(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10407 [(set (match_operand:DI 0 "register_operand" "=r")
10408 (ANY_MINUS:DI
10409 (match_operand:DI_ONLY 1 "register_operand" "0")
10410 (zero_extend:DI
10411 (unspec:SI [(match_operand 3)
10412 (const_int SVE_KNOWN_PTRUE)
10413 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10414 UNSPEC_CNTP))))]
10415 "TARGET_SVE"
10416 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
10417 "&& !CONSTANT_P (operands[3])"
10418 {
10419 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10420 }
10421)
10422
10423;; Decrement an SImode register by the number of set bits in a predicate
10424;; using modular arithmetic. See aarch64_sve_cntp for a description of the
10425;; operands.
;;
;; The destination is SImode, but the instruction is printed with the X
;; view of operand 0 ("%x0").  Operand 3 is matched without a mode or
;; predicate and is not printed; when it is not already a constant, the
;; rewrite replaces it with the all-ones constant of the predicate mode.
10426(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
10427 [(set (match_operand:SI 0 "register_operand" "=r")
10428 (minus:SI
10429 (match_operand:SI 1 "register_operand" "0")
10430 (unspec:SI [(match_operand 3)
10431 (const_int SVE_KNOWN_PTRUE)
10432 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10433 UNSPEC_CNTP)))]
10434 "TARGET_SVE"
10435 "decp\t%x0, %2.<Vetype>"
10436 "&& !CONSTANT_P (operands[3])"
10437 {
10438 operands[3] = CONSTM1_RTX (<MODE>mode);
10439 }
10440)
10441
10442;; Decrement an SImode register by the number of set bits in a predicate
10443;; using saturating arithmetic, extending the result to 64 bits.
10444;;
10445;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of the predicate mode and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The saturating
;; subtraction is done in SImode and wrapped in <paired_extend>:DI.
10446(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10447 [(set (match_operand:DI 0 "register_operand")
10448 (<paired_extend>:DI
10449 (SAT_MINUS:SI
10450 (match_operand:SI_ONLY 1 "register_operand")
10451 (unspec:SI [(match_dup 3)
10452 (const_int SVE_KNOWN_PTRUE)
10453 (match_operand:PRED_ALL 2 "register_operand")]
10454 UNSPEC_CNTP))))]
10455 "TARGET_SVE"
10456 {
10457 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10458 }
10459)
10460
;; Instruction form of the saturating SImode decrement by a predicate
;; count.  For the signed-saturating code (SS_MINUS) the instruction is
;; printed with "%x0" as destination and a trailing "%w0" operand; for any
;; other code only "%w0" is printed.  Operand 3 is matched without a mode
;; or predicate and is not printed; when it is not already a constant,
;; the rewrite replaces it with the all-ones constant of the predicate mode.
10461(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10462 [(set (match_operand:DI 0 "register_operand" "=r")
10463 (<paired_extend>:DI
10464 (SAT_MINUS:SI
10465 (match_operand:SI_ONLY 1 "register_operand" "0")
10466 (unspec:SI [(match_operand 3)
10467 (const_int SVE_KNOWN_PTRUE)
10468 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10469 UNSPEC_CNTP))))]
10470 "TARGET_SVE"
10471 {
10472 if (<CODE> == SS_MINUS)
10473 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
10474 else
10475 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
10476 }
10477 "&& !CONSTANT_P (operands[3])"
10478 {
10479 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10480 }
10481)
10482
10483;; -------------------------------------------------------------------------
10484;; ---- [INT] Decrement by the number of elements in a predicate (vector)
10485;; -------------------------------------------------------------------------
10486;; Includes:
10487;; - DECP
10488;; - SQDECP
10489;; - UQDECP
10490;; -------------------------------------------------------------------------
10491
10492;; Decrement a vector of DIs by the number of set bits in a predicate.
10493;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of mode <VPRED> and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The SImode count is
;; zero-extended to DImode and then duplicated across the vector.
10494(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10495 [(set (match_operand:VNx2DI 0 "register_operand")
10496 (ANY_MINUS:VNx2DI
10497 (match_operand:VNx2DI_ONLY 1 "register_operand")
10498 (vec_duplicate:VNx2DI
10499 (zero_extend:DI
10500 (unspec:SI
10501 [(match_dup 3)
10502 (const_int SVE_KNOWN_PTRUE)
10503 (match_operand:<VPRED> 2 "register_operand")]
10504 UNSPEC_CNTP)))))]
10505 "TARGET_SVE"
10506 {
10507 operands[3] = CONSTM1_RTX (<VPRED>mode);
10508 }
10509)
10510
;; Instruction form of the VNx2DI decrement by a predicate count.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1 uses an
;; earlyclobber destination and prefixes the instruction with a MOVPRFX
;; from operand 1.  Operand 3 is matched without a mode or predicate and
;; is not printed; when it is not already a constant, the rewrite replaces
;; it with the all-ones constant of mode <VPRED>.
10511(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10512 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10513 (ANY_MINUS:VNx2DI
10514 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
10515 (vec_duplicate:VNx2DI
10516 (zero_extend:DI
10517 (unspec:SI
10518 [(match_operand 3)
10519 (const_int SVE_KNOWN_PTRUE)
10520 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10521 UNSPEC_CNTP)))))]
10522 "TARGET_SVE"
10523 "@
10524 <inc_dec>p\t%0.d, %2
10525 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
10526 "&& !CONSTANT_P (operands[3])"
10527 {
10528 operands[3] = CONSTM1_RTX (<VPRED>mode);
10529 }
10530 [(set_attr "movprfx" "*,yes")]
10531)
10532
10533;; Decrement a vector of SIs by the number of set bits in a predicate.
10534;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of mode <VPRED> and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The SImode count is
;; duplicated across the vector directly.
10535(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10536 [(set (match_operand:VNx4SI 0 "register_operand")
10537 (ANY_MINUS:VNx4SI
10538 (match_operand:VNx4SI_ONLY 1 "register_operand")
10539 (vec_duplicate:VNx4SI
10540 (unspec:SI
10541 [(match_dup 3)
10542 (const_int SVE_KNOWN_PTRUE)
10543 (match_operand:<VPRED> 2 "register_operand")]
10544 UNSPEC_CNTP))))]
10545 "TARGET_SVE"
10546 {
10547 operands[3] = CONSTM1_RTX (<VPRED>mode);
10548 }
10549)
10550
;; Instruction form of the VNx4SI decrement by a predicate count.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1 uses an
;; earlyclobber destination and prefixes the instruction with a MOVPRFX
;; from operand 1.  Operand 3 is matched without a mode or predicate and
;; is not printed; when it is not already a constant, the rewrite replaces
;; it with the all-ones constant of mode <VPRED>.
10551(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10552 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10553 (ANY_MINUS:VNx4SI
10554 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
10555 (vec_duplicate:VNx4SI
10556 (unspec:SI
10557 [(match_operand 3)
10558 (const_int SVE_KNOWN_PTRUE)
10559 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10560 UNSPEC_CNTP))))]
10561 "TARGET_SVE"
10562 "@
10563 <inc_dec>p\t%0.s, %2
10564 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
10565 "&& !CONSTANT_P (operands[3])"
10566 {
10567 operands[3] = CONSTM1_RTX (<VPRED>mode);
10568 }
10569 [(set_attr "movprfx" "*,yes")]
10570)
10571
10572;; Decrement a vector of HIs by the number of set bits in a predicate.
10573;; See aarch64_sve_cntp for a description of the operands.
;;
;; The expander supplies operand 3 itself: it is set to the all-ones
;; constant of mode <VPRED> and used as the first element of the
;; UNSPEC_CNTP vector, alongside SVE_KNOWN_PTRUE.  The SImode count is
;; wrapped in truncate:HI before being duplicated across the vector.
10574(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10575 [(set (match_operand:VNx8HI 0 "register_operand")
10576 (ANY_MINUS:VNx8HI
10577 (match_operand:VNx8HI_ONLY 1 "register_operand")
10578 (vec_duplicate:VNx8HI
10579 (truncate:HI
10580 (unspec:SI
10581 [(match_dup 3)
10582 (const_int SVE_KNOWN_PTRUE)
10583 (match_operand:<VPRED> 2 "register_operand")]
10584 UNSPEC_CNTP)))))]
10585 "TARGET_SVE"
10586 {
10587 operands[3] = CONSTM1_RTX (<VPRED>mode);
10588 }
10589)
10590
;; Instruction form of the VNx8HI decrement by a predicate count.
;; Operand numbering differs from the DI and SI vector forms: operand 3 is
;; the HImode operator (it must satisfy subreg_lowpart_operator) that
;; narrows the SImode count, and operand 4 is the first element of the
;; UNSPEC_CNTP vector.  Neither is printed.  When operand 4 is not already
;; a constant, the rewrite replaces it with the all-ones constant of mode
;; <VPRED>.  Alternative 0 ties operand 1 to the destination; alternative 1
;; uses an earlyclobber destination and a MOVPRFX from operand 1.
10591(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10592 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10593 (ANY_MINUS:VNx8HI
10594 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
10595 (vec_duplicate:VNx8HI
10596 (match_operator:HI 3 "subreg_lowpart_operator"
10597 [(unspec:SI
10598 [(match_operand 4)
10599 (const_int SVE_KNOWN_PTRUE)
10600 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10601 UNSPEC_CNTP)]))))]
10602 "TARGET_SVE"
10603 "@
10604 <inc_dec>p\t%0.h, %2
10605 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
10606 "&& !CONSTANT_P (operands[4])"
10607 {
10608 operands[4] = CONSTM1_RTX (<VPRED>mode);
10609 }
10610 [(set_attr "movprfx" "*,yes")]
10611)