gcc/config/aarch64/aarch64-sve.md

   1 ;; Machine description for AArch64 SVE.
   2 ;; Copyright (C) 2009-2022 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; The file is organised into the following sections (search for the full
  22 ;; line):
  23 ;;
  24 ;; == General notes
  25 ;; ---- Note on the handling of big-endian SVE
  26 ;; ---- Description of UNSPEC_PTEST
  27 ;; ---- Description of UNSPEC_PRED_Z
  28 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
  29 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
  30 ;; ---- Note on FFR handling
  31 ;;
  32 ;; == Moves
  33 ;; ---- Moves of single vectors
  34 ;; ---- Moves of multiple vectors
  35 ;; ---- Moves of predicates
  36 ;; ---- Moves relating to the FFR
  37 ;;
  38 ;; == Loads
  39 ;; ---- Normal contiguous loads
  40 ;; ---- Extending contiguous loads
  41 ;; ---- First-faulting contiguous loads
  42 ;; ---- First-faulting extending contiguous loads
  43 ;; ---- Non-temporal contiguous loads
  44 ;; ---- Normal gather loads
  45 ;; ---- Extending gather loads
  46 ;; ---- First-faulting gather loads
  47 ;; ---- First-faulting extending gather loads
  48 ;;
  49 ;; == Prefetches
  50 ;; ---- Contiguous prefetches
  51 ;; ---- Gather prefetches
  52 ;;
  53 ;; == Stores
  54 ;; ---- Normal contiguous stores
  55 ;; ---- Truncating contiguous stores
  56 ;; ---- Non-temporal contiguous stores
  57 ;; ---- Normal scatter stores
  58 ;; ---- Truncating scatter stores
  59 ;;
  60 ;; == Vector creation
  61 ;; ---- [INT,FP] Duplicate element
  62 ;; ---- [INT,FP] Initialize from individual elements
  63 ;; ---- [INT] Linear series
  64 ;; ---- [PRED] Duplicate element
  65 ;;
  66 ;; == Vector decomposition
  67 ;; ---- [INT,FP] Extract index
  68 ;; ---- [INT,FP] Extract active element
  69 ;; ---- [PRED] Extract index
  70 ;;
  71 ;; == Unary arithmetic
  72 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
  73 ;; ---- [INT] General unary arithmetic corresponding to unspecs
  74 ;; ---- [INT] Sign and zero extension
  75 ;; ---- [INT] Truncation
  76 ;; ---- [INT] Logical inverse
  77 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
  78 ;; ---- [FP] General unary arithmetic corresponding to unspecs
  79 ;; ---- [FP] Square root
  80 ;; ---- [FP] Reciprocal square root
  81 ;; ---- [PRED] Inverse
  82
  83 ;; == Binary arithmetic
  84 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
  85 ;; ---- [INT] Addition
  86 ;; ---- [INT] Subtraction
  87 ;; ---- [INT] Take address
  88 ;; ---- [INT] Absolute difference
  89 ;; ---- [INT] Saturating addition and subtraction
  90 ;; ---- [INT] Highpart multiplication
  91 ;; ---- [INT] Division
  92 ;; ---- [INT] Binary logical operations
  93 ;; ---- [INT] Binary logical operations (inverted second input)
  94 ;; ---- [INT] Shifts (rounding towards -Inf)
  95 ;; ---- [INT] Shifts (rounding towards 0)
  96 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
  97 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
  98 ;; ---- [FP] General binary arithmetic corresponding to unspecs
  99 ;; ---- [FP] Addition
 100 ;; ---- [FP] Complex addition
 101 ;; ---- [FP] Subtraction
 102 ;; ---- [FP] Absolute difference
 103 ;; ---- [FP] Multiplication
 104 ;; ---- [FP] Division
 105 ;; ---- [FP] Binary logical operations
 106 ;; ---- [FP] Sign copying
 107 ;; ---- [FP] Maximum and minimum
 108 ;; ---- [PRED] Binary logical operations
 109 ;; ---- [PRED] Binary logical operations (inverted second input)
 110 ;; ---- [PRED] Binary logical operations (inverted result)
 111 ;;
 112 ;; == Ternary arithmetic
 113 ;; ---- [INT] MLA and MAD
 114 ;; ---- [INT] MLS and MSB
 115 ;; ---- [INT] Dot product
 116 ;; ---- [INT] Sum of absolute differences
 117 ;; ---- [INT] Matrix multiply-accumulate
 118 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
 119 ;; ---- [FP] Complex multiply-add
 120 ;; ---- [FP] Trigonometric multiply-add
 121 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
 122 ;; ---- [FP] Matrix multiply-accumulate
 123 ;;
 124 ;; == Comparisons and selects
 125 ;; ---- [INT,FP] Select based on predicates
 126 ;; ---- [INT,FP] Compare and select
 127 ;; ---- [INT] Comparisons
 128 ;; ---- [INT] While tests
 129 ;; ---- [FP] Direct comparisons
 130 ;; ---- [FP] Absolute comparisons
 131 ;; ---- [PRED] Select
 132 ;; ---- [PRED] Test bits
 133 ;;
 134 ;; == Reductions
 135 ;; ---- [INT,FP] Conditional reductions
 136 ;; ---- [INT] Tree reductions
 137 ;; ---- [FP] Tree reductions
 138 ;; ---- [FP] Left-to-right reductions
 139 ;;
 140 ;; == Permutes
 141 ;; ---- [INT,FP] General permutes
 142 ;; ---- [INT,FP] Special-purpose unary permutes
 143 ;; ---- [INT,FP] Special-purpose binary permutes
 144 ;; ---- [PRED] Special-purpose unary permutes
 145 ;; ---- [PRED] Special-purpose binary permutes
 146 ;;
 147 ;; == Conversions
 148 ;; ---- [INT<-INT] Packs
 149 ;; ---- [INT<-INT] Unpacks
 150 ;; ---- [INT<-FP] Conversions
 151 ;; ---- [INT<-FP] Packs
 152 ;; ---- [INT<-FP] Unpacks
 153 ;; ---- [FP<-INT] Conversions
 154 ;; ---- [FP<-INT] Packs
 155 ;; ---- [FP<-INT] Unpacks
 156 ;; ---- [FP<-FP] Packs
 157 ;; ---- [FP<-FP] Packs (bfloat16)
 158 ;; ---- [FP<-FP] Unpacks
 159 ;; ---- [PRED<-PRED] Packs
 160 ;; ---- [PRED<-PRED] Unpacks
 161 ;;
 162 ;; == Vector partitioning
 163 ;; ---- [PRED] Unary partitioning
 164 ;; ---- [PRED] Binary partitioning
 165 ;; ---- [PRED] Scalarization
 166 ;;
 167 ;; == Counting elements
 168 ;; ---- [INT] Count elements in a pattern (scalar)
 169 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
 170 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
 171 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
 172 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
 173 ;; ---- [INT] Count elements in a predicate (scalar)
 174 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
 175 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
 176 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
 177 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
 178
 179 ;; =========================================================================
 180 ;; == General notes
 181 ;; =========================================================================
 182 ;;
 183 ;; -------------------------------------------------------------------------
 184 ;; ---- Note on the handling of big-endian SVE
 185 ;; -------------------------------------------------------------------------
 186 ;;
 187 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
 188 ;; same way as movdi or movti would: the first byte of memory goes
 189 ;; into the most significant byte of the register and the last byte
 190 ;; of memory goes into the least significant byte of the register.
 191 ;; This is the most natural ordering for Advanced SIMD and matches
 192 ;; the ABI layout for 64-bit and 128-bit vector types.
 193 ;;
 194 ;; As a result, the order of bytes within the register is what GCC
 195 ;; expects for a big-endian target, and subreg offsets therefore work
 196 ;; as expected, with the first element in memory having subreg offset 0
 197 ;; and the last element in memory having the subreg offset associated
 198 ;; with a big-endian lowpart.  However, this ordering also means that
 199 ;; GCC's lane numbering does not match the architecture's numbering:
 200 ;; GCC always treats the element at the lowest address in memory
 201 ;; (subreg offset 0) as element 0, while the architecture treats
 202 ;; the least significant end of the register as element 0.
 203 ;;
 204 ;; The situation for SVE is different.  We want the layout of the
 205 ;; SVE register to be same for mov<mode> as it is for maskload<mode>:
 206 ;; logically, a mov<mode> load must be indistinguishable from a
 207 ;; maskload<mode> whose mask is all true.  We therefore need the
 208 ;; register layout to match LD1 rather than LDR.  The ABI layout of
 209 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
 210 ;;
 211 ;; As a result, the architecture lane numbering matches GCC's lane
 212 ;; numbering, with element 0 always being the first in memory.
 213 ;; However:
 214 ;;
 215 ;; - Applying a subreg offset to a register does not give the element
 216 ;;   that GCC expects: the first element in memory has the subreg offset
 217 ;;   associated with a big-endian lowpart while the last element in memory
 218 ;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
 219 ;;
 220 ;; - We cannot use LDR and STR for spill slots that might be accessed
 221 ;;   via subregs, since although the elements have the order GCC expects,
 222 ;;   the order of the bytes within the elements is different.  We instead
 223 ;;   access spill slots via LD1 and ST1, using secondary reloads to
 224 ;;   reserve a predicate register.
 225 ;;
 226 ;; -------------------------------------------------------------------------
 227 ;; ---- Description of UNSPEC_PTEST
 228 ;; -------------------------------------------------------------------------
 229 ;;
 230 ;; SVE provides a PTEST instruction for testing the active lanes of a
 231 ;; predicate and setting the flags based on the result.  The associated
 232 ;; condition code tests are:
 233 ;;
 234 ;; - any   (= ne): at least one active bit is set
 235 ;; - none  (= eq): all active bits are clear (*)
 236 ;; - first (= mi): the first active bit is set
 237 ;; - nfrst (= pl): the first active bit is clear (*)
 238 ;; - last  (= cc): the last active bit is set
 239 ;; - nlast (= cs): the last active bit is clear (*)
 240 ;;
 241 ;; where the conditions marked (*) are also true when there are no active
 242 ;; lanes (i.e. when the governing predicate is a PFALSE).  The flags results
 243 ;; of a PTEST use the condition code mode CC_NZC.
 244 ;;
 245 ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
 246 ;; This means that for other predicate modes, we need a governing predicate
 247 ;; in which all bits are defined.
 248 ;;
 249 ;; For example, most predicated .H operations ignore the odd bits of the
 250 ;; governing predicate, so that an active lane is represented by the
 251 ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
 252 ;; any value.  To test a .H predicate, we instead need "10" and "00"
 253 ;; respectively, so that the condition only tests the even bits of the
 254 ;; predicate.
 255 ;;
 256 ;; Several instructions set the flags as a side-effect, in the same way
 257 ;; that a separate PTEST would.  It's important for code quality that we
 258 ;; use these flags results as often as possible, particularly in the case
 259 ;; of WHILE* and RDFFR.
 260 ;;
 261 ;; Also, some of the instructions that set the flags are unpredicated
 262 ;; and instead implicitly test all .B, .H, .S or .D elements, as though
 263 ;; they were predicated on a PTRUE of that size.  For example, a .S
 264 ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
 265 ;; would.
 266 ;;
 267 ;; We therefore need to represent PTEST operations in a way that
 268 ;; makes it easy to combine them with both predicated and unpredicated
 269 ;; operations, while using a VNx16BI governing predicate for all
 270 ;; predicate modes.  We do this using:
 271 ;;
 272 ;;   (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
 273 ;;
 274 ;; where:
 275 ;;
 276 ;; - GP is the real VNx16BI governing predicate
 277 ;;
 278 ;; - CAST_GP is GP cast to the mode of OP.  All bits dropped by casting
 279 ;;   GP to CAST_GP are guaranteed to be clear in GP.
 280 ;;
 281 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
 282 ;;   SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
 283 ;;   SVE_MAYBE_NOT_PTRUE otherwise.
 284 ;;
 285 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
 286 ;;
 287 ;; -------------------------------------------------------------------------
 288 ;; ---- Description of UNSPEC_PRED_Z
 289 ;; -------------------------------------------------------------------------
 290 ;;
 291 ;; SVE integer comparisons are predicated and return zero for inactive
 292 ;; lanes.  Sometimes we use them with predicates that are all-true and
 293 ;; sometimes we use them with general predicates.
 294 ;;
 295 ;; The integer comparisons also set the flags and so build-in the effect
 296 ;; of a PTEST.  We therefore want to be able to combine integer comparison
 297 ;; patterns with PTESTs of the result.  One difficulty with doing this is
 298 ;; that (as noted above) the PTEST is always a .B operation and so can place
 299 ;; stronger requirements on the governing predicate than the comparison does.
 300 ;;
 301 ;; For example, when applying a separate PTEST to the result of a full-vector
 302 ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
 303 ;; .B PTRUE.  In constrast, the comparison might be predicated on either
 304 ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
 305 ;; bits don't matter for .H operations.
 306 ;;
 307 ;; We therefore can't rely on a full-vector comparison using the same
 308 ;; predicate register as a following PTEST.  We instead need to remember
 309 ;; whether a comparison is known to be a full-vector comparison and use
 310 ;; this information in addition to a check for equal predicate registers.
 311 ;; At the same time, it's useful to have a common representation for all
 312 ;; integer comparisons, so that they can be handled by a single set of
 313 ;; patterns.
 314 ;;
 315 ;; We therefore take a similar approach to UNSPEC_PTEST above and use:
 316 ;;
 317 ;;   (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
 318 ;;
 319 ;; where:
 320 ;;
 321 ;; - GP is the governing predicate, of mode <M:VPRED>
 322 ;;
 323 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
 324 ;;   SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
 325 ;;   otherwise
 326 ;;
 327 ;; - CODE is the comparison code
 328 ;;
 329 ;; - OP0 and OP1 are the values being compared, of mode M
 330 ;;
 331 ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
 332 ;;
 333 ;; -------------------------------------------------------------------------
 334 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
 335 ;; -------------------------------------------------------------------------
 336 ;;
 337 ;; Many SVE integer operations are predicated.  We can generate them
 338 ;; from four sources:
 339 ;;
 340 ;; (1) Using normal unpredicated optabs.  In this case we need to create
 341 ;;     an all-true predicate register to act as the governing predicate
 342 ;;     for the SVE instruction.  There are no inactive lanes, and thus
 343 ;;     the values of inactive lanes don't matter.
 344 ;;
 345 ;; (2) Using _x ACLE functions.  In this case the function provides a
 346 ;;     specific predicate and some lanes might be inactive.  However,
 347 ;;     as for (1), the values of the inactive lanes don't matter.
 348 ;;     We can make extra lanes active without changing the behavior
 349 ;;     (although for code-quality reasons we should avoid doing so
 350 ;;     needlessly).
 351 ;;
 352 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
 353 ;;     These optabs have a predicate operand that specifies which lanes are
 354 ;;     active and another operand that provides the values of inactive lanes.
 355 ;;
 356 ;; (4) Using _m and _z ACLE functions.  These functions map to the same
 357 ;;     patterns as (3), with the _z functions setting inactive lanes to zero
 358 ;;     and the _m functions setting the inactive lanes to one of the function
 359 ;;     arguments.
 360 ;;
 361 ;; For (1) and (2) we need a way of attaching the predicate to a normal
 362 ;; unpredicated integer operation.  We do this using:
 363 ;;
 364 ;;   (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
 365 ;;
 366 ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
 367 ;; is a predicate of mode <M:VPRED>.  PRED might or might not be a PTRUE;
 368 ;; it always is for (1), but might not be for (2).
 369 ;;
 370 ;; The unspec as a whole has the same value as (code:M ...) when PRED is
 371 ;; all-true.  It is always semantically valid to replace PRED with a PTRUE,
 372 ;; but as noted above, we should only do so if there's a specific benefit.
 373 ;;
 374 ;; (The "_X" in the unspec is named after the ACLE functions in (2).)
 375 ;;
 376 ;; For (3) and (4) we can simply use the SVE port's normal representation
 377 ;; of a predicate-based select:
 378 ;;
 379 ;;   (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
 380 ;;
 381 ;; where INACTIVE specifies the values of inactive lanes.
 382 ;;
 383 ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
 384 ;; than inserting the integer operation directly.  This is mostly useful
 385 ;; if we want the combine pass to merge an integer operation with an explicit
 386 ;; vcond_mask (in other words, with a following SEL instruction).  However,
 387 ;; it's generally better to merge such operations at the gimple level
 388 ;; using (3).
 389 ;;
 390 ;; -------------------------------------------------------------------------
 391 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
 392 ;; -------------------------------------------------------------------------
 393 ;;
 394 ;; Most SVE floating-point operations are predicated.  We can generate
 395 ;; them from four sources:
 396 ;;
 397 ;; (1) Using normal unpredicated optabs.  In this case we need to create
 398 ;;     an all-true predicate register to act as the governing predicate
 399 ;;     for the SVE instruction.  There are no inactive lanes, and thus
 400 ;;     the values of inactive lanes don't matter.
 401 ;;
 402 ;; (2) Using _x ACLE functions.  In this case the function provides a
 403 ;;     specific predicate and some lanes might be inactive.  However,
 404 ;;     as for (1), the values of the inactive lanes don't matter.
 405 ;;
 406 ;;     The instruction must have the same exception behavior as the
 407 ;;     function call unless things like command-line flags specifically
 408 ;;     allow otherwise.  For example, with -ffast-math, it is OK to
 409 ;;     raise exceptions for inactive lanes, but normally it isn't.
 410 ;;
 411 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
 412 ;;     These optabs have a predicate operand that specifies which lanes are
 413 ;;     active and another operand that provides the values of inactive lanes.
 414 ;;
 415 ;; (4) Using _m and _z ACLE functions.  These functions map to the same
 416 ;;     patterns as (3), with the _z functions setting inactive lanes to zero
 417 ;;     and the _m functions setting the inactive lanes to one of the function
 418 ;;     arguments.
 419 ;;
 420 ;; So:
 421 ;;
 422 ;; - In (1), the predicate is known to be all true and the pattern can use
 423 ;;   unpredicated operations where available.
 424 ;;
 425 ;; - In (2), the predicate might or might not be all true.  The pattern can
 426 ;;   use unpredicated instructions if the predicate is all-true or if things
 427 ;;   like command-line flags allow exceptions for inactive lanes.
 428 ;;
 429 ;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
 430 ;;   might be inactive and inactive lanes of the result must have specific
 431 ;;   values.  There is no scope for using unpredicated instructions (and no
 432 ;;   reason to want to), so the question about command-line flags doesn't
 433 ;;   arise.
 434 ;;
 435 ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
 436 ;; in combination with a separate predicate operand, e.g.
 437 ;;
 438 ;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
 439 ;;            (sqrt:SVE_FULL_F 2 "register_operand" "w")]
 440 ;;           ....)
 441 ;;
 442 ;; because (sqrt ...) can raise an exception for any lane, including
 443 ;; inactive ones.  We therefore need to use an unspec instead.
 444 ;;
 445 ;; Also, (2) requires some way of distinguishing the case in which the
 446 ;; predicate might have inactive lanes and cannot be changed from the
 447 ;; case in which the predicate has no inactive lanes or can be changed.
 448 ;; This information is also useful when matching combined FP patterns
 449 ;; in which the predicates might not be equal.
 450 ;;
 451 ;; We therefore model FP operations as an unspec of the form:
 452 ;;
 453 ;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
 454 ;;
 455 ;; where:
 456 ;;
 457 ;; - PRED is the governing predicate.
 458 ;;
 459 ;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
 460 ;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
 461 ;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
 462 ;;
 463 ;; - OP0 OP1 ... are the normal input operands to the operation.
 464 ;;
 465 ;; - MNEMONIC is the mnemonic of the associated SVE instruction.
 466 ;;
 467 ;; For (3) and (4), we combine these operations with an UNSPEC_SEL
 468 ;; that selects between the result of the FP operation and the "else"
 469 ;; value.  (This else value is a merge input for _m ACLE functions
 470 ;; and zero for _z ACLE functions.)  The outer pattern then has the form:
 471 ;;
 472 ;;   (unspec [pred fp_operation else_value] UNSPEC_SEL)
 473 ;;
 474 ;; This means that the patterns for (3) and (4) have two predicates:
 475 ;; one for the FP operation itself and one for the UNSPEC_SEL.
 476 ;; This pattern is equivalent to the result of combining an instance
 477 ;; of (1) or (2) with a separate vcond instruction, so these patterns
 478 ;; are useful as combine targets too.
 479 ;;
 480 ;; However, in the combine case, the instructions that we want to
 481 ;; combine might use different predicates.  Then:
 482 ;;
 483 ;; - Some of the active lanes of the FP operation might be discarded
 484 ;;   by the UNSPEC_SEL.  It's OK to drop the FP operation on those lanes,
 485 ;;   even for SVE_STRICT_GP, since the operations on those lanes are
 486 ;;   effectively dead code.
 487 ;;
 488 ;; - Some of the inactive lanes of the FP operation might be selected
 489 ;;   by the UNSPEC_SEL, giving unspecified values for those lanes.
 490 ;;   SVE_RELAXED_GP lets us extend the FP operation to cover these
 491 ;;   extra lanes, but SVE_STRICT_GP does not.
 492 ;;
 493 ;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
 494 ;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
 495 ;; This typically leads to patterns like:
 496 ;;
 497 ;;    (unspec [(match_operand 1 "register_operand" "Upl")
 498 ;;             (unspec [(match_operand N)
 499 ;;                      (const_int SVE_RELAXED_GP)
 500 ;;                      ...]
 501 ;;                     UNSPEC_COND_<MNEMONIC>)
 502 ;;             ...])
 503 ;;
 504 ;; where operand N is allowed to be anything.  These instructions then
 505 ;; have rewrite rules to replace operand N with operand 1, which gives the
 506 ;; instructions a canonical form and means that the original operand N is
 507 ;; not kept live unnecessarily.
 508 ;;
 509 ;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
 510 ;; a subset of the FP operation predicate.  This case isn't interesting
 511 ;; for FP operations that have an all-true predicate, since such operations
 512 ;; use SVE_RELAXED_GP instead.  And it is not possible for instruction
 513 ;; conditions to track the subset relationship for arbitrary registers.
 514 ;; So in practice, the only useful case for SVE_STRICT_GP is the one
 515 ;; in which the predicates match:
 516 ;;
 517 ;;    (unspec [(match_operand 1 "register_operand" "Upl")
 518 ;;             (unspec [(match_dup 1)
 519 ;;                      (const_int SVE_STRICT_GP)
 520 ;;                      ...]
 521 ;;                     UNSPEC_COND_<MNEMONIC>)
 522 ;;             ...])
 523 ;;
 524 ;; This pattern would also be correct for SVE_RELAXED_GP, but it would
 525 ;; be redundant with the one above.  However, if the combine pattern
 526 ;; has multiple FP operations, using a match_operand allows combinations
 527 ;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
 528 ;; that the predicates are the same:
 529 ;;
 530 ;;    (unspec [(match_operand 1 "register_operand" "Upl")
 531 ;;             (...
 532 ;;                (unspec [(match_dup 1)
 533 ;;                         (match_operand:SI N "aarch64_sve_gp_strictness")
 534 ;;                         ...]
 535 ;;                        UNSPEC_COND_<MNEMONIC1>)
 536 ;;                (unspec [(match_dup 1)
 537 ;;                         (match_operand:SI M "aarch64_sve_gp_strictness")
 538 ;;                         ...]
 539 ;;                        UNSPEC_COND_<MNEMONIC2>) ...)
 540 ;;             ...])
 541 ;;
 542 ;; The fully-relaxed version of this pattern is:
 543 ;;
 544 ;;    (unspec [(match_operand 1 "register_operand" "Upl")
 545 ;;             (...
 546 ;;                (unspec [(match_operand:SI N)
 547 ;;                         (const_int SVE_RELAXED_GP)
 548 ;;                         ...]
 549 ;;                        UNSPEC_COND_<MNEMONIC1>)
 550 ;;                (unspec [(match_operand:SI M)
 551 ;;                         (const_int SVE_RELAXED_GP)
 552 ;;                         ...]
 553 ;;                        UNSPEC_COND_<MNEMONIC2>) ...)
 554 ;;             ...])
 555 ;;
 556 ;; -------------------------------------------------------------------------
 557 ;; ---- Note on FFR handling
 558 ;; -------------------------------------------------------------------------
 559 ;;
 560 ;; Logically we want to divide FFR-related instructions into regions
 561 ;; that contain exactly one of:
 562 ;;
 563 ;; - a single write to the FFR
 564 ;; - any number of reads from the FFR (but only one read is likely)
 565 ;; - any number of LDFF1 and LDNF1 instructions
 566 ;;
 567 ;; However, LDFF1 and LDNF1 instructions should otherwise behave like
 568 ;; normal loads as far as possible.  This means that they should be
 569 ;; schedulable within a region in the same way that LD1 would be,
 570 ;; and they should be deleted as dead if the result is unused.  The loads
 571 ;; should therefore not write to the FFR, since that would both serialize
 572 ;; the loads with respect to each other and keep the loads live for any
 573 ;; later RDFFR.
 574 ;;
 575 ;; We get around this by using a fake "FFR token" (FFRT) to help describe
 576 ;; the dependencies.  Writing to the FFRT starts a new "FFRT region",
 577 ;; while using the FFRT keeps the instruction within its region.
 578 ;; Specifically:
 579 ;;
 580 ;; - Writes start a new FFRT region as well as setting the FFR:
 581 ;;
 582 ;;       W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
 583 ;;
 584 ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
 585 ;;   loads stay within the same FFRT region:
 586 ;;
 587 ;;       L1: load data while using the FFRT
 588 ;;
 589 ;;   In addition, any FFRT region that includes a load also has at least one
 590 ;;   instance of:
 591 ;;
 592 ;;       L2: FFR = update(FFR, FFRT)  [type == no_insn]
 593 ;;
 594 ;;   to make it clear that the region both reads from and writes to the FFR.
 595 ;;
 596 ;; - Reads do the following:
 597 ;;
 598 ;;       R1: FFRT = FFR               [type == no_insn]
 599 ;;       R2: read from the FFRT
 600 ;;       R3: FFRT = update(FFRT)      [type == no_insn]
 601 ;;
 602 ;;   R1 and R3 both create new FFRT regions, so that previous LDFF1s and
 603 ;;   LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
 604 ;;   cannot move backwards across R3.
 605 ;;
 606 ;; This way, writes are only kept alive by later loads or reads,
 607 ;; and write/read pairs fold normally.  For two consecutive reads,
 608 ;; the first R3 is made dead by the second R1, which in turn becomes
 609 ;; redundant with the first R1.  We then have:
 610 ;;
 611 ;;     first R1: FFRT = FFR
 612 ;;     first read from the FFRT
 613 ;;     second read from the FFRT
 614 ;;     second R3: FFRT = update(FFRT)
 615 ;;
 616 ;; i.e. the two FFRT regions collapse into a single one with two
 617 ;; independent reads.
 618 ;;
 619 ;; The model still prevents some valid optimizations though.  For example,
 620 ;; if all loads in an FFRT region are deleted as dead, nothing would remove
 621 ;; the L2 instructions.
 622
 623 ;; =========================================================================
 624 ;; == Moves
 625 ;; =========================================================================
 626
 627 ;; -------------------------------------------------------------------------
 628 ;; ---- Moves of single vectors
 629 ;; -------------------------------------------------------------------------
 630 ;; Includes:
 631 ;; - MOV  (including aliases)
 632 ;; - LD1B (contiguous form)
 633 ;; - LD1D (    "    "     )
 634 ;; - LD1H (    "    "     )
 635 ;; - LD1W (    "    "     )
 636 ;; - LDR
 637 ;; - ST1B (contiguous form)
 638 ;; - ST1D (    "    "     )
 639 ;; - ST1H (    "    "     )
 640 ;; - ST1W (    "    "     )
 641 ;; - STR
 642 ;; -------------------------------------------------------------------------
 643
 644 (define_expand "mov<mode>"
 645   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
 646         (match_operand:SVE_ALL 1 "general_operand"))]
 647   "TARGET_SVE"
 648   {
 649     /* Use the predicated load and store patterns where possible.
 650        This is required for big-endian targets (see the comment at the
 651        head of the file) and increases the addressing choices for
 652        little-endian.  */
 653     if ((MEM_P (operands[0]) || MEM_P (operands[1]))
 654         && can_create_pseudo_p ())
 655       {
 656         aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
 657         DONE;
 658       }
 659
 660     if (CONSTANT_P (operands[1]))
 661       {
 662         aarch64_expand_mov_immediate (operands[0], operands[1]);
 663         DONE;
 664       }
 665
 666     /* Optimize subregs on big-endian targets: we can use REV[BHW]
 667        instead of going through memory.  */
 668     if (BYTES_BIG_ENDIAN
 669         && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
 670       DONE;
 671   }
 672 )
 673
 674 (define_expand "movmisalign<mode>"
 675   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
 676         (match_operand:SVE_ALL 1 "general_operand"))]
 677   "TARGET_SVE"
 678   {
 679     /* Equivalent to a normal move for our purpooses.  */
 680     emit_move_insn (operands[0], operands[1]);
 681     DONE;
 682   }
 683 )
 684
 685 ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
 686 ;; little-endian ordering is acceptable.  Only allow memory operations during
 687 ;; and after RA; before RA we want the predicated load and store patterns to
 688 ;; be used instead.
 689 (define_insn "*aarch64_sve_mov<mode>_ldr_str"
 690   [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
 691         (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
 692   "TARGET_SVE
 693    && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
 694    && ((lra_in_progress || reload_completed)
 695        || (register_operand (operands[0], <MODE>mode)
 696            && nonmemory_operand (operands[1], <MODE>mode)))"
 697   "@
 698    ldr\t%0, %1
 699    str\t%1, %0
 700    mov\t%0.d, %1.d
 701    * return aarch64_output_sve_mov_immediate (operands[1]);"
 702 )
 703
 704 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
 705 ;; or vectors for which little-endian ordering isn't acceptable.  Memory
 706 ;; accesses require secondary reloads.
 707 (define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
 708   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
 709         (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
 710   "TARGET_SVE
 711    && <MODE>mode != VNx16QImode
 712    && (BYTES_BIG_ENDIAN
 713        || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
 714   "@
 715    mov\t%0.d, %1.d
 716    * return aarch64_output_sve_mov_immediate (operands[1]);"
 717 )
 718
 719 ;; Handle memory reloads for modes that can't use LDR and STR.  We use
 720 ;; byte PTRUE for all modes to try to encourage reuse.  This pattern
 721 ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
 722 (define_expand "aarch64_sve_reload_mem"
 723   [(parallel
 724      [(set (match_operand 0)
 725            (match_operand 1))
 726       (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
 727   "TARGET_SVE"
 728   {
 729     /* Create a PTRUE.  */
 730     emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
 731
 732     /* Refer to the PTRUE in the appropriate mode for this move.  */
 733     machine_mode mode = GET_MODE (operands[0]);
 734     rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
 735
 736     /* Emit a predicated load or store.  */
 737     aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
 738     DONE;
 739   }
 740 )
 741
 742 ;; A predicated move in which the predicate is known to be all-true.
 743 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
 744 ;; so changes to this pattern will need changes there as well.
 745 (define_insn_and_split "@aarch64_pred_mov<mode>"
 746   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
 747         (unspec:SVE_ALL
 748           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 749            (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
 750           UNSPEC_PRED_X))]
 751   "TARGET_SVE
 752    && (register_operand (operands[0], <MODE>mode)
 753        || register_operand (operands[2], <MODE>mode))"
 754   "@
 755    #
 756    ld1<Vesize>\t%0.<Vctype>, %1/z, %2
 757    st1<Vesize>\t%2.<Vctype>, %1, %0"
 758   "&& register_operand (operands[0], <MODE>mode)
 759    && register_operand (operands[2], <MODE>mode)"
 760   [(set (match_dup 0) (match_dup 2))]
 761 )
 762
 763 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
 764 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
 765 ;; for details.  We use a special predicate for operand 2 to reduce
 766 ;; the number of patterns.
 767 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
 768   [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
 769         (unspec:SVE_ALL
 770           [(match_operand:VNx16BI 1 "register_operand" "Upl")
 771            (match_operand 2 "aarch64_any_register_operand" "w")]
 772           UNSPEC_REV_SUBREG))]
 773   "TARGET_SVE && BYTES_BIG_ENDIAN"
 774   "#"
 775   "&& reload_completed"
 776   [(const_int 0)]
 777   {
 778     aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
 779     DONE;
 780   }
 781 )
 782
 783 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
 784 ;; This is equivalent to a subreg on little-endian targets but not for
 785 ;; big-endian; see the comment at the head of the file for details.
 786 (define_expand "@aarch64_sve_reinterpret<mode>"
 787   [(set (match_operand:SVE_ALL 0 "register_operand")
 788         (unspec:SVE_ALL
 789           [(match_operand 1 "aarch64_any_register_operand")]
 790           UNSPEC_REINTERPRET))]
 791   "TARGET_SVE"
 792   {
 793     machine_mode src_mode = GET_MODE (operands[1]);
 794     if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
 795       {
 796         emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
 797         DONE;
 798       }
 799   }
 800 )
 801
 802 ;; A pattern for handling type punning on big-endian targets.  We use a
 803 ;; special predicate for operand 1 to reduce the number of patterns.
 804 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
 805   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 806         (unspec:SVE_ALL
 807           [(match_operand 1 "aarch64_any_register_operand" "w")]
 808           UNSPEC_REINTERPRET))]
 809   "TARGET_SVE"
 810   "#"
 811   "&& reload_completed"
 812   [(set (match_dup 0) (match_dup 1))]
 813   {
 814     operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
 815   }
 816 )
 817
 818 ;; -------------------------------------------------------------------------
 819 ;; ---- Moves of multiple vectors
 820 ;; -------------------------------------------------------------------------
 821 ;; All patterns in this section are synthetic and split to real
 822 ;; instructions after reload.
 823 ;; -------------------------------------------------------------------------
 824
 825 (define_expand "mov<mode>"
 826   [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
 827         (match_operand:SVE_STRUCT 1 "general_operand"))]
 828   "TARGET_SVE"
 829   {
 830     /* Big-endian loads and stores need to be done via LD1 and ST1;
 831        see the comment at the head of the file for details.  */
 832     if ((MEM_P (operands[0]) || MEM_P (operands[1]))
 833         && BYTES_BIG_ENDIAN)
 834       {
 835         gcc_assert (can_create_pseudo_p ());
 836         aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
 837         DONE;
 838       }
 839
 840     if (CONSTANT_P (operands[1]))
 841       {
 842         aarch64_expand_mov_immediate (operands[0], operands[1]);
 843         DONE;
 844       }
 845   }
 846 )
 847
 848 ;; Unpredicated structure moves (little-endian).
 849 (define_insn "*aarch64_sve_mov<mode>_le"
 850   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
 851         (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
 852   "TARGET_SVE && !BYTES_BIG_ENDIAN"
 853   "#"
 854   [(set_attr "length" "<insn_length>")]
 855 )
 856
 857 ;; Unpredicated structure moves (big-endian).  Memory accesses require
 858 ;; secondary reloads.
 859 (define_insn "*aarch64_sve_mov<mode>_be"
 860   [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
 861         (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
 862   "TARGET_SVE && BYTES_BIG_ENDIAN"
 863   "#"
 864   [(set_attr "length" "<insn_length>")]
 865 )
 866
 867 ;; Split unpredicated structure moves into pieces.  This is the same
 868 ;; for both big-endian and little-endian code, although it only needs
 869 ;; to handle memory operands for little-endian code.
 870 (define_split
 871   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
 872         (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
 873   "TARGET_SVE && reload_completed"
 874   [(const_int 0)]
 875   {
 876     rtx dest = operands[0];
 877     rtx src = operands[1];
 878     if (REG_P (dest) && REG_P (src))
 879       aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
 880     else
 881       for (unsigned int i = 0; i < <vector_count>; ++i)
 882         {
 883           rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
 884                                              i * BYTES_PER_SVE_VECTOR);
 885           rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
 886                                             i * BYTES_PER_SVE_VECTOR);
 887           emit_insn (gen_rtx_SET (subdest, subsrc));
 888         }
 889     DONE;
 890   }
 891 )
 892
 893 ;; Predicated structure moves.  This works for both endiannesses but in
 894 ;; practice is only useful for big-endian.
 895 (define_insn_and_split "@aarch64_pred_mov<mode>"
 896   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
 897         (unspec:SVE_STRUCT
 898           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 899            (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
 900           UNSPEC_PRED_X))]
 901   "TARGET_SVE
 902    && (register_operand (operands[0], <MODE>mode)
 903        || register_operand (operands[2], <MODE>mode))"
 904   "#"
 905   "&& reload_completed"
 906   [(const_int 0)]
 907   {
 908     for (unsigned int i = 0; i < <vector_count>; ++i)
 909       {
 910         rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
 911                                            <MODE>mode,
 912                                            i * BYTES_PER_SVE_VECTOR);
 913         rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
 914                                           <MODE>mode,
 915                                           i * BYTES_PER_SVE_VECTOR);
 916         aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
 917       }
 918     DONE;
 919   }
 920   [(set_attr "length" "<insn_length>")]
 921 )
 922
 923 ;; -------------------------------------------------------------------------
 924 ;; ---- Moves of predicates
 925 ;; -------------------------------------------------------------------------
 926 ;; Includes:
 927 ;; - MOV
 928 ;; - LDR
 929 ;; - PFALSE
 930 ;; - PTRUE
 931 ;; - PTRUES
 932 ;; - STR
 933 ;; -------------------------------------------------------------------------
 934
 935 (define_expand "mov<mode>"
 936   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
 937         (match_operand:PRED_ALL 1 "general_operand"))]
 938   "TARGET_SVE"
 939   {
 940     if (GET_CODE (operands[0]) == MEM)
 941       operands[1] = force_reg (<MODE>mode, operands[1]);
 942
 943     if (CONSTANT_P (operands[1]))
 944       {
 945         aarch64_expand_mov_immediate (operands[0], operands[1]);
 946         DONE;
 947       }
 948   }
 949 )
 950
 951 (define_insn "*aarch64_sve_mov<mode>"
 952   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
 953         (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
 954   "TARGET_SVE
 955    && (register_operand (operands[0], <MODE>mode)
 956        || register_operand (operands[1], <MODE>mode))"
 957   "@
 958    mov\t%0.b, %1.b
 959    str\t%1, %0
 960    ldr\t%0, %1
 961    * return aarch64_output_sve_mov_immediate (operands[1]);"
 962 )
 963
 964 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
 965 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
 966   [(set (reg:CC_NZC CC_REGNUM)
 967         (unspec:CC_NZC
 968           [(match_operand 2)
 969            (match_operand 3)
 970            (const_int SVE_KNOWN_PTRUE)
 971            (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
 972              [(unspec:VNx16BI
 973                 [(match_operand:SI 4 "const_int_operand")
 974                  (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
 975                 UNSPEC_PTRUE)])]
 976           UNSPEC_PTEST))
 977    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 978         (match_dup 1))]
 979   "TARGET_SVE"
 980   {
 981     return aarch64_output_sve_ptrues (operands[1]);
 982   }
 983   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
 984   {
 985     operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
 986   }
 987 )
 988
 989 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
 990 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
 991   [(set (reg:CC_NZC CC_REGNUM)
 992         (unspec:CC_NZC
 993           [(match_operand 2)
 994            (match_operand 3)
 995            (const_int SVE_KNOWN_PTRUE)
 996            (subreg:PRED_HSD
 997              (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
 998                [(unspec:VNx16BI
 999                   [(match_operand:SI 4 "const_int_operand")
1000                    (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1001                   UNSPEC_PTRUE)]) 0)]
1002           UNSPEC_PTEST))
1003    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1004         (match_dup 1))]
1005   "TARGET_SVE"
1006   {
1007     return aarch64_output_sve_ptrues (operands[1]);
1008   }
1009   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1010   {
1011     operands[2] = CONSTM1_RTX (VNx16BImode);
1012     operands[3] = CONSTM1_RTX (<MODE>mode);
1013   }
1014 )
1015
1016 ;; Match PTRUES Pn.B when only the flags result is useful (which is
1017 ;; a way of testing VL).
1018 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
1019   [(set (reg:CC_NZC CC_REGNUM)
1020         (unspec:CC_NZC
1021           [(match_operand 2)
1022            (match_operand 3)
1023            (const_int SVE_KNOWN_PTRUE)
1024            (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1025              [(unspec:VNx16BI
1026                 [(match_operand:SI 4 "const_int_operand")
1027                  (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
1028                 UNSPEC_PTRUE)])]
1029           UNSPEC_PTEST))
1030    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1031   "TARGET_SVE"
1032   {
1033     return aarch64_output_sve_ptrues (operands[1]);
1034   }
1035   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1036   {
1037     operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
1038   }
1039 )
1040
1041 ;; Match PTRUES Pn.[HWD] when only the flags result is useful (which is
1042 ;; a way of testing VL).
1043 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
1044   [(set (reg:CC_NZC CC_REGNUM)
1045         (unspec:CC_NZC
1046           [(match_operand 2)
1047            (match_operand 3)
1048            (const_int SVE_KNOWN_PTRUE)
1049            (subreg:PRED_HSD
1050              (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1051                [(unspec:VNx16BI
1052                   [(match_operand:SI 4 "const_int_operand")
1053                    (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1054                   UNSPEC_PTRUE)]) 0)]
1055           UNSPEC_PTEST))
1056    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1057   "TARGET_SVE"
1058   {
1059     return aarch64_output_sve_ptrues (operands[1]);
1060   }
1061   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1062   {
1063     operands[2] = CONSTM1_RTX (VNx16BImode);
1064     operands[3] = CONSTM1_RTX (<MODE>mode);
1065   }
1066 )
1067
1068 ;; -------------------------------------------------------------------------
1069 ;; ---- Moves relating to the FFR
1070 ;; -------------------------------------------------------------------------
1071 ;; RDFFR
1072 ;; RDFFRS
1073 ;; SETFFR
1074 ;; WRFFR
1075 ;; -------------------------------------------------------------------------
1076
1077 ;; [W1 in the block comment above about FFR handling]
1078 ;;
1079 ;; Write to the FFR and start a new FFRT scheduling region.
1080 (define_insn "aarch64_wrffr"
1081   [(set (reg:VNx16BI FFR_REGNUM)
1082         (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
1083    (set (reg:VNx16BI FFRT_REGNUM)
1084         (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
1085   "TARGET_SVE"
1086   "@
1087    setffr
1088    wrffr\t%0.b"
1089 )
1090
1091 ;; [L2 in the block comment above about FFR handling]
1092 ;;
1093 ;; Introduce a read from and write to the FFR in the current FFRT region,
1094 ;; so that the FFR value is live on entry to the region and so that the FFR
1095 ;; value visibly changes within the region.  This is used (possibly multiple
1096 ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
1097 (define_insn "aarch64_update_ffr_for_load"
1098   [(set (reg:VNx16BI FFR_REGNUM)
1099         (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1100                          (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1101   "TARGET_SVE"
1102   ""
1103   [(set_attr "type" "no_insn")]
1104 )
1105
1106 ;; [R1 in the block comment above about FFR handling]
1107 ;;
1108 ;; Notionally copy the FFR to the FFRT, so that the current FFR value
1109 ;; can be read from there by the RDFFR instructions below.  This acts
1110 ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1111 ;; creates a natural dependency with earlier writes.
1112 (define_insn "aarch64_copy_ffr_to_ffrt"
1113   [(set (reg:VNx16BI FFRT_REGNUM)
1114         (reg:VNx16BI FFR_REGNUM))]
1115   "TARGET_SVE"
1116   ""
1117   [(set_attr "type" "no_insn")]
1118 )
1119
1120 ;; [R2 in the block comment above about FFR handling]
1121 ;;
1122 ;; Read the FFR via the FFRT.
1123 (define_insn "aarch64_rdffr"
1124   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1125         (reg:VNx16BI FFRT_REGNUM))]
1126   "TARGET_SVE"
1127   "rdffr\t%0.b"
1128 )
1129
1130 ;; Likewise with zero predication.
1131 (define_insn "aarch64_rdffr_z"
1132   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1133         (and:VNx16BI
1134           (reg:VNx16BI FFRT_REGNUM)
1135           (match_operand:VNx16BI 1 "register_operand" "Upa")))]
1136   "TARGET_SVE"
1137   "rdffr\t%0.b, %1/z"
1138 )
1139
1140 ;; Read the FFR to test for a fault, without using the predicate result.
1141 (define_insn "*aarch64_rdffr_z_ptest"
1142   [(set (reg:CC_NZC CC_REGNUM)
1143         (unspec:CC_NZC
1144           [(match_operand:VNx16BI 1 "register_operand" "Upa")
1145            (match_dup 1)
1146            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1147            (and:VNx16BI
1148              (reg:VNx16BI FFRT_REGNUM)
1149              (match_dup 1))]
1150           UNSPEC_PTEST))
1151    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1152   "TARGET_SVE"
1153   "rdffrs\t%0.b, %1/z"
1154 )
1155
1156 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1157 (define_insn "*aarch64_rdffr_ptest"
1158   [(set (reg:CC_NZC CC_REGNUM)
1159         (unspec:CC_NZC
1160           [(match_operand:VNx16BI 1 "register_operand" "Upa")
1161            (match_dup 1)
1162            (const_int SVE_KNOWN_PTRUE)
1163            (reg:VNx16BI FFRT_REGNUM)]
1164           UNSPEC_PTEST))
1165    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1166   "TARGET_SVE"
1167   "rdffrs\t%0.b, %1/z"
1168 )
1169
1170 ;; Read the FFR with zero predication and test the result.
1171 (define_insn "*aarch64_rdffr_z_cc"
1172   [(set (reg:CC_NZC CC_REGNUM)
1173         (unspec:CC_NZC
1174           [(match_operand:VNx16BI 1 "register_operand" "Upa")
1175            (match_dup 1)
1176            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1177            (and:VNx16BI
1178              (reg:VNx16BI FFRT_REGNUM)
1179              (match_dup 1))]
1180           UNSPEC_PTEST))
1181    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1182         (and:VNx16BI
1183           (reg:VNx16BI FFRT_REGNUM)
1184           (match_dup 1)))]
1185   "TARGET_SVE"
1186   "rdffrs\t%0.b, %1/z"
1187 )
1188
1189 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1190 (define_insn "*aarch64_rdffr_cc"
1191   [(set (reg:CC_NZC CC_REGNUM)
1192         (unspec:CC_NZC
1193           [(match_operand:VNx16BI 1 "register_operand" "Upa")
1194            (match_dup 1)
1195            (const_int SVE_KNOWN_PTRUE)
1196            (reg:VNx16BI FFRT_REGNUM)]
1197           UNSPEC_PTEST))
1198    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1199         (reg:VNx16BI FFRT_REGNUM))]
1200   "TARGET_SVE"
1201   "rdffrs\t%0.b, %1/z"
1202 )
1203
1204 ;; [R3 in the block comment above about FFR handling]
1205 ;;
1206 ;; Arbitrarily update the FFRT after a read from the FFR.  This acts as
1207 ;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
1208 (define_insn "aarch64_update_ffrt"
1209   [(set (reg:VNx16BI FFRT_REGNUM)
1210         (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1211   "TARGET_SVE"
1212   ""
1213   [(set_attr "type" "no_insn")]
1214 )
1215
1216 ;; =========================================================================
1217 ;; == Loads
1218 ;; =========================================================================
1219
1220 ;; -------------------------------------------------------------------------
1221 ;; ---- Normal contiguous loads
1222 ;; -------------------------------------------------------------------------
1223 ;; Includes contiguous forms of:
1224 ;; - LD1B
1225 ;; - LD1D
1226 ;; - LD1H
1227 ;; - LD1W
1228 ;; - LD2B
1229 ;; - LD2D
1230 ;; - LD2H
1231 ;; - LD2W
1232 ;; - LD3B
1233 ;; - LD3D
1234 ;; - LD3H
1235 ;; - LD3W
1236 ;; - LD4B
1237 ;; - LD4D
1238 ;; - LD4H
1239 ;; - LD4W
1240 ;; -------------------------------------------------------------------------
1241
1242 ;; Predicated LD1.
1243 (define_insn "maskload<mode><vpred>"
1244   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1245         (unspec:SVE_ALL
1246           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1247            (match_operand:SVE_ALL 1 "memory_operand" "m")]
1248           UNSPEC_LD1_SVE))]
1249   "TARGET_SVE"
1250   "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
1251 )
1252
1253 ;; Unpredicated LD[234].
1254 (define_expand "vec_load_lanes<mode><vsingle>"
1255   [(set (match_operand:SVE_STRUCT 0 "register_operand")
1256         (unspec:SVE_STRUCT
1257           [(match_dup 2)
1258            (match_operand:SVE_STRUCT 1 "memory_operand")]
1259           UNSPEC_LDN))]
1260   "TARGET_SVE"
1261   {
1262     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1263   }
1264 )
1265
1266 ;; Predicated LD[234].
1267 (define_insn "vec_mask_load_lanes<mode><vsingle>"
1268   [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1269         (unspec:SVE_STRUCT
1270           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1271            (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
1272           UNSPEC_LDN))]
1273   "TARGET_SVE"
1274   "ld<vector_count><Vesize>\t%0, %2/z, %1"
1275 )
1276
1277 ;; -------------------------------------------------------------------------
1278 ;; ---- Extending contiguous loads
1279 ;; -------------------------------------------------------------------------
1280 ;; Includes contiguous forms of:
1281 ;; LD1B
1282 ;; LD1H
1283 ;; LD1SB
1284 ;; LD1SH
1285 ;; LD1SW
1286 ;; LD1W
1287 ;; -------------------------------------------------------------------------
1288
1289 ;; Predicated load and extend, with 8 elements per 128-bit block.
1290 (define_insn_and_rewrite "@aarch64_load<SVE_PRED_LOAD:pred_load>_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1291   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1292         (unspec:SVE_HSDI
1293           [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1294            (ANY_EXTEND:SVE_HSDI
1295              (unspec:SVE_PARTIAL_I
1296                [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1297                 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1298                SVE_PRED_LOAD))]
1299           UNSPEC_PRED_X))]
1300   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1301   "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1302   "&& !CONSTANT_P (operands[3])"
1303   {
1304     operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1305   }
1306 )
1307
1308 ;; -------------------------------------------------------------------------
1309 ;; ---- First-faulting contiguous loads
1310 ;; -------------------------------------------------------------------------
1311 ;; Includes contiguous forms of:
1312 ;; - LDFF1B
1313 ;; - LDFF1D
1314 ;; - LDFF1H
1315 ;; - LDFF1W
1316 ;; - LDNF1B
1317 ;; - LDNF1D
1318 ;; - LDNF1H
1319 ;; - LDNF1W
1320 ;; -------------------------------------------------------------------------
1321
1322 ;; Contiguous non-extending first-faulting or non-faulting loads.
1323 (define_insn "@aarch64_ld<fn>f1<mode>"
1324   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1325         (unspec:SVE_FULL
1326           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1327            (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1328            (reg:VNx16BI FFRT_REGNUM)]
1329           SVE_LDFF1_LDNF1))]
1330   "TARGET_SVE"
1331   "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1332 )
1333
1334 ;; -------------------------------------------------------------------------
1335 ;; ---- First-faulting extending contiguous loads
1336 ;; -------------------------------------------------------------------------
1337 ;; Includes contiguous forms of:
1338 ;; - LDFF1B
1339 ;; - LDFF1H
1340 ;; - LDFF1SB
1341 ;; - LDFF1SH
1342 ;; - LDFF1SW
1343 ;; - LDFF1W
1344 ;; - LDNF1B
1345 ;; - LDNF1H
1346 ;; - LDNF1SB
1347 ;; - LDNF1SH
1348 ;; - LDNF1SW
1349 ;; - LDNF1W
1350 ;; -------------------------------------------------------------------------
1351
1352 ;; Predicated first-faulting or non-faulting load and extend.
1353 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1354   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1355         (unspec:SVE_HSDI
1356           [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1357            (ANY_EXTEND:SVE_HSDI
1358              (unspec:SVE_PARTIAL_I
1359                [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1360                 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1361                 (reg:VNx16BI FFRT_REGNUM)]
1362                SVE_LDFF1_LDNF1))]
1363           UNSPEC_PRED_X))]
1364   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1365   "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1366   "&& !CONSTANT_P (operands[3])"
1367   {
1368     operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1369   }
1370 )
1371
1372 ;; -------------------------------------------------------------------------
1373 ;; ---- Non-temporal contiguous loads
1374 ;; -------------------------------------------------------------------------
1375 ;; Includes:
1376 ;; - LDNT1B
1377 ;; - LDNT1D
1378 ;; - LDNT1H
1379 ;; - LDNT1W
1380 ;; -------------------------------------------------------------------------
1381
1382 ;; Predicated contiguous non-temporal load.
1383 (define_insn "@aarch64_ldnt1<mode>"
1384   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1385         (unspec:SVE_FULL
1386           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1387            (match_operand:SVE_FULL 1 "memory_operand" "m")]
1388           UNSPEC_LDNT1_SVE))]
1389   "TARGET_SVE"
1390   "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1391 )
1392
1393 ;; -------------------------------------------------------------------------
1394 ;; ---- Normal gather loads
1395 ;; -------------------------------------------------------------------------
1396 ;; Includes gather forms of:
1397 ;; - LD1D
1398 ;; - LD1W
1399 ;; -------------------------------------------------------------------------
1400
1401 ;; Unpredicated gather loads.
1402 (define_expand "gather_load<mode><v_int_container>"
1403   [(set (match_operand:SVE_24 0 "register_operand")
1404         (unspec:SVE_24
1405           [(match_dup 5)
1406            (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1407            (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1408            (match_operand:DI 3 "const_int_operand")
1409            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1410            (mem:BLK (scratch))]
1411           UNSPEC_LD1_GATHER))]
1412   "TARGET_SVE"
1413   {
1414     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1415   }
1416 )
1417
1418 ;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
1419 ;; unsigned extension and false for signed extension.
1420 (define_insn "mask_gather_load<mode><v_int_container>"
1421   [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
1422         (unspec:SVE_4
1423           [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1424            (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
1425            (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1426            (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1427            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1428            (mem:BLK (scratch))]
1429           UNSPEC_LD1_GATHER))]
1430   "TARGET_SVE"
1431   "@
1432    ld1<Vesize>\t%0.s, %5/z, [%2.s]
1433    ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1434    ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1435    ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1436    ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1437    ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1438 )
1439
1440 ;; Predicated gather loads for 64-bit elements.  The value of operand 3
1441 ;; doesn't matter in this case.
1442 (define_insn "mask_gather_load<mode><v_int_container>"
1443   [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
1444         (unspec:SVE_2
1445           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1446            (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
1447            (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1448            (match_operand:DI 3 "const_int_operand")
1449            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
1450            (mem:BLK (scratch))]
1451           UNSPEC_LD1_GATHER))]
1452   "TARGET_SVE"
1453   "@
1454    ld1<Vesize>\t%0.d, %5/z, [%2.d]
1455    ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1456    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1457    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1458 )
1459
1460 ;; Likewise, but with the offset being extended from 32 bits.
1461 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1462   [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1463         (unspec:SVE_2
1464           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1465            (match_operand:DI 1 "register_operand" "rk, rk")
1466            (unspec:VNx2DI
1467              [(match_operand 6)
1468               (ANY_EXTEND:VNx2DI
1469                 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1470              UNSPEC_PRED_X)
1471            (match_operand:DI 3 "const_int_operand")
1472            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1473            (mem:BLK (scratch))]
1474           UNSPEC_LD1_GATHER))]
1475   "TARGET_SVE"
1476   "@
1477    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1478    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
1479   "&& !CONSTANT_P (operands[6])"
1480   {
1481     operands[6] = CONSTM1_RTX (VNx2BImode);
1482   }
1483 )
1484
1485 ;; Likewise, but with the offset being truncated to 32 bits and then
1486 ;; sign-extended.
1487 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1488   [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1489         (unspec:SVE_2
1490           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1491            (match_operand:DI 1 "register_operand" "rk, rk")
1492            (unspec:VNx2DI
1493              [(match_operand 6)
1494               (sign_extend:VNx2DI
1495                 (truncate:VNx2SI
1496                   (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1497              UNSPEC_PRED_X)
1498            (match_operand:DI 3 "const_int_operand")
1499            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1500            (mem:BLK (scratch))]
1501           UNSPEC_LD1_GATHER))]
1502   "TARGET_SVE"
1503   "@
1504    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1505    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1506   "&& !CONSTANT_P (operands[6])"
1507   {
1508     operands[6] = CONSTM1_RTX (VNx2BImode);
1509   }
1510 )
1511
1512 ;; Likewise, but with the offset being truncated to 32 bits and then
1513 ;; zero-extended.
1514 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1515   [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1516         (unspec:SVE_2
1517           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1518            (match_operand:DI 1 "register_operand" "rk, rk")
1519            (and:VNx2DI
1520              (match_operand:VNx2DI 2 "register_operand" "w, w")
1521              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1522            (match_operand:DI 3 "const_int_operand")
1523            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1524            (mem:BLK (scratch))]
1525           UNSPEC_LD1_GATHER))]
1526   "TARGET_SVE"
1527   "@
1528    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1529    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1530 )
1531
1532 ;; -------------------------------------------------------------------------
1533 ;; ---- Extending gather loads
1534 ;; -------------------------------------------------------------------------
1535 ;; Includes gather forms of:
1536 ;; - LD1B
1537 ;; - LD1H
1538 ;; - LD1SB
1539 ;; - LD1SH
1540 ;; - LD1SW
1541 ;; - LD1W
1542 ;; -------------------------------------------------------------------------
1543
1544 ;; Predicated extending gather loads for 32-bit elements.  Operand 3 is
1545 ;; true for unsigned extension and false for signed extension.
1546 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1547   [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
1548         (unspec:SVE_4HSI
1549           [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1550            (ANY_EXTEND:SVE_4HSI
1551              (unspec:SVE_4BHI
1552                [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1553                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
1554                 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1555                 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1556                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1557                 (mem:BLK (scratch))]
1558                UNSPEC_LD1_GATHER))]
1559           UNSPEC_PRED_X))]
1560   "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1561   "@
1562    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1563    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1564    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1565    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1566    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1567    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1568   "&& !CONSTANT_P (operands[6])"
1569   {
1570     operands[6] = CONSTM1_RTX (VNx4BImode);
1571   }
1572 )
1573
1574 ;; Predicated extending gather loads for 64-bit elements.  The value of
1575 ;; operand 3 doesn't matter in this case.
1576 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1577   [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
1578         (unspec:SVE_2HSDI
1579           [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1580            (ANY_EXTEND:SVE_2HSDI
1581              (unspec:SVE_2BHSI
1582                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1583                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
1584                 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1585                 (match_operand:DI 3 "const_int_operand")
1586                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
1587                 (mem:BLK (scratch))]
1588                UNSPEC_LD1_GATHER))]
1589           UNSPEC_PRED_X))]
1590   "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1591   "@
1592    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1593    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1594    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1595    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1596   "&& !CONSTANT_P (operands[6])"
1597   {
1598     operands[6] = CONSTM1_RTX (VNx2BImode);
1599   }
1600 )
1601
1602 ;; Likewise, but with the offset being extended from 32 bits.
1603 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1604   [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1605         (unspec:SVE_2HSDI
1606           [(match_operand 6)
1607            (ANY_EXTEND:SVE_2HSDI
1608              (unspec:SVE_2BHSI
1609                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1610                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1611                 (unspec:VNx2DI
1612                   [(match_operand 7)
1613                    (ANY_EXTEND2:VNx2DI
1614                      (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1615                   UNSPEC_PRED_X)
1616                 (match_operand:DI 3 "const_int_operand")
1617                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1618                 (mem:BLK (scratch))]
1619                UNSPEC_LD1_GATHER))]
1620           UNSPEC_PRED_X))]
1621   "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1622   "@
1623    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1624    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
1625   "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1626   {
1627     operands[6] = CONSTM1_RTX (VNx2BImode);
1628     operands[7] = CONSTM1_RTX (VNx2BImode);
1629   }
1630 )
1631
1632 ;; Likewise, but with the offset being truncated to 32 bits and then
1633 ;; sign-extended.
1634 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1635   [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1636         (unspec:SVE_2HSDI
1637           [(match_operand 6)
1638            (ANY_EXTEND:SVE_2HSDI
1639              (unspec:SVE_2BHSI
1640                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1641                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1642                 (unspec:VNx2DI
1643                   [(match_operand 7)
1644                    (sign_extend:VNx2DI
1645                      (truncate:VNx2SI
1646                        (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1647                   UNSPEC_PRED_X)
1648                 (match_operand:DI 3 "const_int_operand")
1649                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1650                 (mem:BLK (scratch))]
1651                UNSPEC_LD1_GATHER))]
1652           UNSPEC_PRED_X))]
1653   "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1654   "@
1655    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1656    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1657   "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1658   {
1659     operands[6] = CONSTM1_RTX (VNx2BImode);
1660     operands[7] = CONSTM1_RTX (VNx2BImode);
1661   }
1662 )
1663
1664 ;; Likewise, but with the offset being truncated to 32 bits and then
1665 ;; zero-extended.
1666 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1667   [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1668         (unspec:SVE_2HSDI
1669           [(match_operand 7)
1670            (ANY_EXTEND:SVE_2HSDI
1671              (unspec:SVE_2BHSI
1672                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1673                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1674                 (and:VNx2DI
1675                   (match_operand:VNx2DI 2 "register_operand" "w, w")
1676                   (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1677                 (match_operand:DI 3 "const_int_operand")
1678                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1679                 (mem:BLK (scratch))]
1680                UNSPEC_LD1_GATHER))]
1681           UNSPEC_PRED_X))]
1682   "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1683   "@
1684    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1685    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1686   "&& !CONSTANT_P (operands[7])"
1687   {
1688     operands[7] = CONSTM1_RTX (VNx2BImode);
1689   }
1690 )
1691
1692 ;; -------------------------------------------------------------------------
1693 ;; ---- First-faulting gather loads
1694 ;; -------------------------------------------------------------------------
1695 ;; Includes gather forms of:
1696 ;; - LDFF1D
1697 ;; - LDFF1W
1698 ;; -------------------------------------------------------------------------
1699
1700 ;; Predicated first-faulting gather loads for 32-bit elements.  Operand
1701 ;; 3 is true for unsigned extension and false for signed extension.
1702 (define_insn "@aarch64_ldff1_gather<mode>"
1703   [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
1704         (unspec:SVE_FULL_S
1705           [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1706            (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
1707            (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1708            (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1709            (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
1710            (mem:BLK (scratch))
1711            (reg:VNx16BI FFRT_REGNUM)]
1712           UNSPEC_LDFF1_GATHER))]
1713   "TARGET_SVE"
1714   "@
1715    ldff1w\t%0.s, %5/z, [%2.s]
1716    ldff1w\t%0.s, %5/z, [%2.s, #%1]
1717    ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1718    ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1719    ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1720    ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1721 )
1722
1723 ;; Predicated first-faulting gather loads for 64-bit elements.  The value
1724 ;; of operand 3 doesn't matter in this case.
1725 (define_insn "@aarch64_ldff1_gather<mode>"
1726   [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
1727         (unspec:SVE_FULL_D
1728           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1729            (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
1730            (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1731            (match_operand:DI 3 "const_int_operand")
1732            (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
1733            (mem:BLK (scratch))
1734            (reg:VNx16BI FFRT_REGNUM)]
1735           UNSPEC_LDFF1_GATHER))]
1736   "TARGET_SVE"
1737   "@
1738    ldff1d\t%0.d, %5/z, [%2.d]
1739    ldff1d\t%0.d, %5/z, [%2.d, #%1]
1740    ldff1d\t%0.d, %5/z, [%1, %2.d]
1741    ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1742 )
1743
1744 ;; Likewise, but with the offset being sign-extended from 32 bits.
1745 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1746   [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1747         (unspec:SVE_FULL_D
1748           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1749            (match_operand:DI 1 "register_operand" "rk, rk")
1750            (unspec:VNx2DI
1751              [(match_operand 6)
1752               (sign_extend:VNx2DI
1753                 (truncate:VNx2SI
1754                   (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1755              UNSPEC_PRED_X)
1756            (match_operand:DI 3 "const_int_operand")
1757            (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1758            (mem:BLK (scratch))
1759            (reg:VNx16BI FFRT_REGNUM)]
1760           UNSPEC_LDFF1_GATHER))]
1761   "TARGET_SVE"
1762   "@
1763    ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1764    ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1765   "&& !CONSTANT_P (operands[6])"
1766   {
1767     operands[6] = CONSTM1_RTX (VNx2BImode);
1768   }
1769 )
1770
1771 ;; Likewise, but with the offset being zero-extended from 32 bits.
1772 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1773   [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1774         (unspec:SVE_FULL_D
1775           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1776            (match_operand:DI 1 "register_operand" "rk, rk")
1777            (and:VNx2DI
1778              (match_operand:VNx2DI 2 "register_operand" "w, w")
1779              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1780            (match_operand:DI 3 "const_int_operand")
1781            (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1782            (mem:BLK (scratch))
1783            (reg:VNx16BI FFRT_REGNUM)]
1784           UNSPEC_LDFF1_GATHER))]
1785   "TARGET_SVE"
1786   "@
1787    ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1788    ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1789 )
1790
1791 ;; -------------------------------------------------------------------------
1792 ;; ---- First-faulting extending gather loads
1793 ;; -------------------------------------------------------------------------
1794 ;; Includes gather forms of:
1795 ;; - LDFF1B
1796 ;; - LDFF1H
1797 ;; - LDFF1SB
1798 ;; - LDFF1SH
1799 ;; - LDFF1SW
1800 ;; - LDFF1W
1801 ;; -------------------------------------------------------------------------
1802
1803 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1804 ;; Operand 3 is true for unsigned extension and false for signed extension.
1805 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1806   [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
1807         (unspec:VNx4_WIDE
1808           [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1809            (ANY_EXTEND:VNx4_WIDE
1810              (unspec:VNx4_NARROW
1811                [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1812                 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
1813                 (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
1814                 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1815                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1816                 (mem:BLK (scratch))
1817                 (reg:VNx16BI FFRT_REGNUM)]
1818                UNSPEC_LDFF1_GATHER))]
1819           UNSPEC_PRED_X))]
1820   "TARGET_SVE"
1821   "@
1822    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1823    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1824    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1825    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1826    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1827    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1828   "&& !CONSTANT_P (operands[6])"
1829   {
1830     operands[6] = CONSTM1_RTX (VNx4BImode);
1831   }
1832 )
1833
1834 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1835 ;; The value of operand 3 doesn't matter in this case.
1836 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
1837   [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
1838         (unspec:VNx2_WIDE
1839           [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1840            (ANY_EXTEND:VNx2_WIDE
1841              (unspec:VNx2_NARROW
1842                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1843                 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
1844                 (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
1845                 (match_operand:DI 3 "const_int_operand")
1846                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
1847                 (mem:BLK (scratch))
1848                 (reg:VNx16BI FFRT_REGNUM)]
1849                UNSPEC_LDFF1_GATHER))]
1850           UNSPEC_PRED_X))]
1851   "TARGET_SVE"
1852   "@
1853    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
1854    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1855    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
1856    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1857   "&& !CONSTANT_P (operands[6])"
1858   {
1859     operands[6] = CONSTM1_RTX (VNx2BImode);
1860   }
1861 )
1862
1863 ;; Likewise, but with the offset being sign-extended from 32 bits.
1864 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
1865   [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1866         (unspec:VNx2_WIDE
1867           [(match_operand 6)
1868            (ANY_EXTEND:VNx2_WIDE
1869              (unspec:VNx2_NARROW
1870                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1871                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1872                 (unspec:VNx2DI
1873                   [(match_operand 7)
1874                    (sign_extend:VNx2DI
1875                      (truncate:VNx2SI
1876                        (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1877                   UNSPEC_PRED_X)
1878                 (match_operand:DI 3 "const_int_operand")
1879                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1880                 (mem:BLK (scratch))
1881                 (reg:VNx16BI FFRT_REGNUM)]
1882                UNSPEC_LDFF1_GATHER))]
1883           UNSPEC_PRED_X))]
1884   "TARGET_SVE"
1885   "@
1886    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1887    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1888   "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1889   {
1890     operands[6] = CONSTM1_RTX (VNx2BImode);
1891     operands[7] = CONSTM1_RTX (VNx2BImode);
1892   }
1893 )
1894
1895 ;; Likewise, but with the offset being zero-extended from 32 bits.
1896 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
1897   [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1898         (unspec:VNx2_WIDE
1899           [(match_operand 7)
1900            (ANY_EXTEND:VNx2_WIDE
1901              (unspec:VNx2_NARROW
1902                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1903                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1904                 (and:VNx2DI
1905                   (match_operand:VNx2DI 2 "register_operand" "w, w")
1906                   (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1907                 (match_operand:DI 3 "const_int_operand")
1908                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1909                 (mem:BLK (scratch))
1910                 (reg:VNx16BI FFRT_REGNUM)]
1911                UNSPEC_LDFF1_GATHER))]
1912           UNSPEC_PRED_X))]
1913   "TARGET_SVE"
1914   "@
1915    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1916    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1917   "&& !CONSTANT_P (operands[7])"
1918   {
1919     operands[7] = CONSTM1_RTX (VNx2BImode);
1920   }
1921 )
1922
1923 ;; =========================================================================
1924 ;; == Prefetches
1925 ;; =========================================================================
1926
1927 ;; -------------------------------------------------------------------------
1928 ;; ---- Contiguous prefetches
1929 ;; -------------------------------------------------------------------------
1930 ;; Includes contiguous forms of:
1931 ;; - PRFB
1932 ;; - PRFD
1933 ;; - PRFH
1934 ;; - PRFW
1935 ;; -------------------------------------------------------------------------
1936
1937 ;; Contiguous predicated prefetches.  Operand 2 gives the real prefetch
1938 ;; operation (as an svprfop), with operands 3 and 4 providing distilled
1939 ;; information.
1940 (define_insn "@aarch64_sve_prefetch<mode>"
1941   [(prefetch (unspec:DI
1942                [(match_operand:<VPRED> 0 "register_operand" "Upl")
1943                 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
1944                 (match_operand:DI 2 "const_int_operand")]
1945                UNSPEC_SVE_PREFETCH)
1946              (match_operand:DI 3 "const_int_operand")
1947              (match_operand:DI 4 "const_int_operand"))]
1948   "TARGET_SVE"
1949   {
1950     operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
1951     return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
1952   }
1953 )
1954
1955 ;; -------------------------------------------------------------------------
1956 ;; ---- Gather prefetches
1957 ;; -------------------------------------------------------------------------
1958 ;; Includes gather forms of:
1959 ;; - PRFB
1960 ;; - PRFD
1961 ;; - PRFH
1962 ;; - PRFW
1963 ;; -------------------------------------------------------------------------
1964
1965 ;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
1966 ;; are:
1967 ;; 0: the governing predicate
1968 ;; 1: the scalar component of the address
1969 ;; 2: the vector component of the address
1970 ;; 3: 1 for zero extension, 0 for sign extension
1971 ;; 4: the scale multiplier
1972 ;; 5: a vector zero that identifies the mode of data being accessed
1973 ;; 6: the prefetch operator (an svprfop)
1974 ;; 7: the normal RTL prefetch rw flag
1975 ;; 8: the normal RTL prefetch locality value
1976 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
1977   [(prefetch (unspec:DI
1978                [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1979                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
1980                 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
1981                 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1982                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1983                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1984                 (match_operand:DI 6 "const_int_operand")]
1985                UNSPEC_SVE_PREFETCH_GATHER)
1986              (match_operand:DI 7 "const_int_operand")
1987              (match_operand:DI 8 "const_int_operand"))]
1988   "TARGET_SVE"
1989   {
1990     static const char *const insns[][2] = {
1991       "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
1992       "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
1993       "prfb", "%0, [%1, %2.s, sxtw]",
1994       "prfb", "%0, [%1, %2.s, uxtw]",
1995       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
1996       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
1997     };
1998     const char *const *parts = insns[which_alternative];
1999     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2000   }
2001 )
2002
2003 ;; Predicated gather prefetches for 64-bit elements.  The value of operand 3
2004 ;; doesn't matter in this case.
2005 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
2006   [(prefetch (unspec:DI
2007                [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
2008                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
2009                 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
2010                 (match_operand:DI 3 "const_int_operand")
2011                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
2012                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2013                 (match_operand:DI 6 "const_int_operand")]
2014                UNSPEC_SVE_PREFETCH_GATHER)
2015              (match_operand:DI 7 "const_int_operand")
2016              (match_operand:DI 8 "const_int_operand"))]
2017   "TARGET_SVE"
2018   {
2019     static const char *const insns[][2] = {
2020       "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
2021       "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
2022       "prfb", "%0, [%1, %2.d]",
2023       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
2024     };
2025     const char *const *parts = insns[which_alternative];
2026     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2027   }
2028 )
2029
2030 ;; Likewise, but with the offset being sign-extended from 32 bits.
2031 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
2032   [(prefetch (unspec:DI
2033                [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2034                 (match_operand:DI 1 "register_operand" "rk, rk")
2035                 (unspec:VNx2DI_ONLY
2036                   [(match_operand 9)
2037                    (sign_extend:VNx2DI
2038                      (truncate:VNx2SI
2039                        (match_operand:VNx2DI 2 "register_operand" "w, w")))]
2040                   UNSPEC_PRED_X)
2041                 (match_operand:DI 3 "const_int_operand")
2042                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2043                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2044                 (match_operand:DI 6 "const_int_operand")]
2045                UNSPEC_SVE_PREFETCH_GATHER)
2046              (match_operand:DI 7 "const_int_operand")
2047              (match_operand:DI 8 "const_int_operand"))]
2048   "TARGET_SVE"
2049   {
2050     static const char *const insns[][2] = {
2051       "prfb", "%0, [%1, %2.d, sxtw]",
2052       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
2053     };
2054     const char *const *parts = insns[which_alternative];
2055     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2056   }
2057   "&& !rtx_equal_p (operands[0], operands[9])"
2058   {
2059     operands[9] = copy_rtx (operands[0]);
2060   }
2061 )
2062
2063 ;; Likewise, but with the offset being zero-extended from 32 bits.
2064 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
2065   [(prefetch (unspec:DI
2066                [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2067                 (match_operand:DI 1 "register_operand" "rk, rk")
2068                 (and:VNx2DI_ONLY
2069                   (match_operand:VNx2DI 2 "register_operand" "w, w")
2070                   (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
2071                 (match_operand:DI 3 "const_int_operand")
2072                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2073                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2074                 (match_operand:DI 6 "const_int_operand")]
2075                UNSPEC_SVE_PREFETCH_GATHER)
2076              (match_operand:DI 7 "const_int_operand")
2077              (match_operand:DI 8 "const_int_operand"))]
2078   "TARGET_SVE"
2079   {
2080     static const char *const insns[][2] = {
2081       "prfb", "%0, [%1, %2.d, uxtw]",
2082       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
2083     };
2084     const char *const *parts = insns[which_alternative];
2085     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2086   }
2087 )
2088
2089 ;; =========================================================================
2090 ;; == Stores
2091 ;; =========================================================================
2092
2093 ;; -------------------------------------------------------------------------
2094 ;; ---- Normal contiguous stores
2095 ;; -------------------------------------------------------------------------
2096 ;; Includes contiguous forms of:
2097 ;; - ST1B
2098 ;; - ST1D
2099 ;; - ST1H
2100 ;; - ST1W
2101 ;; - ST2B
2102 ;; - ST2D
2103 ;; - ST2H
2104 ;; - ST2W
2105 ;; - ST3B
2106 ;; - ST3D
2107 ;; - ST3H
2108 ;; - ST3W
2109 ;; - ST4B
2110 ;; - ST4D
2111 ;; - ST4H
2112 ;; - ST4W
2113 ;; -------------------------------------------------------------------------
2114
2115 ;; Predicated ST1.
2116 (define_insn "maskstore<mode><vpred>"
2117   [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2118         (unspec:SVE_ALL
2119           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2120            (match_operand:SVE_ALL 1 "register_operand" "w")
2121            (match_dup 0)]
2122           UNSPEC_ST1_SVE))]
2123   "TARGET_SVE"
2124   "st1<Vesize>\t%1.<Vctype>, %2, %0"
2125 )
2126
2127 ;; Unpredicated ST[234].  This is always a full update, so the dependence
2128 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2129 ;; There doesn't seem to be any obvious benefit to treating the all-true
2130 ;; case differently though.  In particular, it's very unlikely that we'll
2131 ;; only find out during RTL that a store_lanes is dead.
2132 (define_expand "vec_store_lanes<mode><vsingle>"
2133   [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2134         (unspec:SVE_STRUCT
2135           [(match_dup 2)
2136            (match_operand:SVE_STRUCT 1 "register_operand")
2137            (match_dup 0)]
2138           UNSPEC_STN))]
2139   "TARGET_SVE"
2140   {
2141     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2142   }
2143 )
2144
2145 ;; Predicated ST[234].
2146 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2147   [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2148         (unspec:SVE_STRUCT
2149           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2150            (match_operand:SVE_STRUCT 1 "register_operand" "w")
2151            (match_dup 0)]
2152           UNSPEC_STN))]
2153   "TARGET_SVE"
2154   "st<vector_count><Vesize>\t%1, %2, %0"
2155 )
2156
2157 ;; -------------------------------------------------------------------------
2158 ;; ---- Truncating contiguous stores
2159 ;; -------------------------------------------------------------------------
2160 ;; Includes:
2161 ;; - ST1B
2162 ;; - ST1H
2163 ;; - ST1W
2164 ;; -------------------------------------------------------------------------
2165
2166 ;; Predicated truncate and store, with 8 elements per 128-bit block.
2167 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2168   [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2169         (unspec:VNx8_NARROW
2170           [(match_operand:VNx8BI 2 "register_operand" "Upl")
2171            (truncate:VNx8_NARROW
2172              (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2173            (match_dup 0)]
2174           UNSPEC_ST1_SVE))]
2175   "TARGET_SVE"
2176   "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2177 )
2178
2179 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2180 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2181   [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2182         (unspec:VNx4_NARROW
2183           [(match_operand:VNx4BI 2 "register_operand" "Upl")
2184            (truncate:VNx4_NARROW
2185              (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2186            (match_dup 0)]
2187           UNSPEC_ST1_SVE))]
2188   "TARGET_SVE"
2189   "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2190 )
2191
2192 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2193 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2194   [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2195         (unspec:VNx2_NARROW
2196           [(match_operand:VNx2BI 2 "register_operand" "Upl")
2197            (truncate:VNx2_NARROW
2198              (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2199            (match_dup 0)]
2200           UNSPEC_ST1_SVE))]
2201   "TARGET_SVE"
2202   "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2203 )
2204
2205 ;; -------------------------------------------------------------------------
2206 ;; ---- Non-temporal contiguous stores
2207 ;; -------------------------------------------------------------------------
2208 ;; Includes:
2209 ;; - STNT1B
2210 ;; - STNT1D
2211 ;; - STNT1H
2212 ;; - STNT1W
2213 ;; -------------------------------------------------------------------------
2214
2215 (define_insn "@aarch64_stnt1<mode>"
2216   [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2217         (unspec:SVE_FULL
2218           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2219            (match_operand:SVE_FULL 1 "register_operand" "w")
2220            (match_dup 0)]
2221           UNSPEC_STNT1_SVE))]
2222   "TARGET_SVE"
2223   "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2224 )
2225
2226 ;; -------------------------------------------------------------------------
2227 ;; ---- Normal scatter stores
2228 ;; -------------------------------------------------------------------------
2229 ;; Includes scatter forms of:
2230 ;; - ST1D
2231 ;; - ST1W
2232 ;; -------------------------------------------------------------------------
2233
2234 ;; Unpredicated scatter stores.
2235 (define_expand "scatter_store<mode><v_int_container>"
2236   [(set (mem:BLK (scratch))
2237         (unspec:BLK
2238           [(match_dup 5)
2239            (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2240            (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2241            (match_operand:DI 2 "const_int_operand")
2242            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2243            (match_operand:SVE_24 4 "register_operand")]
2244           UNSPEC_ST1_SCATTER))]
2245   "TARGET_SVE"
2246   {
2247     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2248   }
2249 )
2250
2251 ;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
2252 ;; unsigned extension and false for signed extension.
2253 (define_insn "mask_scatter_store<mode><v_int_container>"
2254   [(set (mem:BLK (scratch))
2255         (unspec:BLK
2256           [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2257            (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
2258            (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2259            (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2260            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2261            (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
2262           UNSPEC_ST1_SCATTER))]
2263   "TARGET_SVE"
2264   "@
2265    st1<Vesize>\t%4.s, %5, [%1.s]
2266    st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2267    st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2268    st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2269    st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2270    st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2271 )
2272
2273 ;; Predicated scatter stores for 64-bit elements.  The value of operand 2
2274 ;; doesn't matter in this case.
2275 (define_insn "mask_scatter_store<mode><v_int_container>"
2276   [(set (mem:BLK (scratch))
2277         (unspec:BLK
2278           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2279            (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
2280            (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2281            (match_operand:DI 2 "const_int_operand")
2282            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
2283            (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
2284           UNSPEC_ST1_SCATTER))]
2285   "TARGET_SVE"
2286   "@
2287    st1<Vesize>\t%4.d, %5, [%1.d]
2288    st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2289    st1<Vesize>\t%4.d, %5, [%0, %1.d]
2290    st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2291 )
2292
2293 ;; Likewise, but with the offset being extended from 32 bits.
2294 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2295   [(set (mem:BLK (scratch))
2296         (unspec:BLK
2297           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2298            (match_operand:DI 0 "register_operand" "rk, rk")
2299            (unspec:VNx2DI
2300              [(match_operand 6)
2301               (ANY_EXTEND:VNx2DI
2302                 (match_operand:VNx2SI 1 "register_operand" "w, w"))]
2303              UNSPEC_PRED_X)
2304            (match_operand:DI 2 "const_int_operand")
2305            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2306            (match_operand:SVE_2 4 "register_operand" "w, w")]
2307           UNSPEC_ST1_SCATTER))]
2308   "TARGET_SVE"
2309   "@
2310    st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2311    st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
2312   "&& !CONSTANT_P (operands[6])"
2313   {
2314     operands[6] = CONSTM1_RTX (<VPRED>mode);
2315   }
2316 )
2317
2318 ;; Likewise, but with the offset being truncated to 32 bits and then
2319 ;; sign-extended.
2320 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2321   [(set (mem:BLK (scratch))
2322         (unspec:BLK
2323           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2324            (match_operand:DI 0 "register_operand" "rk, rk")
2325            (unspec:VNx2DI
2326              [(match_operand 6)
2327               (sign_extend:VNx2DI
2328                 (truncate:VNx2SI
2329                   (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2330              UNSPEC_PRED_X)
2331            (match_operand:DI 2 "const_int_operand")
2332            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2333            (match_operand:SVE_2 4 "register_operand" "w, w")]
2334           UNSPEC_ST1_SCATTER))]
2335   "TARGET_SVE"
2336   "@
2337    st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2338    st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2339   "&& !CONSTANT_P (operands[6])"
2340   {
2341     operands[6] = CONSTM1_RTX (<VPRED>mode);
2342   }
2343 )
2344
2345 ;; Likewise, but with the offset being truncated to 32 bits and then
2346 ;; zero-extended.
2347 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2348   [(set (mem:BLK (scratch))
2349         (unspec:BLK
2350           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2351            (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2352            (and:VNx2DI
2353              (match_operand:VNx2DI 1 "register_operand" "w, w")
2354              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2355            (match_operand:DI 2 "const_int_operand")
2356            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2357            (match_operand:SVE_2 4 "register_operand" "w, w")]
2358           UNSPEC_ST1_SCATTER))]
2359   "TARGET_SVE"
2360   "@
2361    st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2362    st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2363 )
2364
2365 ;; -------------------------------------------------------------------------
2366 ;; ---- Truncating scatter stores
2367 ;; -------------------------------------------------------------------------
2368 ;; Includes scatter forms of:
2369 ;; - ST1B
2370 ;; - ST1H
2371 ;; - ST1W
2372 ;; -------------------------------------------------------------------------
2373
2374 ;; Predicated truncating scatter stores for 32-bit elements.  Operand 2 is
2375 ;; true for unsigned extension and false for signed extension.
2376 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2377   [(set (mem:BLK (scratch))
2378         (unspec:BLK
2379           [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2380            (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2381            (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2382            (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2383            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2384            (truncate:VNx4_NARROW
2385              (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
2386           UNSPEC_ST1_SCATTER))]
2387   "TARGET_SVE"
2388   "@
2389    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2390    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2391    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2392    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2393    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2394    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2395 )
2396
2397 ;; Predicated truncating scatter stores for 64-bit elements.  The value of
2398 ;; operand 2 doesn't matter in this case.
2399 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2400   [(set (mem:BLK (scratch))
2401         (unspec:BLK
2402           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2403            (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2404            (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2405            (match_operand:DI 2 "const_int_operand")
2406            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2407            (truncate:VNx2_NARROW
2408              (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
2409           UNSPEC_ST1_SCATTER))]
2410   "TARGET_SVE"
2411   "@
2412    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2413    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2414    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2415    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2416 )
2417
2418 ;; Likewise, but with the offset being sign-extended from 32 bits.
2419 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2420   [(set (mem:BLK (scratch))
2421         (unspec:BLK
2422           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2423            (match_operand:DI 0 "register_operand" "rk, rk")
2424            (unspec:VNx2DI
2425              [(match_operand 6)
2426               (sign_extend:VNx2DI
2427                 (truncate:VNx2SI
2428                   (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2429              UNSPEC_PRED_X)
2430            (match_operand:DI 2 "const_int_operand")
2431            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2432            (truncate:VNx2_NARROW
2433              (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2434           UNSPEC_ST1_SCATTER))]
2435   "TARGET_SVE"
2436   "@
2437    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2438    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2439   "&& !rtx_equal_p (operands[5], operands[6])"
2440   {
2441     operands[6] = copy_rtx (operands[5]);
2442   }
2443 )
2444
2445 ;; Likewise, but with the offset being zero-extended from 32 bits.
2446 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2447   [(set (mem:BLK (scratch))
2448         (unspec:BLK
2449           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2450            (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2451            (and:VNx2DI
2452              (match_operand:VNx2DI 1 "register_operand" "w, w")
2453              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2454            (match_operand:DI 2 "const_int_operand")
2455            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2456            (truncate:VNx2_NARROW
2457              (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2458           UNSPEC_ST1_SCATTER))]
2459   "TARGET_SVE"
2460   "@
2461    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2462    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2463 )
2464
2465 ;; =========================================================================
2466 ;; == Vector creation
2467 ;; =========================================================================
2468
2469 ;; -------------------------------------------------------------------------
2470 ;; ---- [INT,FP] Duplicate element
2471 ;; -------------------------------------------------------------------------
2472 ;; Includes:
2473 ;; - DUP
2474 ;; - MOV
2475 ;; - LD1RB
2476 ;; - LD1RD
2477 ;; - LD1RH
2478 ;; - LD1RW
2479 ;; - LD1ROB (F64MM)
2480 ;; - LD1ROD (F64MM)
2481 ;; - LD1ROH (F64MM)
2482 ;; - LD1ROW (F64MM)
2483 ;; - LD1RQB
2484 ;; - LD1RQD
2485 ;; - LD1RQH
2486 ;; - LD1RQW
2487 ;; -------------------------------------------------------------------------
2488
2489 (define_expand "vec_duplicate<mode>"
2490   [(parallel
2491     [(set (match_operand:SVE_ALL 0 "register_operand")
2492           (vec_duplicate:SVE_ALL
2493             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2494      (clobber (scratch:VNx16BI))])]
2495   "TARGET_SVE"
2496   {
2497     if (MEM_P (operands[1]))
2498       {
2499         rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2500         emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2501                                        CONST0_RTX (<MODE>mode)));
2502         DONE;
2503       }
2504   }
2505 )
2506
2507 ;; Accept memory operands for the benefit of combine, and also in case
2508 ;; the scalar input gets spilled to memory during RA.  We want to split
2509 ;; the load at the first opportunity in order to allow the PTRUE to be
2510 ;; optimized with surrounding code.
2511 (define_insn_and_split "*vec_duplicate<mode>_reg"
2512   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
2513         (vec_duplicate:SVE_ALL
2514           (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
2515    (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2516   "TARGET_SVE"
2517   "@
2518    mov\t%0.<Vetype>, %<vwcore>1
2519    mov\t%0.<Vetype>, %<Vetype>1
2520    #"
2521   "&& MEM_P (operands[1])"
2522   [(const_int 0)]
2523   {
2524     if (GET_CODE (operands[2]) == SCRATCH)
2525       operands[2] = gen_reg_rtx (VNx16BImode);
2526     emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2527     rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2528     emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2529                                    CONST0_RTX (<MODE>mode)));
2530     DONE;
2531   }
2532   [(set_attr "length" "4,4,8")]
2533 )
2534
2535 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
2536 (define_insn "@aarch64_vec_duplicate_vq<mode>_le"
2537   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2538         (vec_duplicate:SVE_FULL
2539           (match_operand:<V128> 1 "register_operand" "w")))]
2540   "TARGET_SVE && !BYTES_BIG_ENDIAN"
2541   {
2542     operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2543     return "dup\t%0.q, %1.q[0]";
2544   }
2545 )
2546
2547 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2548 ;; The SVE register layout puts memory lane N into (architectural)
2549 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2550 ;; lsb into the register lsb.  We therefore have to describe this in rtl
2551 ;; terms as a reverse of the V128 vector followed by a duplicate.
2552 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2553   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2554         (vec_duplicate:SVE_FULL
2555           (vec_select:<V128>
2556             (match_operand:<V128> 1 "register_operand" "w")
2557             (match_operand 2 "descending_int_parallel"))))]
2558   "TARGET_SVE
2559    && BYTES_BIG_ENDIAN
2560    && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2561                 GET_MODE_NUNITS (<V128>mode) - 1)"
2562   {
2563     operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2564     return "dup\t%0.q, %1.q[0]";
2565   }
2566 )
2567
2568 ;; This is used for vec_duplicate<mode>s from memory, but can also
2569 ;; be used by combine to optimize selects of a vec_duplicate<mode>
2570 ;; with zero.
2571 (define_insn "sve_ld1r<mode>"
2572   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2573         (unspec:SVE_ALL
2574           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2575            (vec_duplicate:SVE_ALL
2576              (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2577            (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2578           UNSPEC_SEL))]
2579   "TARGET_SVE"
2580   "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2581 )
2582
2583 ;; Load 128 bits from memory under predicate control and duplicate to
2584 ;; fill a vector.
2585 (define_insn "@aarch64_sve_ld1rq<mode>"
2586   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2587         (unspec:SVE_FULL
2588           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2589            (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2590           UNSPEC_LD1RQ))]
2591   "TARGET_SVE"
2592   {
2593     operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2594     return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
2595   }
2596 )
2597
2598 (define_insn "@aarch64_sve_ld1ro<mode>"
2599   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2600         (unspec:SVE_FULL
2601           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2602            (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2603                                "UO<Vesize>")]
2604           UNSPEC_LD1RO))]
2605   "TARGET_SVE_F64MM"
2606   {
2607     operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2608     return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2609   }
2610 )
2611
2612 ;; -------------------------------------------------------------------------
2613 ;; ---- [INT,FP] Initialize from individual elements
2614 ;; -------------------------------------------------------------------------
2615 ;; Includes:
2616 ;; - INSR
2617 ;; -------------------------------------------------------------------------
2618
2619 (define_expand "vec_init<mode><Vel>"
2620   [(match_operand:SVE_FULL 0 "register_operand")
2621     (match_operand 1 "")]
2622   "TARGET_SVE"
2623   {
2624     aarch64_sve_expand_vector_init (operands[0], operands[1]);
2625     DONE;
2626   }
2627 )
2628
2629 ;; Shift an SVE vector left and insert a scalar into element 0.
2630 (define_insn "vec_shl_insert_<mode>"
2631   [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
2632         (unspec:SVE_FULL
2633           [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
2634            (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
2635           UNSPEC_INSR))]
2636   "TARGET_SVE"
2637   "@
2638    insr\t%0.<Vetype>, %<vwcore>2
2639    insr\t%0.<Vetype>, %<Vetype>2
2640    movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2641    movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
2642   [(set_attr "movprfx" "*,*,yes,yes")]
2643 )
2644
2645 ;; -------------------------------------------------------------------------
2646 ;; ---- [INT] Linear series
2647 ;; -------------------------------------------------------------------------
2648 ;; Includes:
2649 ;; - INDEX
2650 ;; -------------------------------------------------------------------------
2651
2652 (define_insn "vec_series<mode>"
2653   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
2654         (vec_series:SVE_I
2655           (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
2656           (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
2657   "TARGET_SVE"
2658   "@
2659    index\t%0.<Vctype>, #%1, %<vccore>2
2660    index\t%0.<Vctype>, %<vccore>1, #%2
2661    index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
2662 )
2663
2664 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2665 ;; of an INDEX instruction.
2666 (define_insn "*vec_series<mode>_plus"
2667   [(set (match_operand:SVE_I 0 "register_operand" "=w")
2668         (plus:SVE_I
2669           (vec_duplicate:SVE_I
2670             (match_operand:<VEL> 1 "register_operand" "r"))
2671           (match_operand:SVE_I 2 "immediate_operand")))]
2672   "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2673   {
2674     operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2675     return "index\t%0.<Vctype>, %<vccore>1, #%2";
2676   }
2677 )
2678
2679 ;; -------------------------------------------------------------------------
2680 ;; ---- [PRED] Duplicate element
2681 ;; -------------------------------------------------------------------------
2682 ;; The patterns in this section are synthetic.
2683 ;; -------------------------------------------------------------------------
2684
2685 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2686 ;; input into the top bit and using a WHILELO.  An alternative would be to
2687 ;; duplicate the input and do a compare with zero.
2688 (define_expand "vec_duplicate<mode>"
2689   [(set (match_operand:PRED_ALL 0 "register_operand")
2690         (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2691   "TARGET_SVE"
2692   {
2693     rtx tmp = gen_reg_rtx (DImode);
2694     rtx op1 = gen_lowpart (DImode, operands[1]);
2695     emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2696     emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2697     DONE;
2698   }
2699 )
2700
2701 ;; =========================================================================
2702 ;; == Vector decomposition
2703 ;; =========================================================================
2704
2705 ;; -------------------------------------------------------------------------
2706 ;; ---- [INT,FP] Extract index
2707 ;; -------------------------------------------------------------------------
2708 ;; Includes:
2709 ;; - DUP    (Advanced SIMD)
2710 ;; - DUP    (SVE)
2711 ;; - EXT    (SVE)
2712 ;; - ST1    (Advanced SIMD)
2713 ;; - UMOV   (Advanced SIMD)
2714 ;; -------------------------------------------------------------------------
2715
2716 (define_expand "vec_extract<mode><Vel>"
2717   [(set (match_operand:<VEL> 0 "register_operand")
2718         (vec_select:<VEL>
2719           (match_operand:SVE_FULL 1 "register_operand")
2720           (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2721   "TARGET_SVE"
2722   {
2723     poly_int64 val;
2724     if (poly_int_rtx_p (operands[2], &val)
2725         && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2726       {
2727         /* The last element can be extracted with a LASTB and a false
2728            predicate.  */
2729         rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2730         emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2731         DONE;
2732       }
2733     if (!CONST_INT_P (operands[2]))
2734       {
2735         /* Create an index with operand[2] as the base and -1 as the step.
2736            It will then be zero for the element we care about.  */
2737         rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2738         index = force_reg (<VEL_INT>mode, index);
2739         rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2740         emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2741
2742         /* Get a predicate that is true for only that element.  */
2743         rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2744         rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2745         rtx sel = gen_reg_rtx (<VPRED>mode);
2746         emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2747
2748         /* Select the element using LASTB.  */
2749         emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2750         DONE;
2751       }
2752   }
2753 )
2754
2755 ;; Extract element zero.  This is a special case because we want to force
2756 ;; the registers to be the same for the second alternative, and then
2757 ;; split the instruction into nothing after RA.
2758 (define_insn_and_split "*vec_extract<mode><Vel>_0"
2759   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2760         (vec_select:<VEL>
2761           (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
2762           (parallel [(const_int 0)])))]
2763   "TARGET_SVE"
2764   {
2765     operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2766     switch (which_alternative)
2767       {
2768         case 0:
2769           return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2770         case 1:
2771           return "#";
2772         case 2:
2773           return "st1\\t{%1.<Vetype>}[0], %0";
2774         default:
2775           gcc_unreachable ();
2776       }
2777   }
2778   "&& reload_completed
2779    && REG_P (operands[0])
2780    && REGNO (operands[0]) == REGNO (operands[1])"
2781   [(const_int 0)]
2782   {
2783     emit_note (NOTE_INSN_DELETED);
2784     DONE;
2785   }
2786   [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
2787 )
2788
2789 ;; Extract an element from the Advanced SIMD portion of the register.
2790 ;; We don't just reuse the aarch64-simd.md pattern because we don't
2791 ;; want any change in lane number on big-endian targets.
2792 (define_insn "*vec_extract<mode><Vel>_v128"
2793   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2794         (vec_select:<VEL>
2795           (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
2796           (parallel [(match_operand:SI 2 "const_int_operand")])))]
2797   "TARGET_SVE
2798    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
2799   {
2800     operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2801     switch (which_alternative)
2802       {
2803         case 0:
2804           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2805         case 1:
2806           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2807         case 2:
2808           return "st1\\t{%1.<Vetype>}[%2], %0";
2809         default:
2810           gcc_unreachable ();
2811       }
2812   }
2813   [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
2814 )
2815
2816 ;; Extract an element in the range of DUP.  This pattern allows the
2817 ;; source and destination to be different.
2818 (define_insn "*vec_extract<mode><Vel>_dup"
2819   [(set (match_operand:<VEL> 0 "register_operand" "=w")
2820         (vec_select:<VEL>
2821           (match_operand:SVE_FULL 1 "register_operand" "w")
2822           (parallel [(match_operand:SI 2 "const_int_operand")])))]
2823   "TARGET_SVE
2824    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
2825   {
2826     operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2827     return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
2828   }
2829 )
2830
2831 ;; Extract an element outside the range of DUP.  This pattern requires the
2832 ;; source and destination to be the same.
2833 (define_insn "*vec_extract<mode><Vel>_ext"
2834   [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
2835         (vec_select:<VEL>
2836           (match_operand:SVE_FULL 1 "register_operand" "0, w")
2837           (parallel [(match_operand:SI 2 "const_int_operand")])))]
2838   "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
2839   {
2840     operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2841     operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
2842     return (which_alternative == 0
2843             ? "ext\t%0.b, %0.b, %0.b, #%2"
2844             : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
2845   }
2846   [(set_attr "movprfx" "*,yes")]
2847 )
2848
2849 ;; -------------------------------------------------------------------------
2850 ;; ---- [INT,FP] Extract active element
2851 ;; -------------------------------------------------------------------------
2852 ;; Includes:
2853 ;; - LASTA
2854 ;; - LASTB
2855 ;; -------------------------------------------------------------------------
2856
2857 ;; Extract the last active element of operand 1 into operand 0.
2858 ;; If no elements are active, extract the last inactive element instead.
2859 (define_insn "@extract_<last_op>_<mode>"
2860   [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
2861         (unspec:<VEL>
2862           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2863            (match_operand:SVE_FULL 2 "register_operand" "w, w")]
2864           LAST))]
2865   "TARGET_SVE"
2866   "@
2867    last<ab>\t%<vwcore>0, %1, %2.<Vetype>
2868    last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
2869 )
2870
2871 ;; -------------------------------------------------------------------------
2872 ;; ---- [PRED] Extract index
2873 ;; -------------------------------------------------------------------------
2874 ;; The patterns in this section are synthetic.
2875 ;; -------------------------------------------------------------------------
2876
2877 ;; Handle extractions from a predicate by converting to an integer vector
2878 ;; and extracting from there.
2879 (define_expand "vec_extract<vpred><Vel>"
2880   [(match_operand:<VEL> 0 "register_operand")
2881    (match_operand:<VPRED> 1 "register_operand")
2882    (match_operand:SI 2 "nonmemory_operand")
2883    ;; Dummy operand to which we can attach the iterator.
2884    (reg:SVE_FULL_I V0_REGNUM)]
2885   "TARGET_SVE"
2886   {
2887     rtx tmp = gen_reg_rtx (<MODE>mode);
2888     emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
2889                                              CONST1_RTX (<MODE>mode),
2890                                              CONST0_RTX (<MODE>mode)));
2891     emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
2892     DONE;
2893   }
2894 )
2895
2896 ;; =========================================================================
2897 ;; == Unary arithmetic
2898 ;; =========================================================================
2899
2900 ;; -------------------------------------------------------------------------
2901 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
2902 ;; -------------------------------------------------------------------------
2903 ;; Includes:
2904 ;; - ABS
2905 ;; - CLS (= clrsb)
2906 ;; - CLZ
2907 ;; - CNT (= popcount)
2908 ;; - NEG
2909 ;; - NOT
2910 ;; -------------------------------------------------------------------------
2911
2912 ;; Unpredicated integer unary arithmetic.
2913 (define_expand "<optab><mode>2"
2914   [(set (match_operand:SVE_I 0 "register_operand")
2915         (unspec:SVE_I
2916           [(match_dup 2)
2917            (SVE_INT_UNARY:SVE_I
2918              (match_operand:SVE_I 1 "register_operand"))]
2919           UNSPEC_PRED_X))]
2920   "TARGET_SVE"
2921   {
2922     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2923   }
2924 )
2925
2926 ;; Integer unary arithmetic predicated with a PTRUE.
2927 (define_insn "@aarch64_pred_<optab><mode>"
2928   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
2929         (unspec:SVE_I
2930           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2931            (SVE_INT_UNARY:SVE_I
2932              (match_operand:SVE_I 2 "register_operand" "0, w"))]
2933           UNSPEC_PRED_X))]
2934   "TARGET_SVE"
2935   "@
2936    <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2937    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2938   [(set_attr "movprfx" "*,yes")]
2939 )
2940
2941 ;; Predicated integer unary arithmetic with merging.
2942 (define_expand "@cond_<optab><mode>"
2943   [(set (match_operand:SVE_I 0 "register_operand")
2944         (unspec:SVE_I
2945           [(match_operand:<VPRED> 1 "register_operand")
2946            (SVE_INT_UNARY:SVE_I
2947              (match_operand:SVE_I 2 "register_operand"))
2948            (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
2949           UNSPEC_SEL))]
2950   "TARGET_SVE"
2951 )
2952
2953 ;; Predicated integer unary arithmetic, merging with the first input.
2954 (define_insn "*cond_<optab><mode>_2"
2955   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
2956         (unspec:SVE_I
2957           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2958            (SVE_INT_UNARY:SVE_I
2959              (match_operand:SVE_I 2 "register_operand" "0, w"))
2960            (match_dup 2)]
2961           UNSPEC_SEL))]
2962   "TARGET_SVE"
2963   "@
2964    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
2965    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2966   [(set_attr "movprfx" "*,yes")]
2967 )
2968
2969 ;; Predicated integer unary arithmetic, merging with an independent value.
2970 ;;
2971 ;; The earlyclobber isn't needed for the first alternative, but omitting
2972 ;; it would only help the case in which operands 2 and 3 are the same,
2973 ;; which is handled above rather than here.  Marking all the alternatives
2974 ;; as earlyclobber helps to make the instruction more regular to the
2975 ;; register allocator.
2976 (define_insn "*cond_<optab><mode>_any"
2977   [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
2978         (unspec:SVE_I
2979           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2980            (SVE_INT_UNARY:SVE_I
2981              (match_operand:SVE_I 2 "register_operand" "w, w, w"))
2982            (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2983           UNSPEC_SEL))]
2984   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
2985   "@
2986    <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2987    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2988    movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2989   [(set_attr "movprfx" "*,yes,yes")]
2990 )
2991
2992 ;; -------------------------------------------------------------------------
2993 ;; ---- [INT] General unary arithmetic corresponding to unspecs
2994 ;; -------------------------------------------------------------------------
2995 ;; Includes
2996 ;; - RBIT
2997 ;; - REVB
2998 ;; - REVH
2999 ;; - REVW
3000 ;; -------------------------------------------------------------------------
3001
3002 ;; Predicated integer unary operations.
3003 (define_insn "@aarch64_pred_<optab><mode>"
3004   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3005         (unspec:SVE_FULL_I
3006           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3007            (unspec:SVE_FULL_I
3008              [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")]
3009              SVE_INT_UNARY)]
3010           UNSPEC_PRED_X))]
3011   "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3012   "@
3013    <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3014    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3015   [(set_attr "movprfx" "*,yes")]
3016 )
3017
3018 ;; Another way of expressing the REVB, REVH and REVW patterns, with this
3019 ;; form being easier for permutes.  The predicate mode determines the number
3020 ;; of lanes and the data mode decides the granularity of the reversal within
3021 ;; each lane.
3022 (define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>"
3023   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
3024         (unspec:SVE_ALL
3025           [(match_operand:PRED_HSD 1 "register_operand" "Upl, Upl")
3026            (unspec:SVE_ALL
3027              [(match_operand:SVE_ALL 2 "register_operand" "0, w")]
3028              UNSPEC_REVBHW)]
3029           UNSPEC_PRED_X))]
3030   "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>"
3031   "@
3032    rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3033    movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>"
3034   [(set_attr "movprfx" "*,yes")]
3035 )
3036
3037 ;; Predicated integer unary operations with merging.
3038 (define_insn "@cond_<optab><mode>"
3039   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
3040         (unspec:SVE_FULL_I
3041           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3042            (unspec:SVE_FULL_I
3043              [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
3044              SVE_INT_UNARY)
3045            (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3046           UNSPEC_SEL))]
3047   "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3048   "@
3049    <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3050    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3051    movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3052   [(set_attr "movprfx" "*,yes,yes")]
3053 )
3054
3055 ;; -------------------------------------------------------------------------
3056 ;; ---- [INT] Sign and zero extension
3057 ;; -------------------------------------------------------------------------
3058 ;; Includes:
3059 ;; - SXTB
3060 ;; - SXTH
3061 ;; - SXTW
3062 ;; - UXTB
3063 ;; - UXTH
3064 ;; - UXTW
3065 ;; -------------------------------------------------------------------------
3066
3067 ;; Unpredicated sign and zero extension from a narrower mode.
3068 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3069   [(set (match_operand:SVE_HSDI 0 "register_operand")
3070         (unspec:SVE_HSDI
3071           [(match_dup 2)
3072            (ANY_EXTEND:SVE_HSDI
3073              (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
3074           UNSPEC_PRED_X))]
3075   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3076   {
3077     operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
3078   }
3079 )
3080
3081 ;; Predicated sign and zero extension from a narrower mode.
3082 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3083   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w, ?&w")
3084         (unspec:SVE_HSDI
3085           [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
3086            (ANY_EXTEND:SVE_HSDI
3087              (match_operand:SVE_PARTIAL_I 2 "register_operand" "0, w"))]
3088           UNSPEC_PRED_X))]
3089   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3090   "@
3091    <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3092    movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
3093   [(set_attr "movprfx" "*,yes")]
3094 )
3095
3096 ;; Predicated truncate-and-sign-extend operations.
3097 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3098   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
3099         (unspec:SVE_FULL_HSDI
3100           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
3101            (sign_extend:SVE_FULL_HSDI
3102              (truncate:SVE_PARTIAL_I
3103                (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")))]
3104           UNSPEC_PRED_X))]
3105   "TARGET_SVE
3106    && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3107   "@
3108    sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3109    movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
3110   [(set_attr "movprfx" "*,yes")]
3111 )
3112
3113 ;; Predicated truncate-and-sign-extend operations with merging.
3114 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3115   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
3116         (unspec:SVE_FULL_HSDI
3117           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
3118            (sign_extend:SVE_FULL_HSDI
3119              (truncate:SVE_PARTIAL_I
3120                (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
3121            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3122           UNSPEC_SEL))]
3123   "TARGET_SVE
3124    && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3125   "@
3126    sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3127    movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3128    movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
3129   [(set_attr "movprfx" "*,yes,yes")]
3130 )
3131
3132 ;; Predicated truncate-and-zero-extend operations, merging with the
3133 ;; first input.
3134 ;;
3135 ;; The canonical form of this operation is an AND of a constant rather
3136 ;; than (zero_extend (truncate ...)).
3137 (define_insn "*cond_uxt<mode>_2"
3138   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3139         (unspec:SVE_I
3140           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3141            (and:SVE_I
3142              (match_operand:SVE_I 2 "register_operand" "0, w")
3143              (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3144            (match_dup 2)]
3145           UNSPEC_SEL))]
3146   "TARGET_SVE"
3147   "@
3148    uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3149    movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3150   [(set_attr "movprfx" "*,yes")]
3151 )
3152
3153 ;; Predicated truncate-and-zero-extend operations, merging with an
3154 ;; independent value.
3155 ;;
3156 ;; The earlyclobber isn't needed for the first alternative, but omitting
3157 ;; it would only help the case in which operands 2 and 4 are the same,
3158 ;; which is handled above rather than here.  Marking all the alternatives
3159 ;; as early-clobber helps to make the instruction more regular to the
3160 ;; register allocator.
3161 (define_insn "*cond_uxt<mode>_any"
3162   [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
3163         (unspec:SVE_I
3164           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3165            (and:SVE_I
3166              (match_operand:SVE_I 2 "register_operand" "w, w, w")
3167              (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3168            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3169           UNSPEC_SEL))]
3170   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3171   "@
3172    uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3173    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3174    movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3175   [(set_attr "movprfx" "*,yes,yes")]
3176 )
3177
3178 ;; -------------------------------------------------------------------------
3179 ;; ---- [INT] Truncation
3180 ;; -------------------------------------------------------------------------
3181 ;; The patterns in this section are synthetic.
3182 ;; -------------------------------------------------------------------------
3183
3184 ;; Truncate to a partial SVE vector from either a full vector or a
3185 ;; wider partial vector.  This is a no-op, because we can just ignore
3186 ;; the unused upper bits of the source.
3187 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3188   [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3189         (truncate:SVE_PARTIAL_I
3190           (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3191   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3192   "#"
3193   "&& reload_completed"
3194   [(set (match_dup 0) (match_dup 1))]
3195   {
3196     operands[1] = aarch64_replace_reg_mode (operands[1],
3197                                             <SVE_PARTIAL_I:MODE>mode);
3198   }
3199 )
3200
3201 ;; -------------------------------------------------------------------------
3202 ;; ---- [INT] Logical inverse
3203 ;; -------------------------------------------------------------------------
3204 ;; Includes:
3205 ;; - CNOT
3206 ;; -------------------------------------------------------------------------
3207
3208 ;; Predicated logical inverse.
3209 (define_expand "@aarch64_pred_cnot<mode>"
3210   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3211         (unspec:SVE_FULL_I
3212           [(unspec:<VPRED>
3213              [(match_operand:<VPRED> 1 "register_operand")
3214               (match_operand:SI 2 "aarch64_sve_ptrue_flag")
3215               (eq:<VPRED>
3216                 (match_operand:SVE_FULL_I 3 "register_operand")
3217                 (match_dup 4))]
3218              UNSPEC_PRED_Z)
3219            (match_dup 5)
3220            (match_dup 4)]
3221           UNSPEC_SEL))]
3222   "TARGET_SVE"
3223   {
3224     operands[4] = CONST0_RTX (<MODE>mode);
3225     operands[5] = CONST1_RTX (<MODE>mode);
3226   }
3227 )
3228
3229 (define_insn "*cnot<mode>"
3230   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3231         (unspec:SVE_I
3232           [(unspec:<VPRED>
3233              [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3234               (match_operand:SI 5 "aarch64_sve_ptrue_flag")
3235               (eq:<VPRED>
3236                 (match_operand:SVE_I 2 "register_operand" "0, w")
3237                 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3238              UNSPEC_PRED_Z)
3239            (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3240            (match_dup 3)]
3241           UNSPEC_SEL))]
3242   "TARGET_SVE"
3243   "@
3244    cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3245    movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3246   [(set_attr "movprfx" "*,yes")]
3247 )
3248
3249 ;; Predicated logical inverse with merging.
3250 (define_expand "@cond_cnot<mode>"
3251   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3252         (unspec:SVE_FULL_I
3253           [(match_operand:<VPRED> 1 "register_operand")
3254            (unspec:SVE_FULL_I
3255              [(unspec:<VPRED>
3256                 [(match_dup 4)
3257                  (const_int SVE_KNOWN_PTRUE)
3258                  (eq:<VPRED>
3259                    (match_operand:SVE_FULL_I 2 "register_operand")
3260                    (match_dup 5))]
3261                 UNSPEC_PRED_Z)
3262               (match_dup 6)
3263               (match_dup 5)]
3264              UNSPEC_SEL)
3265            (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3266           UNSPEC_SEL))]
3267   "TARGET_SVE"
3268   {
3269     operands[4] = CONSTM1_RTX (<VPRED>mode);
3270     operands[5] = CONST0_RTX (<MODE>mode);
3271     operands[6] = CONST1_RTX (<MODE>mode);
3272   }
3273 )
3274
3275 ;; Predicated logical inverse, merging with the first input.
3276 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3277   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3278         (unspec:SVE_I
3279           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3280            ;; Logical inverse of operand 2 (as above).
3281            (unspec:SVE_I
3282              [(unspec:<VPRED>
3283                 [(match_operand 5)
3284                  (const_int SVE_KNOWN_PTRUE)
3285                  (eq:<VPRED>
3286                    (match_operand:SVE_I 2 "register_operand" "0, w")
3287                    (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3288                 UNSPEC_PRED_Z)
3289               (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3290               (match_dup 3)]
3291              UNSPEC_SEL)
3292            (match_dup 2)]
3293           UNSPEC_SEL))]
3294   "TARGET_SVE"
3295   "@
3296    cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3297    movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3298   "&& !CONSTANT_P (operands[5])"
3299   {
3300     operands[5] = CONSTM1_RTX (<VPRED>mode);
3301   }
3302   [(set_attr "movprfx" "*,yes")]
3303 )
3304
3305 ;; Predicated logical inverse, merging with an independent value.
3306 ;;
3307 ;; The earlyclobber isn't needed for the first alternative, but omitting
3308 ;; it would only help the case in which operands 2 and 6 are the same,
3309 ;; which is handled above rather than here.  Marking all the alternatives
3310 ;; as earlyclobber helps to make the instruction more regular to the
3311 ;; register allocator.
3312 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3313   [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w")
3314         (unspec:SVE_I
3315           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3316            ;; Logical inverse of operand 2 (as above).
3317            (unspec:SVE_I
3318              [(unspec:<VPRED>
3319                 [(match_operand 5)
3320                  (const_int SVE_KNOWN_PTRUE)
3321                  (eq:<VPRED>
3322                    (match_operand:SVE_I 2 "register_operand" "w, w, w")
3323                    (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3324                 UNSPEC_PRED_Z)
3325               (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3326               (match_dup 3)]
3327              UNSPEC_SEL)
3328            (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3329           UNSPEC_SEL))]
3330   "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3331   "@
3332    cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3333    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3334    movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3335   "&& !CONSTANT_P (operands[5])"
3336   {
3337     operands[5] = CONSTM1_RTX (<VPRED>mode);
3338   }
3339   [(set_attr "movprfx" "*,yes,yes")]
3340 )
3341
3342 ;; -------------------------------------------------------------------------
3343 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3344 ;; -------------------------------------------------------------------------
3345 ;; Includes:
3346 ;; - FEXPA
3347 ;; -------------------------------------------------------------------------
3348
3349 ;; Unpredicated unary operations that take an integer and return a float.
3350 (define_insn "@aarch64_sve_<optab><mode>"
3351   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3352         (unspec:SVE_FULL_F
3353           [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3354           SVE_FP_UNARY_INT))]
3355   "TARGET_SVE"
3356   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3357 )
3358
3359 ;; -------------------------------------------------------------------------
3360 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3361 ;; -------------------------------------------------------------------------
3362 ;; Includes:
3363 ;; - FABS
3364 ;; - FNEG
3365 ;; - FRECPE
3366 ;; - FRECPX
3367 ;; - FRINTA
3368 ;; - FRINTI
3369 ;; - FRINTM
3370 ;; - FRINTN
3371 ;; - FRINTP
3372 ;; - FRINTX
3373 ;; - FRINTZ
3374 ;; - FRSQRTE
3375 ;; - FSQRT
3376 ;; -------------------------------------------------------------------------
3377
3378 ;; Unpredicated floating-point unary operations.
3379 (define_insn "@aarch64_sve_<optab><mode>"
3380   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3381         (unspec:SVE_FULL_F
3382           [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3383           SVE_FP_UNARY))]
3384   "TARGET_SVE"
3385   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3386 )
3387
3388 ;; Unpredicated floating-point unary operations.
3389 (define_expand "<optab><mode>2"
3390   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3391         (unspec:SVE_FULL_F
3392           [(match_dup 2)
3393            (const_int SVE_RELAXED_GP)
3394            (match_operand:SVE_FULL_F 1 "register_operand")]
3395           SVE_COND_FP_UNARY_OPTAB))]
3396   "TARGET_SVE"
3397   {
3398     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3399   }
3400 )
3401
3402 ;; Predicated floating-point unary operations.
3403 (define_insn "@aarch64_pred_<optab><mode>"
3404   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3405         (unspec:SVE_FULL_F
3406           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3407            (match_operand:SI 3 "aarch64_sve_gp_strictness")
3408            (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3409           SVE_COND_FP_UNARY))]
3410   "TARGET_SVE"
3411   "@
3412    <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3413    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3414   [(set_attr "movprfx" "*,yes")]
3415 )
3416
3417 ;; Predicated floating-point unary arithmetic with merging.
3418 (define_expand "@cond_<optab><mode>"
3419   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3420         (unspec:SVE_FULL_F
3421           [(match_operand:<VPRED> 1 "register_operand")
3422            (unspec:SVE_FULL_F
3423              [(match_dup 1)
3424               (const_int SVE_STRICT_GP)
3425               (match_operand:SVE_FULL_F 2 "register_operand")]
3426              SVE_COND_FP_UNARY)
3427            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3428           UNSPEC_SEL))]
3429   "TARGET_SVE"
3430 )
3431
3432 ;; Predicated floating-point unary arithmetic, merging with the first input.
3433 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
3434   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3435         (unspec:SVE_FULL_F
3436           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3437            (unspec:SVE_FULL_F
3438              [(match_operand 3)
3439               (const_int SVE_RELAXED_GP)
3440               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3441              SVE_COND_FP_UNARY)
3442            (match_dup 2)]
3443           UNSPEC_SEL))]
3444   "TARGET_SVE"
3445   "@
3446    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3447    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3448   "&& !rtx_equal_p (operands[1], operands[3])"
3449   {
3450     operands[3] = copy_rtx (operands[1]);
3451   }
3452   [(set_attr "movprfx" "*,yes")]
3453 )
3454
3455 (define_insn "*cond_<optab><mode>_2_strict"
3456   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3457         (unspec:SVE_FULL_F
3458           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3459            (unspec:SVE_FULL_F
3460              [(match_dup 1)
3461               (const_int SVE_STRICT_GP)
3462               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3463              SVE_COND_FP_UNARY)
3464            (match_dup 2)]
3465           UNSPEC_SEL))]
3466   "TARGET_SVE"
3467   "@
3468    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3469    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3470   [(set_attr "movprfx" "*,yes")]
3471 )
3472
3473 ;; Predicated floating-point unary arithmetic, merging with an independent
3474 ;; value.
3475 ;;
3476 ;; The earlyclobber isn't needed for the first alternative, but omitting
3477 ;; it would only help the case in which operands 2 and 3 are the same,
3478 ;; which is handled above rather than here.  Marking all the alternatives
3479 ;; as earlyclobber helps to make the instruction more regular to the
3480 ;; register allocator.
3481 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
3482   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3483         (unspec:SVE_FULL_F
3484           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3485            (unspec:SVE_FULL_F
3486              [(match_operand 4)
3487               (const_int SVE_RELAXED_GP)
3488               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3489              SVE_COND_FP_UNARY)
3490            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3491           UNSPEC_SEL))]
3492   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3493   "@
3494    <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3495    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3496    movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3497   "&& !rtx_equal_p (operands[1], operands[4])"
3498   {
3499     operands[4] = copy_rtx (operands[1]);
3500   }
3501   [(set_attr "movprfx" "*,yes,yes")]
3502 )
3503
3504 (define_insn "*cond_<optab><mode>_any_strict"
3505   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3506         (unspec:SVE_FULL_F
3507           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3508            (unspec:SVE_FULL_F
3509              [(match_dup 1)
3510               (const_int SVE_STRICT_GP)
3511               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3512              SVE_COND_FP_UNARY)
3513            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3514           UNSPEC_SEL))]
3515   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3516   "@
3517    <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3518    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3519    movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3520   [(set_attr "movprfx" "*,yes,yes")]
3521 )
3522
3523 ;; -------------------------------------------------------------------------
3524 ;; ---- [FP] Square root
3525 ;; -------------------------------------------------------------------------
3526
3527 (define_expand "sqrt<mode>2"
3528   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3529         (unspec:SVE_FULL_F
3530           [(match_dup 2)
3531            (const_int SVE_RELAXED_GP)
3532            (match_operand:SVE_FULL_F 1 "register_operand")]
3533           UNSPEC_COND_FSQRT))]
3534   "TARGET_SVE"
3535 {
3536   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
3537     DONE;
3538   operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3539 })
3540
3541 ;; -------------------------------------------------------------------------
3542 ;; ---- [FP] Reciprocal square root
3543 ;; -------------------------------------------------------------------------
3544
3545 (define_expand "rsqrt<mode>2"
3546   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3547         (unspec:SVE_FULL_SDF
3548           [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3549           UNSPEC_RSQRT))]
3550   "TARGET_SVE"
3551 {
3552   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
3553   DONE;
3554 })
3555
3556 (define_expand "@aarch64_rsqrte<mode>"
3557   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3558         (unspec:SVE_FULL_SDF
3559           [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3560           UNSPEC_RSQRTE))]
3561   "TARGET_SVE"
3562 )
3563
3564 (define_expand "@aarch64_rsqrts<mode>"
3565   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3566         (unspec:SVE_FULL_SDF
3567           [(match_operand:SVE_FULL_SDF 1 "register_operand")
3568            (match_operand:SVE_FULL_SDF 2 "register_operand")]
3569           UNSPEC_RSQRTS))]
3570   "TARGET_SVE"
3571 )
3572
3573 ;; -------------------------------------------------------------------------
3574 ;; ---- [PRED] Inverse
3575 ;; -------------------------------------------------------------------------
3576 ;; Includes:
3577 ;; - NOT
3578 ;; -------------------------------------------------------------------------
3579
3580 ;; Unpredicated predicate inverse.
3581 (define_expand "one_cmpl<mode>2"
3582   [(set (match_operand:PRED_ALL 0 "register_operand")
3583         (and:PRED_ALL
3584           (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3585           (match_dup 2)))]
3586   "TARGET_SVE"
3587   {
3588     operands[2] = aarch64_ptrue_reg (<MODE>mode);
3589   }
3590 )
3591
3592 ;; Predicated predicate inverse.
3593 (define_insn "*one_cmpl<mode>3"
3594   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3595         (and:PRED_ALL
3596           (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3597           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3598   "TARGET_SVE"
3599   "not\t%0.b, %1/z, %2.b"
3600 )
3601
3602 ;; =========================================================================
3603 ;; == Binary arithmetic
3604 ;; =========================================================================
3605
3606 ;; -------------------------------------------------------------------------
3607 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3608 ;; -------------------------------------------------------------------------
3609 ;; Includes:
3610 ;; - ADD    (merging form only)
3611 ;; - AND    (merging form only)
3612 ;; - ASR    (merging form only)
3613 ;; - EOR    (merging form only)
3614 ;; - LSL    (merging form only)
3615 ;; - LSR    (merging form only)
3616 ;; - MUL
3617 ;; - ORR    (merging form only)
3618 ;; - SMAX
3619 ;; - SMIN
3620 ;; - SQADD  (SVE2 merging form only)
3621 ;; - SQSUB  (SVE2 merging form only)
3622 ;; - SUB    (merging form only)
3623 ;; - UMAX
3624 ;; - UMIN
3625 ;; - UQADD  (SVE2 merging form only)
3626 ;; - UQSUB  (SVE2 merging form only)
3627 ;; -------------------------------------------------------------------------
3628
3629 ;; Unpredicated integer binary operations that have an immediate form.
3630 (define_expand "<optab><mode>3"
3631   [(set (match_operand:SVE_I 0 "register_operand")
3632         (unspec:SVE_I
3633           [(match_dup 3)
3634            (SVE_INT_BINARY_IMM:SVE_I
3635              (match_operand:SVE_I 1 "register_operand")
3636              (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3637           UNSPEC_PRED_X))]
3638   "TARGET_SVE"
3639   {
3640     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3641   }
3642 )
3643
3644 ;; Integer binary operations that have an immediate form, predicated
3645 ;; with a PTRUE.  We don't actually need the predicate for the first
3646 ;; and third alternatives, but using Upa or X isn't likely to gain much
3647 ;; and would make the instruction seem less uniform to the register
3648 ;; allocator.
3649 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3650   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w")
3651         (unspec:SVE_I
3652           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
3653            (SVE_INT_BINARY_IMM:SVE_I
3654              (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w")
3655              (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
3656           UNSPEC_PRED_X))]
3657   "TARGET_SVE"
3658   "@
3659    #
3660    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3661    #
3662    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3663   ; Split the unpredicated form after reload, so that we don't have
3664   ; the unnecessary PTRUE.
3665   "&& reload_completed
3666    && !register_operand (operands[3], <MODE>mode)"
3667   [(set (match_dup 0)
3668         (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))]
3669   ""
3670   [(set_attr "movprfx" "*,*,yes,yes")]
3671 )
3672
3673 ;; Unpredicated binary operations with a constant (post-RA only).
3674 ;; These are generated by splitting a predicated instruction whose
3675 ;; predicate is unused.
3676 (define_insn "*post_ra_<optab><mode>3"
3677   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3678         (SVE_INT_BINARY_IMM:SVE_I
3679           (match_operand:SVE_I 1 "register_operand" "0, w")
3680           (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3681   "TARGET_SVE && reload_completed"
3682   "@
3683    <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
3684    movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
3685   [(set_attr "movprfx" "*,yes")]
3686 )
3687
3688 ;; Predicated integer operations with merging.
3689 (define_expand "@cond_<optab><mode>"
3690   [(set (match_operand:SVE_I 0 "register_operand")
3691         (unspec:SVE_I
3692           [(match_operand:<VPRED> 1 "register_operand")
3693            (SVE_INT_BINARY:SVE_I
3694              (match_operand:SVE_I 2 "register_operand")
3695              (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>"))
3696            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3697           UNSPEC_SEL))]
3698   "TARGET_SVE"
3699 )
3700
3701 ;; Predicated integer operations, merging with the first input.
3702 (define_insn "*cond_<optab><mode>_2"
3703   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3704         (unspec:SVE_I
3705           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3706            (SVE_INT_BINARY:SVE_I
3707              (match_operand:SVE_I 2 "register_operand" "0, w")
3708              (match_operand:SVE_I 3 "register_operand" "w, w"))
3709            (match_dup 2)]
3710           UNSPEC_SEL))]
3711   "TARGET_SVE"
3712   "@
3713    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3714    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3715   [(set_attr "movprfx" "*,yes")]
3716 )
3717
3718 ;; Predicated integer operations, merging with the second input.
3719 (define_insn "*cond_<optab><mode>_3"
3720   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3721         (unspec:SVE_I
3722           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3723            (SVE_INT_BINARY:SVE_I
3724              (match_operand:SVE_I 2 "register_operand" "w, w")
3725              (match_operand:SVE_I 3 "register_operand" "0, w"))
3726            (match_dup 3)]
3727           UNSPEC_SEL))]
3728   "TARGET_SVE"
3729   "@
3730    <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3731    movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
3732   [(set_attr "movprfx" "*,yes")]
3733 )
3734
3735 ;; Predicated integer operations, merging with an independent value.
3736 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3737   [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3738         (unspec:SVE_I
3739           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3740            (SVE_INT_BINARY:SVE_I
3741              (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
3742              (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
3743            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3744           UNSPEC_SEL))]
3745   "TARGET_SVE
3746    && !rtx_equal_p (operands[2], operands[4])
3747    && !rtx_equal_p (operands[3], operands[4])"
3748   "@
3749    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3750    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3751    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3752    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3753    #"
3754   "&& reload_completed
3755    && register_operand (operands[4], <MODE>mode)
3756    && !rtx_equal_p (operands[0], operands[4])"
3757   {
3758     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3759                                              operands[4], operands[1]));
3760     operands[4] = operands[2] = operands[0];
3761   }
3762   [(set_attr "movprfx" "yes")]
3763 )
3764
3765 ;; -------------------------------------------------------------------------
3766 ;; ---- [INT] Addition
3767 ;; -------------------------------------------------------------------------
3768 ;; Includes:
3769 ;; - ADD
3770 ;; - DECB
3771 ;; - DECD
3772 ;; - DECH
3773 ;; - DECW
3774 ;; - INCB
3775 ;; - INCD
3776 ;; - INCH
3777 ;; - INCW
3778 ;; - SUB
3779 ;; -------------------------------------------------------------------------
3780
3781 (define_insn "add<mode>3"
3782   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
3783         (plus:SVE_I
3784           (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
3785           (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
3786   "TARGET_SVE"
3787   "@
3788    add\t%0.<Vetype>, %0.<Vetype>, #%D2
3789    sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3790    * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
3791    movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
3792    movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3793    add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3794   [(set_attr "movprfx" "*,*,*,yes,yes,*")]
3795 )
3796
3797 ;; Merging forms are handled through SVE_INT_BINARY.
3798
3799 ;; -------------------------------------------------------------------------
3800 ;; ---- [INT] Subtraction
3801 ;; -------------------------------------------------------------------------
3802 ;; Includes:
3803 ;; - SUB
3804 ;; - SUBR
3805 ;; -------------------------------------------------------------------------
3806
3807 (define_insn "sub<mode>3"
3808   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
3809         (minus:SVE_I
3810           (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
3811           (match_operand:SVE_I 2 "register_operand" "w, 0, w")))]
3812   "TARGET_SVE"
3813   "@
3814    sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
3815    subr\t%0.<Vetype>, %0.<Vetype>, #%D1
3816    movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
3817   [(set_attr "movprfx" "*,*,yes")]
3818 )
3819
3820 ;; Merging forms are handled through SVE_INT_BINARY.
3821
3822 ;; -------------------------------------------------------------------------
3823 ;; ---- [INT] Take address
3824 ;; -------------------------------------------------------------------------
3825 ;; Includes:
3826 ;; - ADR
3827 ;; -------------------------------------------------------------------------
3828
3829 ;; An unshifted and unscaled ADR.  This is functionally equivalent to an ADD,
3830 ;; but the svadrb intrinsics should preserve the user's choice.
3831 (define_insn "@aarch64_adr<mode>"
3832   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3833         (unspec:SVE_FULL_SDI
3834           [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
3835            (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
3836           UNSPEC_ADR))]
3837   "TARGET_SVE"
3838   "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
3839 )
3840
3841 ;; Same, but with the offset being sign-extended from the low 32 bits.
3842 (define_insn_and_rewrite "*aarch64_adr_sxtw"
3843   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3844         (unspec:VNx2DI
3845           [(match_operand:VNx2DI 1 "register_operand" "w")
3846            (unspec:VNx2DI
3847              [(match_operand 3)
3848               (sign_extend:VNx2DI
3849                 (truncate:VNx2SI
3850                   (match_operand:VNx2DI 2 "register_operand" "w")))]
3851              UNSPEC_PRED_X)]
3852           UNSPEC_ADR))]
3853   "TARGET_SVE"
3854   "adr\t%0.d, [%1.d, %2.d, sxtw]"
3855   "&& !CONSTANT_P (operands[3])"
3856   {
3857     operands[3] = CONSTM1_RTX (VNx2BImode);
3858   }
3859 )
3860
3861 ;; Same, but with the offset being zero-extended from the low 32 bits.
3862 (define_insn "*aarch64_adr_uxtw_unspec"
3863   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3864         (unspec:VNx2DI
3865           [(match_operand:VNx2DI 1 "register_operand" "w")
3866            (and:VNx2DI
3867              (match_operand:VNx2DI 2 "register_operand" "w")
3868              (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
3869           UNSPEC_ADR))]
3870   "TARGET_SVE"
3871   "adr\t%0.d, [%1.d, %2.d, uxtw]"
3872 )
3873
3874 ;; Same, matching as a PLUS rather than unspec.
3875 (define_insn "*aarch64_adr_uxtw_and"
3876   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3877         (plus:VNx2DI
3878           (and:VNx2DI
3879             (match_operand:VNx2DI 2 "register_operand" "w")
3880             (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
3881           (match_operand:VNx2DI 1 "register_operand" "w")))]
3882   "TARGET_SVE"
3883   "adr\t%0.d, [%1.d, %2.d, uxtw]"
3884 )
3885
3886 ;; ADR with a nonzero shift.
3887 (define_expand "@aarch64_adr<mode>_shift"
3888   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3889         (plus:SVE_FULL_SDI
3890           (unspec:SVE_FULL_SDI
3891             [(match_dup 4)
3892              (ashift:SVE_FULL_SDI
3893                (match_operand:SVE_FULL_SDI 2 "register_operand")
3894                (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3895             UNSPEC_PRED_X)
3896           (match_operand:SVE_FULL_SDI 1 "register_operand")))]
3897   "TARGET_SVE"
3898   {
3899     operands[4] = CONSTM1_RTX (<VPRED>mode);
3900   }
3901 )
3902
3903 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
3904   [(set (match_operand:SVE_24I 0 "register_operand" "=w")
3905         (plus:SVE_24I
3906           (unspec:SVE_24I
3907             [(match_operand 4)
3908              (ashift:SVE_24I
3909                (match_operand:SVE_24I 2 "register_operand" "w")
3910                (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
3911             UNSPEC_PRED_X)
3912           (match_operand:SVE_24I 1 "register_operand" "w")))]
3913   "TARGET_SVE"
3914   "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
3915   "&& !CONSTANT_P (operands[4])"
3916   {
3917     operands[4] = CONSTM1_RTX (<VPRED>mode);
3918   }
3919 )
3920
3921 ;; Same, but with the index being sign-extended from the low 32 bits.
3922 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
3923   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3924         (plus:VNx2DI
3925           (unspec:VNx2DI
3926             [(match_operand 4)
3927              (ashift:VNx2DI
3928                (unspec:VNx2DI
3929                  [(match_operand 5)
3930                   (sign_extend:VNx2DI
3931                     (truncate:VNx2SI
3932                       (match_operand:VNx2DI 2 "register_operand" "w")))]
3933                  UNSPEC_PRED_X)
3934                (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3935             UNSPEC_PRED_X)
3936           (match_operand:VNx2DI 1 "register_operand" "w")))]
3937   "TARGET_SVE"
3938   "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
3939   "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3940   {
3941     operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
3942   }
3943 )
3944
3945 ;; Same, but with the index being zero-extended from the low 32 bits.
3946 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
3947   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3948         (plus:VNx2DI
3949           (unspec:VNx2DI
3950             [(match_operand 5)
3951              (ashift:VNx2DI
3952                (and:VNx2DI
3953                  (match_operand:VNx2DI 2 "register_operand" "w")
3954                  (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
3955                (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3956             UNSPEC_PRED_X)
3957           (match_operand:VNx2DI 1 "register_operand" "w")))]
3958   "TARGET_SVE"
3959   "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
3960   "&& !CONSTANT_P (operands[5])"
3961   {
3962     operands[5] = CONSTM1_RTX (VNx2BImode);
3963   }
3964 )
3965
3966 ;; -------------------------------------------------------------------------
3967 ;; ---- [INT] Absolute difference
3968 ;; -------------------------------------------------------------------------
3969 ;; Includes:
3970 ;; - SABD
3971 ;; - UABD
3972 ;; -------------------------------------------------------------------------
3973
3974 ;; Unpredicated integer absolute difference.
3975 (define_expand "<su>abd<mode>_3"
3976   [(use (match_operand:SVE_I 0 "register_operand"))
3977    (USMAX:SVE_I
3978      (match_operand:SVE_I 1 "register_operand")
3979      (match_operand:SVE_I 2 "register_operand"))]
3980   "TARGET_SVE"
3981   {
3982     rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3983     emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
3984                                                operands[2]));
3985     DONE;
3986   }
3987 )
3988
3989 ;; Predicated integer absolute difference.
3990 (define_insn "@aarch64_pred_<su>abd<mode>"
3991   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3992         (minus:SVE_I
3993           (unspec:SVE_I
3994             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3995              (USMAX:SVE_I
3996                (match_operand:SVE_I 2 "register_operand" "%0, w")
3997                (match_operand:SVE_I 3 "register_operand" "w, w"))]
3998             UNSPEC_PRED_X)
3999           (unspec:SVE_I
4000             [(match_dup 1)
4001              (<max_opp>:SVE_I
4002                (match_dup 2)
4003                (match_dup 3))]
4004             UNSPEC_PRED_X)))]
4005   "TARGET_SVE"
4006   "@
4007    <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4008    movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4009   [(set_attr "movprfx" "*,yes")]
4010 )
4011
4012 (define_expand "@aarch64_cond_<su>abd<mode>"
4013   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4014         (unspec:SVE_FULL_I
4015           [(match_operand:<VPRED> 1 "register_operand")
4016            (minus:SVE_FULL_I
4017              (unspec:SVE_FULL_I
4018                [(match_dup 1)
4019                 (USMAX:SVE_FULL_I
4020                   (match_operand:SVE_FULL_I 2 "register_operand")
4021                   (match_operand:SVE_FULL_I 3 "register_operand"))]
4022                UNSPEC_PRED_X)
4023              (unspec:SVE_FULL_I
4024                [(match_dup 1)
4025                 (<max_opp>:SVE_FULL_I
4026                   (match_dup 2)
4027                   (match_dup 3))]
4028                UNSPEC_PRED_X))
4029            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4030           UNSPEC_SEL))]
4031   "TARGET_SVE"
4032 {
4033   if (rtx_equal_p (operands[3], operands[4]))
4034     std::swap (operands[2], operands[3]);
4035 })
4036
4037 ;; Predicated integer absolute difference, merging with the first input.
4038 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
4039   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4040         (unspec:SVE_I
4041           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4042            (minus:SVE_I
4043              (unspec:SVE_I
4044                [(match_operand 4)
4045                 (USMAX:SVE_I
4046                   (match_operand:SVE_I 2 "register_operand" "0, w")
4047                   (match_operand:SVE_I 3 "register_operand" "w, w"))]
4048                UNSPEC_PRED_X)
4049              (unspec:SVE_I
4050                [(match_operand 5)
4051                 (<max_opp>:SVE_I
4052                   (match_dup 2)
4053                   (match_dup 3))]
4054                UNSPEC_PRED_X))
4055            (match_dup 2)]
4056           UNSPEC_SEL))]
4057   "TARGET_SVE"
4058   "@
4059    <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4060    movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4061   "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4062   {
4063     operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4064   }
4065   [(set_attr "movprfx" "*,yes")]
4066 )
4067
4068 ;; Predicated integer absolute difference, merging with the second input.
4069 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_3"
4070   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4071         (unspec:SVE_I
4072           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4073            (minus:SVE_I
4074              (unspec:SVE_I
4075                [(match_operand 4)
4076                 (USMAX:SVE_I
4077                   (match_operand:SVE_I 2 "register_operand" "w, w")
4078                   (match_operand:SVE_I 3 "register_operand" "0, w"))]
4079                UNSPEC_PRED_X)
4080              (unspec:SVE_I
4081                [(match_operand 5)
4082                 (<max_opp>:SVE_I
4083                   (match_dup 2)
4084                   (match_dup 3))]
4085                UNSPEC_PRED_X))
4086            (match_dup 3)]
4087           UNSPEC_SEL))]
4088   "TARGET_SVE"
4089   "@
4090    <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4091    movprfx\t%0, %3\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4092   "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4093   {
4094     operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4095   }
4096   [(set_attr "movprfx" "*,yes")]
4097 )
4098
4099 ;; Predicated integer absolute difference, merging with an independent value.
4100 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
4101   [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4102         (unspec:SVE_I
4103           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4104            (minus:SVE_I
4105              (unspec:SVE_I
4106                [(match_operand 5)
4107                 (USMAX:SVE_I
4108                   (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
4109                   (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))]
4110                UNSPEC_PRED_X)
4111              (unspec:SVE_I
4112                [(match_operand 6)
4113                 (<max_opp>:SVE_I
4114                   (match_dup 2)
4115                   (match_dup 3))]
4116                UNSPEC_PRED_X))
4117            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4118           UNSPEC_SEL))]
4119   "TARGET_SVE
4120    && !rtx_equal_p (operands[2], operands[4])
4121    && !rtx_equal_p (operands[3], operands[4])"
4122   "@
4123    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4124    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4125    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4126    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4127    #"
4128   "&& 1"
4129   {
4130     if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
4131       operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
4132     else if (reload_completed
4133              && register_operand (operands[4], <MODE>mode)
4134              && !rtx_equal_p (operands[0], operands[4]))
4135       {
4136         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4137                                                  operands[4], operands[1]));
4138         operands[4] = operands[2] = operands[0];
4139       }
4140     else
4141       FAIL;
4142   }
4143   [(set_attr "movprfx" "yes")]
4144 )
4145
4146 ;; -------------------------------------------------------------------------
4147 ;; ---- [INT] Saturating addition and subtraction
4148 ;; -------------------------------------------------------------------------
4149 ;; - SQADD
4150 ;; - SQSUB
4151 ;; - UQADD
4152 ;; - UQSUB
4153 ;; -------------------------------------------------------------------------
4154
4155 ;; Unpredicated saturating signed addition and subtraction.
4156 (define_insn "@aarch64_sve_<optab><mode>"
4157   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
4158         (SBINQOPS:SVE_FULL_I
4159           (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
4160           (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
4161   "TARGET_SVE"
4162   "@
4163    <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4164    <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4165    movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4166    movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4167    <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4168   [(set_attr "movprfx" "*,*,yes,yes,*")]
4169 )
4170
4171 ;; Unpredicated saturating unsigned addition and subtraction.
4172 (define_insn "@aarch64_sve_<optab><mode>"
4173   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
4174         (UBINQOPS:SVE_FULL_I
4175           (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
4176           (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
4177   "TARGET_SVE"
4178   "@
4179    <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4180    movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4181    <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4182   [(set_attr "movprfx" "*,yes,*")]
4183 )
4184
4185 ;; -------------------------------------------------------------------------
4186 ;; ---- [INT] Highpart multiplication
4187 ;; -------------------------------------------------------------------------
4188 ;; Includes:
4189 ;; - SMULH
4190 ;; - UMULH
4191 ;; -------------------------------------------------------------------------
4192
4193 ;; Unpredicated highpart multiplication.
4194 (define_expand "<su>mul<mode>3_highpart"
4195   [(set (match_operand:SVE_I 0 "register_operand")
4196         (unspec:SVE_I
4197           [(match_dup 3)
4198            (unspec:SVE_I
4199              [(match_operand:SVE_I 1 "register_operand")
4200               (match_operand:SVE_I 2 "register_operand")]
4201              MUL_HIGHPART)]
4202           UNSPEC_PRED_X))]
4203   "TARGET_SVE"
4204   {
4205     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4206   }
4207 )
4208
4209 ;; Predicated highpart multiplication.
4210 (define_insn "@aarch64_pred_<optab><mode>"
4211   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4212         (unspec:SVE_I
4213           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4214            (unspec:SVE_I
4215              [(match_operand:SVE_I 2 "register_operand" "%0, w")
4216               (match_operand:SVE_I 3 "register_operand" "w, w")]
4217              MUL_HIGHPART)]
4218           UNSPEC_PRED_X))]
4219   "TARGET_SVE"
4220   "@
4221    <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4222    movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4223   [(set_attr "movprfx" "*,yes")]
4224 )
4225
4226 ;; Predicated highpart multiplications with merging.
4227 (define_expand "@cond_<optab><mode>"
4228   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4229         (unspec:SVE_FULL_I
4230           [(match_operand:<VPRED> 1 "register_operand")
4231            (unspec:SVE_FULL_I
4232              [(match_operand:SVE_FULL_I 2 "register_operand")
4233               (match_operand:SVE_FULL_I 3 "register_operand")]
4234              MUL_HIGHPART)
4235            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4236           UNSPEC_SEL))]
4237   "TARGET_SVE"
4238 {
4239   /* Only target code is aware of these operations, so we don't need
4240      to handle the fully-general case.  */
4241   gcc_assert (rtx_equal_p (operands[2], operands[4])
4242               || CONSTANT_P (operands[4]));
4243 })
4244
4245 ;; Predicated highpart multiplications, merging with the first input.
4246 (define_insn "*cond_<optab><mode>_2"
4247   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4248         (unspec:SVE_FULL_I
4249           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4250            (unspec:SVE_FULL_I
4251              [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4252               (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4253              MUL_HIGHPART)
4254            (match_dup 2)]
4255           UNSPEC_SEL))]
4256   "TARGET_SVE"
4257   "@
4258    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4259    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4260   [(set_attr "movprfx" "*,yes")])
4261
4262 ;; Predicated highpart multiplications, merging with zero.
4263 (define_insn "*cond_<optab><mode>_z"
4264   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
4265         (unspec:SVE_FULL_I
4266           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4267            (unspec:SVE_FULL_I
4268              [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4269               (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4270              MUL_HIGHPART)
4271            (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4272           UNSPEC_SEL))]
4273   "TARGET_SVE"
4274   "@
4275    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4276    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4277   [(set_attr "movprfx" "yes")])
4278
4279 ;; -------------------------------------------------------------------------
4280 ;; ---- [INT] Division
4281 ;; -------------------------------------------------------------------------
4282 ;; Includes:
4283 ;; - SDIV
4284 ;; - SDIVR
4285 ;; - UDIV
4286 ;; - UDIVR
4287 ;; -------------------------------------------------------------------------
4288
4289 ;; Unpredicated integer division.
4290 (define_expand "<optab><mode>3"
4291   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4292         (unspec:SVE_FULL_SDI
4293           [(match_dup 3)
4294            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4295              (match_operand:SVE_FULL_SDI 1 "register_operand")
4296              (match_operand:SVE_FULL_SDI 2 "register_operand"))]
4297           UNSPEC_PRED_X))]
4298   "TARGET_SVE"
4299   {
4300     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4301   }
4302 )
4303
4304 ;; Integer division predicated with a PTRUE.
4305 (define_insn "@aarch64_pred_<optab><mode>"
4306   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
4307         (unspec:SVE_FULL_SDI
4308           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4309            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4310              (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
4311              (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
4312           UNSPEC_PRED_X))]
4313   "TARGET_SVE"
4314   "@
4315    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4316    <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4317    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4318   [(set_attr "movprfx" "*,*,yes")]
4319 )
4320
4321 ;; Predicated integer division with merging.
4322 (define_expand "@cond_<optab><mode>"
4323   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4324         (unspec:SVE_FULL_SDI
4325           [(match_operand:<VPRED> 1 "register_operand")
4326            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4327              (match_operand:SVE_FULL_SDI 2 "register_operand")
4328              (match_operand:SVE_FULL_SDI 3 "register_operand"))
4329            (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4330           UNSPEC_SEL))]
4331   "TARGET_SVE"
4332 )
4333
4334 ;; Predicated integer division, merging with the first input.
4335 (define_insn "*cond_<optab><mode>_2"
4336   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4337         (unspec:SVE_FULL_SDI
4338           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4339            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4340              (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
4341              (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
4342            (match_dup 2)]
4343           UNSPEC_SEL))]
4344   "TARGET_SVE"
4345   "@
4346    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4347    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4348   [(set_attr "movprfx" "*,yes")]
4349 )
4350
4351 ;; Predicated integer division, merging with the second input.
4352 (define_insn "*cond_<optab><mode>_3"
4353   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4354         (unspec:SVE_FULL_SDI
4355           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4356            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4357              (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
4358              (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
4359            (match_dup 3)]
4360           UNSPEC_SEL))]
4361   "TARGET_SVE"
4362   "@
4363    <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4364    movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4365   [(set_attr "movprfx" "*,yes")]
4366 )
4367
4368 ;; Predicated integer division, merging with an independent value.
4369 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4370   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4371         (unspec:SVE_FULL_SDI
4372           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4373            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4374              (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
4375              (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
4376            (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4377           UNSPEC_SEL))]
4378   "TARGET_SVE
4379    && !rtx_equal_p (operands[2], operands[4])
4380    && !rtx_equal_p (operands[3], operands[4])"
4381   "@
4382    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4383    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4384    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4385    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4386    #"
4387   "&& reload_completed
4388    && register_operand (operands[4], <MODE>mode)
4389    && !rtx_equal_p (operands[0], operands[4])"
4390   {
4391     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4392                                              operands[4], operands[1]));
4393     operands[4] = operands[2] = operands[0];
4394   }
4395   [(set_attr "movprfx" "yes")]
4396 )
4397
4398 ;; -------------------------------------------------------------------------
4399 ;; ---- [INT] Binary logical operations
4400 ;; -------------------------------------------------------------------------
4401 ;; Includes:
4402 ;; - AND
4403 ;; - EOR
4404 ;; - ORR
4405 ;; -------------------------------------------------------------------------
4406
4407 ;; Unpredicated integer binary logical operations.
4408 (define_insn "<optab><mode>3"
4409   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w")
4410         (LOGICAL:SVE_I
4411           (match_operand:SVE_I 1 "register_operand" "%0, w, w")
4412           (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
4413   "TARGET_SVE"
4414   "@
4415    <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4416    movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4417    <logical>\t%0.d, %1.d, %2.d"
4418   [(set_attr "movprfx" "*,yes,*")]
4419 )
4420
4421 ;; Merging forms are handled through SVE_INT_BINARY.
4422
4423 ;; -------------------------------------------------------------------------
4424 ;; ---- [INT] Binary logical operations (inverted second input)
4425 ;; -------------------------------------------------------------------------
4426 ;; Includes:
4427 ;; - BIC
4428 ;; -------------------------------------------------------------------------
4429
4430 ;; Unpredicated BIC.
4431 (define_expand "@aarch64_bic<mode>"
4432   [(set (match_operand:SVE_I 0 "register_operand")
4433         (and:SVE_I
4434           (unspec:SVE_I
4435             [(match_dup 3)
4436              (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
4437             UNSPEC_PRED_X)
4438           (match_operand:SVE_I 1 "register_operand")))]
4439   "TARGET_SVE"
4440   {
4441     operands[3] = CONSTM1_RTX (<VPRED>mode);
4442   }
4443 )
4444
4445 ;; Predicated BIC.
4446 (define_insn_and_rewrite "*bic<mode>3"
4447   [(set (match_operand:SVE_I 0 "register_operand" "=w")
4448         (and:SVE_I
4449           (unspec:SVE_I
4450             [(match_operand 3)
4451              (not:SVE_I
4452                (match_operand:SVE_I 2 "register_operand" "w"))]
4453             UNSPEC_PRED_X)
4454           (match_operand:SVE_I 1 "register_operand" "w")))]
4455   "TARGET_SVE"
4456   "bic\t%0.d, %1.d, %2.d"
4457   "&& !CONSTANT_P (operands[3])"
4458   {
4459     operands[3] = CONSTM1_RTX (<VPRED>mode);
4460   }
4461 )
4462
4463 ;; Predicated BIC with merging.
4464 (define_expand "@cond_bic<mode>"
4465   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4466         (unspec:SVE_FULL_I
4467           [(match_operand:<VPRED> 1 "register_operand")
4468            (and:SVE_FULL_I
4469              (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4470              (match_operand:SVE_FULL_I 2 "register_operand"))
4471            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4472           UNSPEC_SEL))]
4473   "TARGET_SVE"
4474 )
4475
4476 ;; Predicated integer BIC, merging with the first input.
4477 (define_insn "*cond_bic<mode>_2"
4478   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4479         (unspec:SVE_I
4480           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4481            (and:SVE_I
4482              (not:SVE_I
4483                (match_operand:SVE_I 3 "register_operand" "w, w"))
4484              (match_operand:SVE_I 2 "register_operand" "0, w"))
4485            (match_dup 2)]
4486           UNSPEC_SEL))]
4487   "TARGET_SVE"
4488   "@
4489    bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4490    movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4491   [(set_attr "movprfx" "*,yes")]
4492 )
4493
4494 ;; Predicated integer BIC, merging with an independent value.
4495 (define_insn_and_rewrite "*cond_bic<mode>_any"
4496   [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, ?&w")
4497         (unspec:SVE_I
4498           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4499            (and:SVE_I
4500              (not:SVE_I
4501                (match_operand:SVE_I 3 "register_operand" "w, w, w, w"))
4502              (match_operand:SVE_I 2 "register_operand" "0, w, w, w"))
4503            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4504           UNSPEC_SEL))]
4505   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4506   "@
4507    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4508    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4509    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4510    #"
4511   "&& reload_completed
4512    && register_operand (operands[4], <MODE>mode)
4513    && !rtx_equal_p (operands[0], operands[4])"
4514   {
4515     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4516                                              operands[4], operands[1]));
4517     operands[4] = operands[2] = operands[0];
4518   }
4519   [(set_attr "movprfx" "yes")]
4520 )
4521
4522 ;; -------------------------------------------------------------------------
4523 ;; ---- [INT] Shifts (rounding towards -Inf)
4524 ;; -------------------------------------------------------------------------
4525 ;; Includes:
4526 ;; - ASR
4527 ;; - ASRR
4528 ;; - LSL
4529 ;; - LSLR
4530 ;; - LSR
4531 ;; - LSRR
4532 ;; -------------------------------------------------------------------------
4533
4534 ;; Unpredicated shift by a scalar, which expands into one of the vector
4535 ;; shifts below.
4536 (define_expand "<ASHIFT:optab><mode>3"
4537   [(set (match_operand:SVE_I 0 "register_operand")
4538         (ASHIFT:SVE_I
4539           (match_operand:SVE_I 1 "register_operand")
4540           (match_operand:<VEL> 2 "general_operand")))]
4541   "TARGET_SVE"
4542   {
4543     rtx amount;
4544     if (CONST_INT_P (operands[2]))
4545       {
4546         amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4547         if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4548           amount = force_reg (<MODE>mode, amount);
4549       }
4550     else
4551       {
4552         amount = convert_to_mode (<VEL>mode, operands[2], 0);
4553         amount = expand_vector_broadcast (<MODE>mode, amount);
4554       }
4555     emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4556     DONE;
4557   }
4558 )
4559
4560 ;; Unpredicated shift by a vector.
4561 (define_expand "v<optab><mode>3"
4562   [(set (match_operand:SVE_I 0 "register_operand")
4563         (unspec:SVE_I
4564           [(match_dup 3)
4565            (ASHIFT:SVE_I
4566              (match_operand:SVE_I 1 "register_operand")
4567              (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
4568           UNSPEC_PRED_X))]
4569   "TARGET_SVE"
4570   {
4571     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4572   }
4573 )
4574
4575 ;; Shift by a vector, predicated with a PTRUE.  We don't actually need
4576 ;; the predicate for the first alternative, but using Upa or X isn't
4577 ;; likely to gain much and would make the instruction seem less uniform
4578 ;; to the register allocator.
4579 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4580   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
4581         (unspec:SVE_I
4582           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4583            (ASHIFT:SVE_I
4584              (match_operand:SVE_I 2 "register_operand" "w, 0, w, w")
4585              (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
4586           UNSPEC_PRED_X))]
4587   "TARGET_SVE"
4588   "@
4589    #
4590    <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4591    <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4592    movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4593   "&& reload_completed
4594    && !register_operand (operands[3], <MODE>mode)"
4595   [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
4596   ""
4597   [(set_attr "movprfx" "*,*,*,yes")]
4598 )
4599
4600 ;; Unpredicated shift operations by a constant (post-RA only).
4601 ;; These are generated by splitting a predicated instruction whose
4602 ;; predicate is unused.
4603 (define_insn "*post_ra_v<optab><mode>3"
4604   [(set (match_operand:SVE_I 0 "register_operand" "=w")
4605         (ASHIFT:SVE_I
4606           (match_operand:SVE_I 1 "register_operand" "w")
4607           (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
4608   "TARGET_SVE && reload_completed"
4609   "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4610 )
4611
4612 ;; Predicated integer shift, merging with the first input.
4613 (define_insn "*cond_<optab><mode>_2_const"
4614   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4615         (unspec:SVE_I
4616           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4617            (ASHIFT:SVE_I
4618              (match_operand:SVE_I 2 "register_operand" "0, w")
4619              (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4620            (match_dup 2)]
4621          UNSPEC_SEL))]
4622   "TARGET_SVE"
4623   "@
4624    <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4625    movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4626   [(set_attr "movprfx" "*,yes")]
4627 )
4628
4629 ;; Predicated integer shift, merging with an independent value.
4630 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4631   [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w")
4632         (unspec:SVE_I
4633           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4634            (ASHIFT:SVE_I
4635              (match_operand:SVE_I 2 "register_operand" "w, w, w")
4636              (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4637            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4638          UNSPEC_SEL))]
4639   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4640   "@
4641    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4642    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4643    #"
4644   "&& reload_completed
4645    && register_operand (operands[4], <MODE>mode)
4646    && !rtx_equal_p (operands[0], operands[4])"
4647   {
4648     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4649                                              operands[4], operands[1]));
4650     operands[4] = operands[2] = operands[0];
4651   }
4652   [(set_attr "movprfx" "yes")]
4653 )
4654
4655 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
4656 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4657   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4658         (unspec:SVE_FULL_BHSI
4659           [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4660            (match_operand:VNx2DI 2 "register_operand" "w")]
4661           SVE_SHIFT_WIDE))]
4662   "TARGET_SVE"
4663   "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4664 )
4665
4666 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4667 (define_expand "@cond_<sve_int_op><mode>"
4668   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4669         (unspec:SVE_FULL_BHSI
4670           [(match_operand:<VPRED> 1 "register_operand")
4671            (unspec:SVE_FULL_BHSI
4672              [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4673               (match_operand:VNx2DI 3 "register_operand")]
4674              SVE_SHIFT_WIDE)
4675            (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4676           UNSPEC_SEL))]
4677   "TARGET_SVE"
4678 )
4679
4680 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with
4681 ;; the first input.
4682 (define_insn "*cond_<sve_int_op><mode>_m"
4683   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
4684         (unspec:SVE_FULL_BHSI
4685           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4686            (unspec:SVE_FULL_BHSI
4687              [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4688               (match_operand:VNx2DI 3 "register_operand" "w, w")]
4689              SVE_SHIFT_WIDE)
4690            (match_dup 2)]
4691          UNSPEC_SEL))]
4692   "TARGET_SVE"
4693   "@
4694    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4695    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4696   [(set_attr "movprfx" "*, yes")])
4697
4698 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
4699 (define_insn "*cond_<sve_int_op><mode>_z"
4700   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
4701         (unspec:SVE_FULL_BHSI
4702           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4703            (unspec:SVE_FULL_BHSI
4704              [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4705               (match_operand:VNx2DI 3 "register_operand" "w, w")]
4706              SVE_SHIFT_WIDE)
4707            (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4708          UNSPEC_SEL))]
4709   "TARGET_SVE"
4710   "@
4711    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4712    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4713   [(set_attr "movprfx" "yes")])
4714
4715 ;; -------------------------------------------------------------------------
4716 ;; ---- [INT] Shifts (rounding towards 0)
4717 ;; -------------------------------------------------------------------------
4718 ;; Includes:
4719 ;; - ASRD
4720 ;; - SQSHLU (SVE2)
4721 ;; - SRSHR (SVE2)
4722 ;; - URSHR (SVE2)
4723 ;; -------------------------------------------------------------------------
4724
4725 ;; Unpredicated ASRD.
4726 (define_expand "sdiv_pow2<mode>3"
4727   [(set (match_operand:SVE_I 0 "register_operand")
4728         (unspec:SVE_I
4729           [(match_dup 3)
4730            (unspec:SVE_I
4731              [(match_operand:SVE_I 1 "register_operand")
4732               (match_operand 2 "aarch64_simd_rshift_imm")]
4733              UNSPEC_ASRD)]
4734          UNSPEC_PRED_X))]
4735   "TARGET_SVE"
4736   {
4737     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4738   }
4739 )
4740
4741 ;; Predicated ASRD.
4742 (define_insn "*sdiv_pow2<mode>3"
4743   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4744         (unspec:SVE_I
4745           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4746            (unspec:SVE_I
4747              [(match_operand:SVE_I 2 "register_operand" "0, w")
4748               (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
4749              UNSPEC_ASRD)]
4750           UNSPEC_PRED_X))]
4751   "TARGET_SVE"
4752   "@
4753    asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4754    movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4755   [(set_attr "movprfx" "*,yes")])
4756
4757 ;; Predicated shift with merging.
4758 (define_expand "@cond_<sve_int_op><mode>"
4759   [(set (match_operand:SVE_I 0 "register_operand")
4760         (unspec:SVE_I
4761           [(match_operand:<VPRED> 1 "register_operand")
4762            (unspec:SVE_I
4763              [(match_dup 5)
4764               (unspec:SVE_I
4765                 [(match_operand:SVE_I 2 "register_operand")
4766                  (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
4767                 SVE_INT_SHIFT_IMM)]
4768              UNSPEC_PRED_X)
4769            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4770           UNSPEC_SEL))]
4771   "TARGET_SVE"
4772   {
4773     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
4774   }
4775 )
4776
4777 ;; Predicated shift, merging with the first input.
4778 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
4779   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
4780         (unspec:SVE_I
4781           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4782            (unspec:SVE_I
4783              [(match_operand 4)
4784               (unspec:SVE_I
4785                 [(match_operand:SVE_I 2 "register_operand" "0, w")
4786                  (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
4787                 SVE_INT_SHIFT_IMM)]
4788              UNSPEC_PRED_X)
4789            (match_dup 2)]
4790           UNSPEC_SEL))]
4791   "TARGET_SVE"
4792   "@
4793    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4794    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4795   "&& !CONSTANT_P (operands[4])"
4796   {
4797     operands[4] = CONSTM1_RTX (<VPRED>mode);
4798   }
4799   [(set_attr "movprfx" "*,yes")])
4800
4801 ;; Predicated shift, merging with an independent value.
4802 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
4803   [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w")
4804         (unspec:SVE_I
4805           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4806            (unspec:SVE_I
4807              [(match_operand 5)
4808               (unspec:SVE_I
4809                 [(match_operand:SVE_I 2 "register_operand" "w, w, w")
4810                  (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
4811                 SVE_INT_SHIFT_IMM)]
4812              UNSPEC_PRED_X)
4813            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4814          UNSPEC_SEL))]
4815   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4816   "@
4817    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4818    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4819    #"
4820   "&& reload_completed
4821    && register_operand (operands[4], <MODE>mode)
4822    && !rtx_equal_p (operands[0], operands[4])"
4823   {
4824     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4825                                              operands[4], operands[1]));
4826     operands[4] = operands[2] = operands[0];
4827   }
4828   [(set_attr "movprfx" "yes")]
4829 )
4830
4831 ;; -------------------------------------------------------------------------
4832 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
4833 ;; -------------------------------------------------------------------------
4834 ;; Includes:
4835 ;; - FSCALE
4836 ;; - FTSMUL
4837 ;; - FTSSEL
4838 ;; -------------------------------------------------------------------------
4839
4840 ;; Unpredicated floating-point binary operations that take an integer as
4841 ;; their second operand.
4842 (define_insn "@aarch64_sve_<optab><mode>"
4843   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4844         (unspec:SVE_FULL_F
4845           [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4846            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4847           SVE_FP_BINARY_INT))]
4848   "TARGET_SVE"
4849   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4850 )
4851
4852 ;; Predicated floating-point binary operations that take an integer
4853 ;; as their second operand.
4854 (define_insn "@aarch64_pred_<optab><mode>"
4855   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4856         (unspec:SVE_FULL_F
4857           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4858            (match_operand:SI 4 "aarch64_sve_gp_strictness")
4859            (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4860            (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4861           SVE_COND_FP_BINARY_INT))]
4862   "TARGET_SVE"
4863   "@
4864    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4865    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4866   [(set_attr "movprfx" "*,yes")]
4867 )
4868
4869 ;; Predicated floating-point binary operations with merging, taking an
4870 ;; integer as their second operand.
4871 (define_expand "@cond_<optab><mode>"
4872   [(set (match_operand:SVE_FULL_F 0 "register_operand")
4873         (unspec:SVE_FULL_F
4874           [(match_operand:<VPRED> 1 "register_operand")
4875            (unspec:SVE_FULL_F
4876              [(match_dup 1)
4877               (const_int SVE_STRICT_GP)
4878               (match_operand:SVE_FULL_F 2 "register_operand")
4879               (match_operand:<V_INT_EQUIV> 3 "register_operand")]
4880              SVE_COND_FP_BINARY_INT)
4881            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4882           UNSPEC_SEL))]
4883   "TARGET_SVE"
4884 )
4885
4886 ;; Predicated floating-point binary operations that take an integer as their
4887 ;; second operand, with inactive lanes coming from the first operand.
4888 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
4889   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4890         (unspec:SVE_FULL_F
4891           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4892            (unspec:SVE_FULL_F
4893              [(match_operand 4)
4894               (const_int SVE_RELAXED_GP)
4895               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4896               (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4897              SVE_COND_FP_BINARY_INT)
4898            (match_dup 2)]
4899           UNSPEC_SEL))]
4900   "TARGET_SVE"
4901   "@
4902    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4903    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4904   "&& !rtx_equal_p (operands[1], operands[4])"
4905   {
4906     operands[4] = copy_rtx (operands[1]);
4907   }
4908   [(set_attr "movprfx" "*,yes")]
4909 )
4910
4911 (define_insn "*cond_<optab><mode>_2_strict"
4912   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4913         (unspec:SVE_FULL_F
4914           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4915            (unspec:SVE_FULL_F
4916              [(match_dup 1)
4917               (const_int SVE_STRICT_GP)
4918               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4919               (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4920              SVE_COND_FP_BINARY_INT)
4921            (match_dup 2)]
4922           UNSPEC_SEL))]
4923   "TARGET_SVE"
4924   "@
4925    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4926    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4927   [(set_attr "movprfx" "*,yes")]
4928 )
4929
4930 ;; Predicated floating-point binary operations that take an integer as
4931 ;; their second operand, with the values of inactive lanes being distinct
4932 ;; from the other inputs.
4933 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
4934   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4935         (unspec:SVE_FULL_F
4936           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4937            (unspec:SVE_FULL_F
4938              [(match_operand 5)
4939               (const_int SVE_RELAXED_GP)
4940               (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4941               (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4942              SVE_COND_FP_BINARY_INT)
4943            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4944           UNSPEC_SEL))]
4945   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4946   "@
4947    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4948    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4949    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4950    #"
4951   "&& 1"
4952   {
4953     if (reload_completed
4954         && register_operand (operands[4], <MODE>mode)
4955         && !rtx_equal_p (operands[0], operands[4]))
4956       {
4957         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4958                                                  operands[4], operands[1]));
4959         operands[4] = operands[2] = operands[0];
4960       }
4961     else if (!rtx_equal_p (operands[1], operands[5]))
4962       operands[5] = copy_rtx (operands[1]);
4963     else
4964       FAIL;
4965   }
4966   [(set_attr "movprfx" "yes")]
4967 )
4968
4969 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
4970   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4971         (unspec:SVE_FULL_F
4972           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4973            (unspec:SVE_FULL_F
4974              [(match_dup 1)
4975               (const_int SVE_STRICT_GP)
4976               (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4977               (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4978              SVE_COND_FP_BINARY_INT)
4979            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4980           UNSPEC_SEL))]
4981   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4982   "@
4983    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4984    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4985    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4986    #"
4987   "&& reload_completed
4988    && register_operand (operands[4], <MODE>mode)
4989    && !rtx_equal_p (operands[0], operands[4])"
4990   {
4991     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4992                                              operands[4], operands[1]));
4993     operands[4] = operands[2] = operands[0];
4994   }
4995   [(set_attr "movprfx" "yes")]
4996 )
4997
4998 ;; -------------------------------------------------------------------------
4999 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
5000 ;; -------------------------------------------------------------------------
5001 ;; Includes post-RA forms of:
5002 ;; - FADD
5003 ;; - FMUL
5004 ;; - FSUB
5005 ;; -------------------------------------------------------------------------
5006
5007 ;; Unpredicated floating-point binary operations (post-RA only).
5008 ;; These are generated by splitting a predicated instruction whose
5009 ;; predicate is unused.
5010 (define_insn "*post_ra_<sve_fp_op><mode>3"
5011   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5012         (SVE_UNPRED_FP_BINARY:SVE_FULL_F
5013           (match_operand:SVE_FULL_F 1 "register_operand" "w")
5014           (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
5015   "TARGET_SVE && reload_completed"
5016   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
5017
5018 ;; -------------------------------------------------------------------------
5019 ;; ---- [FP] General binary arithmetic corresponding to unspecs
5020 ;; -------------------------------------------------------------------------
5021 ;; Includes merging forms of:
5022 ;; - FADD    (constant forms handled in the "Addition" section)
5023 ;; - FDIV
5024 ;; - FDIVR
5025 ;; - FMAX
5026 ;; - FMAXNM  (including #0.0 and #1.0)
5027 ;; - FMIN
5028 ;; - FMINNM  (including #0.0 and #1.0)
5029 ;; - FMUL    (including #0.5 and #2.0)
5030 ;; - FMULX
5031 ;; - FRECPS
5032 ;; - FRSQRTS
5033 ;; - FSUB    (constant forms handled in the "Addition" section)
5034 ;; - FSUBR   (constant forms handled in the "Subtraction" section)
5035 ;; -------------------------------------------------------------------------
5036
5037 ;; Unpredicated floating-point binary operations.
5038 (define_insn "@aarch64_sve_<optab><mode>"
5039   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5040         (unspec:SVE_FULL_F
5041           [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5042            (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5043           SVE_FP_BINARY))]
5044   "TARGET_SVE"
5045   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5046 )
5047
5048 ;; Unpredicated floating-point binary operations that need to be predicated
5049 ;; for SVE.
5050 (define_expand "<optab><mode>3"
5051   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5052         (unspec:SVE_FULL_F
5053           [(match_dup 3)
5054            (const_int SVE_RELAXED_GP)
5055            (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
5056            (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
5057           SVE_COND_FP_BINARY_OPTAB))]
5058   "TARGET_SVE"
5059   {
5060     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5061   }
5062 )
5063
5064 ;; Predicated floating-point binary operations that have no immediate forms.
5065 (define_insn "@aarch64_pred_<optab><mode>"
5066   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
5067         (unspec:SVE_FULL_F
5068           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5069            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5070            (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
5071            (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
5072           SVE_COND_FP_BINARY_REG))]
5073   "TARGET_SVE"
5074   "@
5075    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5076    <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5077    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5078   [(set_attr "movprfx" "*,*,yes")]
5079 )
5080
5081 ;; Predicated floating-point operations with merging.
5082 (define_expand "@cond_<optab><mode>"
5083   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5084         (unspec:SVE_FULL_F
5085           [(match_operand:<VPRED> 1 "register_operand")
5086            (unspec:SVE_FULL_F
5087              [(match_dup 1)
5088               (const_int SVE_STRICT_GP)
5089               (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
5090               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
5091              SVE_COND_FP_BINARY)
5092            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5093           UNSPEC_SEL))]
5094   "TARGET_SVE"
5095 )
5096
5097 ;; Predicated floating-point operations, merging with the first input.
5098 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5099   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5100         (unspec:SVE_FULL_F
5101           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5102            (unspec:SVE_FULL_F
5103              [(match_operand 4)
5104               (const_int SVE_RELAXED_GP)
5105               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5106               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5107              SVE_COND_FP_BINARY)
5108            (match_dup 2)]
5109           UNSPEC_SEL))]
5110   "TARGET_SVE"
5111   "@
5112    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5113    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5114   "&& !rtx_equal_p (operands[1], operands[4])"
5115   {
5116     operands[4] = copy_rtx (operands[1]);
5117   }
5118   [(set_attr "movprfx" "*,yes")]
5119 )
5120
5121 (define_insn "*cond_<optab><mode>_2_strict"
5122   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5123         (unspec:SVE_FULL_F
5124           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5125            (unspec:SVE_FULL_F
5126              [(match_dup 1)
5127               (const_int SVE_STRICT_GP)
5128               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5129               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5130              SVE_COND_FP_BINARY)
5131            (match_dup 2)]
5132           UNSPEC_SEL))]
5133   "TARGET_SVE"
5134   "@
5135    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5136    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5137   [(set_attr "movprfx" "*,yes")]
5138 )
5139
5140 ;; Same for operations that take a 1-bit constant.
5141 (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
5142   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5143         (unspec:SVE_FULL_F
5144           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5145            (unspec:SVE_FULL_F
5146              [(match_operand 4)
5147               (const_int SVE_RELAXED_GP)
5148               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5149               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5150              SVE_COND_FP_BINARY_I1)
5151            (match_dup 2)]
5152           UNSPEC_SEL))]
5153   "TARGET_SVE"
5154   "@
5155    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5156    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
5157   "&& !rtx_equal_p (operands[1], operands[4])"
5158   {
5159     operands[4] = copy_rtx (operands[1]);
5160   }
5161   [(set_attr "movprfx" "*,yes")]
5162 )
5163
5164 (define_insn "*cond_<optab><mode>_2_const_strict"
5165   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5166         (unspec:SVE_FULL_F
5167           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5168            (unspec:SVE_FULL_F
5169              [(match_dup 1)
5170               (const_int SVE_STRICT_GP)
5171               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5172               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5173              SVE_COND_FP_BINARY_I1)
5174            (match_dup 2)]
5175           UNSPEC_SEL))]
5176   "TARGET_SVE"
5177   "@
5178    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5179    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
5180   [(set_attr "movprfx" "*,yes")]
5181 )
5182
5183 ;; Predicated floating-point operations, merging with the second input.
5184 (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
5185   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5186         (unspec:SVE_FULL_F
5187           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5188            (unspec:SVE_FULL_F
5189              [(match_operand 4)
5190               (const_int SVE_RELAXED_GP)
5191               (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5192               (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5193              SVE_COND_FP_BINARY)
5194            (match_dup 3)]
5195           UNSPEC_SEL))]
5196   "TARGET_SVE"
5197   "@
5198    <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5199    movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5200   "&& !rtx_equal_p (operands[1], operands[4])"
5201   {
5202     operands[4] = copy_rtx (operands[1]);
5203   }
5204   [(set_attr "movprfx" "*,yes")]
5205 )
5206
5207 (define_insn "*cond_<optab><mode>_3_strict"
5208   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5209         (unspec:SVE_FULL_F
5210           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5211            (unspec:SVE_FULL_F
5212              [(match_dup 1)
5213               (const_int SVE_STRICT_GP)
5214               (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5215               (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5216              SVE_COND_FP_BINARY)
5217            (match_dup 3)]
5218           UNSPEC_SEL))]
5219   "TARGET_SVE"
5220   "@
5221    <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5222    movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5223   [(set_attr "movprfx" "*,yes")]
5224 )
5225
5226 ;; Predicated floating-point operations, merging with an independent value.
5227 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5228   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5229         (unspec:SVE_FULL_F
5230           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5231            (unspec:SVE_FULL_F
5232              [(match_operand 5)
5233               (const_int SVE_RELAXED_GP)
5234               (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5235               (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5236              SVE_COND_FP_BINARY)
5237            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5238           UNSPEC_SEL))]
5239   "TARGET_SVE
5240    && !rtx_equal_p (operands[2], operands[4])
5241    && !rtx_equal_p (operands[3], operands[4])"
5242   "@
5243    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5244    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5245    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5246    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5247    #"
5248   "&& 1"
5249   {
5250     if (reload_completed
5251         && register_operand (operands[4], <MODE>mode)
5252         && !rtx_equal_p (operands[0], operands[4]))
5253       {
5254         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5255                                                  operands[4], operands[1]));
5256         operands[4] = operands[2] = operands[0];
5257       }
5258     else if (!rtx_equal_p (operands[1], operands[5]))
5259       operands[5] = copy_rtx (operands[1]);
5260     else
5261       FAIL;
5262   }
5263   [(set_attr "movprfx" "yes")]
5264 )
5265
5266 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5267   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5268         (unspec:SVE_FULL_F
5269           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5270            (unspec:SVE_FULL_F
5271              [(match_dup 1)
5272               (const_int SVE_STRICT_GP)
5273               (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5274               (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5275              SVE_COND_FP_BINARY)
5276            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5277           UNSPEC_SEL))]
5278   "TARGET_SVE
5279    && !rtx_equal_p (operands[2], operands[4])
5280    && !rtx_equal_p (operands[3], operands[4])"
5281   "@
5282    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5283    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5284    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5285    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5286    #"
5287   "&& reload_completed
5288    && register_operand (operands[4], <MODE>mode)
5289    && !rtx_equal_p (operands[0], operands[4])"
5290   {
5291     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5292                                              operands[4], operands[1]));
5293     operands[4] = operands[2] = operands[0];
5294   }
5295   [(set_attr "movprfx" "yes")]
5296 )
5297
5298 ;; Same for operations that take a 1-bit constant.
5299 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
5300   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5301         (unspec:SVE_FULL_F
5302           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5303            (unspec:SVE_FULL_F
5304              [(match_operand 5)
5305               (const_int SVE_RELAXED_GP)
5306               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
5307               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5308              SVE_COND_FP_BINARY_I1)
5309            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5310           UNSPEC_SEL))]
5311   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5312   "@
5313    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5314    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5315    #"
5316   "&& 1"
5317   {
5318     if (reload_completed
5319         && register_operand (operands[4], <MODE>mode)
5320         && !rtx_equal_p (operands[0], operands[4]))
5321       {
5322         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5323                                                  operands[4], operands[1]));
5324         operands[4] = operands[2] = operands[0];
5325       }
5326     else if (!rtx_equal_p (operands[1], operands[5]))
5327       operands[5] = copy_rtx (operands[1]);
5328     else
5329       FAIL;
5330   }
5331   [(set_attr "movprfx" "yes")]
5332 )
5333
5334 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
5335   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5336         (unspec:SVE_FULL_F
5337           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5338            (unspec:SVE_FULL_F
5339              [(match_dup 1)
5340               (const_int SVE_STRICT_GP)
5341               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
5342               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5343              SVE_COND_FP_BINARY_I1)
5344            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5345           UNSPEC_SEL))]
5346   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5347   "@
5348    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5349    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5350    #"
5351   "&& reload_completed
5352    && register_operand (operands[4], <MODE>mode)
5353    && !rtx_equal_p (operands[0], operands[4])"
5354   {
5355     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5356                                              operands[4], operands[1]));
5357     operands[4] = operands[2] = operands[0];
5358   }
5359   [(set_attr "movprfx" "yes")]
5360 )
5361
5362 ;; -------------------------------------------------------------------------
5363 ;; ---- [FP] Addition
5364 ;; -------------------------------------------------------------------------
5365 ;; Includes:
5366 ;; - FADD
5367 ;; - FSUB
5368 ;; -------------------------------------------------------------------------
5369
5370 ;; Predicated floating-point addition.
5371 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5372   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
5373         (unspec:SVE_FULL_F
5374           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
5375            (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
5376            (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
5377            (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
5378           SVE_COND_FP_ADD))]
5379   "TARGET_SVE"
5380   "@
5381    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5382    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5383    #
5384    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5385    movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5386    movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5387    movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5388   ; Split the unpredicated form after reload, so that we don't have
5389   ; the unnecessary PTRUE.
5390   "&& reload_completed
5391    && register_operand (operands[3], <MODE>mode)
5392    && INTVAL (operands[4]) == SVE_RELAXED_GP"
5393   [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5394   ""
5395   [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
5396 )
5397
5398 ;; Predicated floating-point addition of a constant, merging with the
5399 ;; first input.
5400 (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
5401   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
5402         (unspec:SVE_FULL_F
5403           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5404            (unspec:SVE_FULL_F
5405              [(match_operand 4)
5406               (const_int SVE_RELAXED_GP)
5407               (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
5408               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
5409              UNSPEC_COND_FADD)
5410            (match_dup 2)]
5411           UNSPEC_SEL))]
5412   "TARGET_SVE"
5413   "@
5414    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5415    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5416    movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5417    movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
5418   "&& !rtx_equal_p (operands[1], operands[4])"
5419   {
5420     operands[4] = copy_rtx (operands[1]);
5421   }
5422   [(set_attr "movprfx" "*,*,yes,yes")]
5423 )
5424
5425 (define_insn "*cond_add<mode>_2_const_strict"
5426   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
5427         (unspec:SVE_FULL_F
5428           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5429            (unspec:SVE_FULL_F
5430              [(match_dup 1)
5431               (const_int SVE_STRICT_GP)
5432               (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
5433               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
5434              UNSPEC_COND_FADD)
5435            (match_dup 2)]
5436           UNSPEC_SEL))]
5437   "TARGET_SVE"
5438   "@
5439    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5440    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5441    movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5442    movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
5443   [(set_attr "movprfx" "*,*,yes,yes")]
5444 )
5445
5446 ;; Predicated floating-point addition of a constant, merging with an
5447 ;; independent value.
5448 (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
5449   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
5450         (unspec:SVE_FULL_F
5451           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5452            (unspec:SVE_FULL_F
5453              [(match_operand 5)
5454               (const_int SVE_RELAXED_GP)
5455               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
5456               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
5457              UNSPEC_COND_FADD)
5458            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
5459           UNSPEC_SEL))]
5460   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5461   "@
5462    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5463    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5464    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5465    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5466    #
5467    #"
5468   "&& 1"
5469   {
5470     if (reload_completed
5471         && register_operand (operands[4], <MODE>mode)
5472         && !rtx_equal_p (operands[0], operands[4]))
5473       {
5474         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5475                                                  operands[4], operands[1]));
5476         operands[4] = operands[2] = operands[0];
5477       }
5478     else if (!rtx_equal_p (operands[1], operands[5]))
5479       operands[5] = copy_rtx (operands[1]);
5480     else
5481       FAIL;
5482   }
5483   [(set_attr "movprfx" "yes")]
5484 )
5485
5486 (define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
5487   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
5488         (unspec:SVE_FULL_F
5489           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5490            (unspec:SVE_FULL_F
5491              [(match_dup 1)
5492               (const_int SVE_STRICT_GP)
5493               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
5494               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
5495              UNSPEC_COND_FADD)
5496            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
5497           UNSPEC_SEL))]
5498   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5499   "@
5500    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5501    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5502    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5503    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5504    #
5505    #"
5506   "&& reload_completed
5507    && register_operand (operands[4], <MODE>mode)
5508    && !rtx_equal_p (operands[0], operands[4])"
5509   {
5510     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5511                                              operands[4], operands[1]));
5512     operands[4] = operands[2] = operands[0];
5513   }
5514   [(set_attr "movprfx" "yes")]
5515 )
5516
5517 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5518
5519 ;; -------------------------------------------------------------------------
5520 ;; ---- [FP] Complex addition
5521 ;; -------------------------------------------------------------------------
5522 ;; Includes:
5523 ;; - FCADD
5524 ;; -------------------------------------------------------------------------
5525
5526 ;; Predicated FCADD.
5527 (define_insn "@aarch64_pred_<optab><mode>"
5528   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5529         (unspec:SVE_FULL_F
5530           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5531            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5532            (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5533            (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5534           SVE_COND_FCADD))]
5535   "TARGET_SVE"
5536   "@
5537    fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5538    movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5539   [(set_attr "movprfx" "*,yes")]
5540 )
5541
5542 ;; Predicated FCADD with merging.
5543 (define_expand "@cond_<optab><mode>"
5544   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5545         (unspec:SVE_FULL_F
5546           [(match_operand:<VPRED> 1 "register_operand")
5547            (unspec:SVE_FULL_F
5548              [(match_dup 1)
5549               (const_int SVE_STRICT_GP)
5550               (match_operand:SVE_FULL_F 2 "register_operand")
5551               (match_operand:SVE_FULL_F 3 "register_operand")]
5552              SVE_COND_FCADD)
5553            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5554           UNSPEC_SEL))]
5555   "TARGET_SVE"
5556 )
5557
5558 ;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer
5559 (define_expand "@cadd<rot><mode>3"
5560   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5561         (unspec:SVE_FULL_F
5562           [(match_dup 3)
5563            (const_int SVE_RELAXED_GP)
5564            (match_operand:SVE_FULL_F 1 "register_operand")
5565            (match_operand:SVE_FULL_F 2 "register_operand")]
5566           SVE_COND_FCADD))]
5567   "TARGET_SVE"
5568 {
5569   operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5570 })
5571
5572 ;; Predicated FCADD, merging with the first input.
5573 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5574   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5575         (unspec:SVE_FULL_F
5576           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5577            (unspec:SVE_FULL_F
5578              [(match_operand 4)
5579               (const_int SVE_RELAXED_GP)
5580               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5581               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5582              SVE_COND_FCADD)
5583            (match_dup 2)]
5584           UNSPEC_SEL))]
5585   "TARGET_SVE"
5586   "@
5587    fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5588    movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5589   "&& !rtx_equal_p (operands[1], operands[4])"
5590   {
5591     operands[4] = copy_rtx (operands[1]);
5592   }
5593   [(set_attr "movprfx" "*,yes")]
5594 )
5595
5596 (define_insn "*cond_<optab><mode>_2_strict"
5597   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5598         (unspec:SVE_FULL_F
5599           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5600            (unspec:SVE_FULL_F
5601              [(match_dup 1)
5602               (const_int SVE_STRICT_GP)
5603               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5604               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5605              SVE_COND_FCADD)
5606            (match_dup 2)]
5607           UNSPEC_SEL))]
5608   "TARGET_SVE"
5609   "@
5610    fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5611    movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5612   [(set_attr "movprfx" "*,yes")]
5613 )
5614
5615 ;; Predicated FCADD, merging with an independent value.
5616 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5617   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5618         (unspec:SVE_FULL_F
5619           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5620            (unspec:SVE_FULL_F
5621              [(match_operand 5)
5622               (const_int SVE_RELAXED_GP)
5623               (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5624               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5625              SVE_COND_FCADD)
5626            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5627           UNSPEC_SEL))]
5628   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5629   "@
5630    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5631    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5632    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5633    #"
5634   "&& 1"
5635   {
5636     if (reload_completed
5637         && register_operand (operands[4], <MODE>mode)
5638         && !rtx_equal_p (operands[0], operands[4]))
5639       {
5640         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5641                                                  operands[4], operands[1]));
5642         operands[4] = operands[2] = operands[0];
5643       }
5644     else if (!rtx_equal_p (operands[1], operands[5]))
5645       operands[5] = copy_rtx (operands[1]);
5646     else
5647       FAIL;
5648   }
5649   [(set_attr "movprfx" "yes")]
5650 )
5651
5652 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5653   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5654         (unspec:SVE_FULL_F
5655           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5656            (unspec:SVE_FULL_F
5657              [(match_dup 1)
5658               (const_int SVE_STRICT_GP)
5659               (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5660               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5661              SVE_COND_FCADD)
5662            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5663           UNSPEC_SEL))]
5664   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5665   "@
5666    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5667    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5668    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5669    #"
5670   "&& reload_completed
5671    && register_operand (operands[4], <MODE>mode)
5672    && !rtx_equal_p (operands[0], operands[4])"
5673   {
5674     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5675                                              operands[4], operands[1]));
5676     operands[4] = operands[2] = operands[0];
5677   }
5678   [(set_attr "movprfx" "yes")]
5679 )
5680
5681 ;; -------------------------------------------------------------------------
5682 ;; ---- [FP] Subtraction
5683 ;; -------------------------------------------------------------------------
5684 ;; Includes:
5685 ;; - FSUB
5686 ;; - FSUBR
5687 ;; -------------------------------------------------------------------------
5688
5689 ;; Predicated floating-point subtraction.
5690 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5691   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
5692         (unspec:SVE_FULL_F
5693           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5694            (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
5695            (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
5696            (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
5697           SVE_COND_FP_SUB))]
5698   "TARGET_SVE"
5699   "@
5700    fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5701    #
5702    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5703    fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5704    movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5705    movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5706   ; Split the unpredicated form after reload, so that we don't have
5707   ; the unnecessary PTRUE.
5708   "&& reload_completed
5709    && register_operand (operands[2], <MODE>mode)
5710    && INTVAL (operands[4]) == SVE_RELAXED_GP"
5711   [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5712   ""
5713   [(set_attr "movprfx" "*,*,*,*,yes,yes")]
5714 )
5715
5716 ;; Predicated floating-point subtraction from a constant, merging with the
5717 ;; second input.
5718 (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
5719   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5720         (unspec:SVE_FULL_F
5721           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5722            (unspec:SVE_FULL_F
5723              [(match_operand 4)
5724               (const_int SVE_RELAXED_GP)
5725               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5726               (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5727              UNSPEC_COND_FSUB)
5728            (match_dup 3)]
5729           UNSPEC_SEL))]
5730   "TARGET_SVE"
5731   "@
5732    fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5733    movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5734   "&& !rtx_equal_p (operands[1], operands[4])"
5735   {
5736     operands[4] = copy_rtx (operands[1]);
5737   }
5738   [(set_attr "movprfx" "*,yes")]
5739 )
5740
5741 (define_insn "*cond_sub<mode>_3_const_strict"
5742   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5743         (unspec:SVE_FULL_F
5744           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5745            (unspec:SVE_FULL_F
5746              [(match_dup 1)
5747               (const_int SVE_STRICT_GP)
5748               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5749               (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5750              UNSPEC_COND_FSUB)
5751            (match_dup 3)]
5752           UNSPEC_SEL))]
5753   "TARGET_SVE"
5754   "@
5755    fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5756    movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5757   [(set_attr "movprfx" "*,yes")]
5758 )
5759
5760 ;; Predicated floating-point subtraction from a constant, merging with an
5761 ;; independent value.
5762 (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
5763   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5764         (unspec:SVE_FULL_F
5765           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5766            (unspec:SVE_FULL_F
5767              [(match_operand 5)
5768               (const_int SVE_RELAXED_GP)
5769               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5770               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5771              UNSPEC_COND_FSUB)
5772            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5773           UNSPEC_SEL))]
5774   "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
5775   "@
5776    movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5777    movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5778    #"
5779   "&& 1"
5780   {
5781     if (reload_completed
5782         && register_operand (operands[4], <MODE>mode)
5783         && !rtx_equal_p (operands[0], operands[4]))
5784       {
5785         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5786                                                  operands[4], operands[1]));
5787         operands[4] = operands[3] = operands[0];
5788       }
5789     else if (!rtx_equal_p (operands[1], operands[5]))
5790       operands[5] = copy_rtx (operands[1]);
5791     else
5792       FAIL;
5793   }
5794   [(set_attr "movprfx" "yes")]
5795 )
5796
5797 (define_insn_and_rewrite "*cond_sub<mode>_const_strict"
5798   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5799         (unspec:SVE_FULL_F
5800           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5801            (unspec:SVE_FULL_F
5802              [(match_dup 1)
5803               (const_int SVE_STRICT_GP)
5804               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5805               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5806              UNSPEC_COND_FSUB)
5807            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5808           UNSPEC_SEL))]
5809   "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
5810   "@
5811    movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5812    movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5813    #"
5814   "&& reload_completed
5815    && register_operand (operands[4], <MODE>mode)
5816    && !rtx_equal_p (operands[0], operands[4])"
5817   {
5818     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5819                                              operands[4], operands[1]));
5820     operands[4] = operands[3] = operands[0];
5821   }
5822   [(set_attr "movprfx" "yes")]
5823 )
5824 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5825
5826 ;; -------------------------------------------------------------------------
5827 ;; ---- [FP] Absolute difference
5828 ;; -------------------------------------------------------------------------
5829 ;; Includes:
5830 ;; - FABD
5831 ;; -------------------------------------------------------------------------
5832
5833 ;; Predicated floating-point absolute difference.
5834 (define_expand "@aarch64_pred_abd<mode>"
5835   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5836         (unspec:SVE_FULL_F
5837           [(match_operand:<VPRED> 1 "register_operand")
5838            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5839            (unspec:SVE_FULL_F
5840              [(match_dup 1)
5841               (match_dup 4)
5842               (match_operand:SVE_FULL_F 2 "register_operand")
5843               (match_operand:SVE_FULL_F 3 "register_operand")]
5844              UNSPEC_COND_FSUB)]
5845           UNSPEC_COND_FABS))]
5846   "TARGET_SVE"
5847 )
5848
5849 ;; Predicated floating-point absolute difference.
5850 (define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
5851   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5852         (unspec:SVE_FULL_F
5853           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5854            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5855            (unspec:SVE_FULL_F
5856              [(match_operand 5)
5857               (const_int SVE_RELAXED_GP)
5858               (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5859               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5860              UNSPEC_COND_FSUB)]
5861           UNSPEC_COND_FABS))]
5862   "TARGET_SVE"
5863   "@
5864    fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5865    movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5866   "&& !rtx_equal_p (operands[1], operands[5])"
5867   {
5868     operands[5] = copy_rtx (operands[1]);
5869   }
5870   [(set_attr "movprfx" "*,yes")]
5871 )
5872
5873 (define_insn "*aarch64_pred_abd<mode>_strict"
5874   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5875         (unspec:SVE_FULL_F
5876           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5877            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5878            (unspec:SVE_FULL_F
5879              [(match_dup 1)
5880               (const_int SVE_STRICT_GP)
5881               (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5882               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5883              UNSPEC_COND_FSUB)]
5884           UNSPEC_COND_FABS))]
5885   "TARGET_SVE"
5886   "@
5887    fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5888    movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5889   [(set_attr "movprfx" "*,yes")]
5890 )
5891
5892 (define_expand "@aarch64_cond_abd<mode>"
5893   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5894         (unspec:SVE_FULL_F
5895           [(match_operand:<VPRED> 1 "register_operand")
5896            (unspec:SVE_FULL_F
5897              [(match_dup 1)
5898               (const_int SVE_STRICT_GP)
5899               (unspec:SVE_FULL_F
5900                 [(match_dup 1)
5901                  (const_int SVE_STRICT_GP)
5902                  (match_operand:SVE_FULL_F 2 "register_operand")
5903                  (match_operand:SVE_FULL_F 3 "register_operand")]
5904                 UNSPEC_COND_FSUB)]
5905              UNSPEC_COND_FABS)
5906            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5907           UNSPEC_SEL))]
5908   "TARGET_SVE"
5909 {
5910   if (rtx_equal_p (operands[3], operands[4]))
5911     std::swap (operands[2], operands[3]);
5912 })
5913
5914 ;; Predicated floating-point absolute difference, merging with the first
5915 ;; input.
5916 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
5917   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5918         (unspec:SVE_FULL_F
5919           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5920            (unspec:SVE_FULL_F
5921              [(match_operand 4)
5922               (const_int SVE_RELAXED_GP)
5923               (unspec:SVE_FULL_F
5924                 [(match_operand 5)
5925                  (const_int SVE_RELAXED_GP)
5926                  (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5927                  (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5928                 UNSPEC_COND_FSUB)]
5929              UNSPEC_COND_FABS)
5930            (match_dup 2)]
5931           UNSPEC_SEL))]
5932   "TARGET_SVE"
5933   "@
5934    fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5935    movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5936   "&& (!rtx_equal_p (operands[1], operands[4])
5937        || !rtx_equal_p (operands[1], operands[5]))"
5938   {
5939     operands[4] = copy_rtx (operands[1]);
5940     operands[5] = copy_rtx (operands[1]);
5941   }
5942   [(set_attr "movprfx" "*,yes")]
5943 )
5944
5945 (define_insn "*aarch64_cond_abd<mode>_2_strict"
5946   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5947         (unspec:SVE_FULL_F
5948           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5949            (unspec:SVE_FULL_F
5950              [(match_dup 1)
5951               (match_operand:SI 4 "aarch64_sve_gp_strictness")
5952               (unspec:SVE_FULL_F
5953                 [(match_dup 1)
5954                  (match_operand:SI 5 "aarch64_sve_gp_strictness")
5955                  (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5956                  (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5957                 UNSPEC_COND_FSUB)]
5958              UNSPEC_COND_FABS)
5959            (match_dup 2)]
5960           UNSPEC_SEL))]
5961   "TARGET_SVE"
5962   "@
5963    fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5964    movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5965   [(set_attr "movprfx" "*,yes")]
5966 )
5967
5968 ;; Predicated floating-point absolute difference, merging with the second
5969 ;; input.
5970 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
5971   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5972         (unspec:SVE_FULL_F
5973           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5974            (unspec:SVE_FULL_F
5975              [(match_operand 4)
5976               (const_int SVE_RELAXED_GP)
5977               (unspec:SVE_FULL_F
5978                 [(match_operand 5)
5979                  (const_int SVE_RELAXED_GP)
5980                  (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5981                  (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5982                 UNSPEC_COND_FSUB)]
5983              UNSPEC_COND_FABS)
5984            (match_dup 3)]
5985           UNSPEC_SEL))]
5986   "TARGET_SVE"
5987   "@
5988    fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5989    movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5990   "&& (!rtx_equal_p (operands[1], operands[4])
5991        || !rtx_equal_p (operands[1], operands[5]))"
5992   {
5993     operands[4] = copy_rtx (operands[1]);
5994     operands[5] = copy_rtx (operands[1]);
5995   }
5996   [(set_attr "movprfx" "*,yes")]
5997 )
5998
5999 (define_insn "*aarch64_cond_abd<mode>_3_strict"
6000   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6001         (unspec:SVE_FULL_F
6002           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6003            (unspec:SVE_FULL_F
6004              [(match_dup 1)
6005               (match_operand:SI 4 "aarch64_sve_gp_strictness")
6006               (unspec:SVE_FULL_F
6007                 [(match_dup 1)
6008                  (match_operand:SI 5 "aarch64_sve_gp_strictness")
6009                  (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6010                  (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
6011                 UNSPEC_COND_FSUB)]
6012              UNSPEC_COND_FABS)
6013            (match_dup 3)]
6014           UNSPEC_SEL))]
6015   "TARGET_SVE"
6016   "@
6017    fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6018    movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
6019   [(set_attr "movprfx" "*,yes")]
6020 )
6021
6022 ;; Predicated floating-point absolute difference, merging with an
6023 ;; independent value.
6024 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
6025   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
6026         (unspec:SVE_FULL_F
6027           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
6028            (unspec:SVE_FULL_F
6029              [(match_operand 5)
6030               (const_int SVE_RELAXED_GP)
6031               (unspec:SVE_FULL_F
6032                 [(match_operand 6)
6033                  (const_int SVE_RELAXED_GP)
6034                  (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
6035                  (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
6036                 UNSPEC_COND_FSUB)]
6037              UNSPEC_COND_FABS)
6038            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
6039           UNSPEC_SEL))]
6040   "TARGET_SVE
6041    && !rtx_equal_p (operands[2], operands[4])
6042    && !rtx_equal_p (operands[3], operands[4])"
6043   "@
6044    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6045    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6046    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6047    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6048    #"
6049   "&& 1"
6050   {
6051     if (reload_completed
6052         && register_operand (operands[4], <MODE>mode)
6053         && !rtx_equal_p (operands[0], operands[4]))
6054       {
6055         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6056                                                  operands[4], operands[1]));
6057         operands[4] = operands[3] = operands[0];
6058       }
6059     else if (!rtx_equal_p (operands[1], operands[5])
6060              || !rtx_equal_p (operands[1], operands[6]))
6061       {
6062         operands[5] = copy_rtx (operands[1]);
6063         operands[6] = copy_rtx (operands[1]);
6064       }
6065     else
6066       FAIL;
6067   }
6068   [(set_attr "movprfx" "yes")]
6069 )
6070
6071 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
6072   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
6073         (unspec:SVE_FULL_F
6074           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
6075            (unspec:SVE_FULL_F
6076              [(match_dup 1)
6077               (match_operand:SI 5 "aarch64_sve_gp_strictness")
6078               (unspec:SVE_FULL_F
6079                 [(match_dup 1)
6080                  (match_operand:SI 6 "aarch64_sve_gp_strictness")
6081                  (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
6082                  (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
6083                 UNSPEC_COND_FSUB)]
6084              UNSPEC_COND_FABS)
6085            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
6086           UNSPEC_SEL))]
6087   "TARGET_SVE
6088    && !rtx_equal_p (operands[2], operands[4])
6089    && !rtx_equal_p (operands[3], operands[4])"
6090   "@
6091    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6092    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6093    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6094    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6095    #"
6096   "&& reload_completed
6097    && register_operand (operands[4], <MODE>mode)
6098    && !rtx_equal_p (operands[0], operands[4])"
6099   {
6100     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6101                                              operands[4], operands[1]));
6102     operands[4] = operands[3] = operands[0];
6103   }
6104   [(set_attr "movprfx" "yes")]
6105 )
6106
6107 ;; -------------------------------------------------------------------------
6108 ;; ---- [FP] Multiplication
6109 ;; -------------------------------------------------------------------------
6110 ;; Includes:
6111 ;; - FMUL
6112 ;; -------------------------------------------------------------------------
6113
6114 ;; Predicated floating-point multiplication.
6115 (define_insn_and_split "@aarch64_pred_<optab><mode>"
6116   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
6117         (unspec:SVE_FULL_F
6118           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
6119            (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
6120            (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
6121            (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
6122           SVE_COND_FP_MUL))]
6123   "TARGET_SVE"
6124   "@
6125    fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6126    #
6127    fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6128    movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6129    movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
6130   ; Split the unpredicated form after reload, so that we don't have
6131   ; the unnecessary PTRUE.
6132   "&& reload_completed
6133    && register_operand (operands[3], <MODE>mode)
6134    && INTVAL (operands[4]) == SVE_RELAXED_GP"
6135   [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
6136   ""
6137   [(set_attr "movprfx" "*,*,*,yes,yes")]
6138 )
6139
6140 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6141 ;; SVE_COND_FP_BINARY_I1.
6142
6143 ;; Unpredicated multiplication by selected lanes.
6144 (define_insn "@aarch64_mul_lane_<mode>"
6145   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6146         (mult:SVE_FULL_F
6147           (unspec:SVE_FULL_F
6148             [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
6149              (match_operand:SI 3 "const_int_operand")]
6150             UNSPEC_SVE_LANE_SELECT)
6151           (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
6152   "TARGET_SVE"
6153   "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6154 )
6155
6156 ;; -------------------------------------------------------------------------
6157 ;; ---- [FP] Division
6158 ;; -------------------------------------------------------------------------
6159 ;; The patterns in this section are synthetic.
6160 ;; -------------------------------------------------------------------------
6161
6162 (define_expand "div<mode>3"
6163   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6164         (unspec:SVE_FULL_F
6165           [(match_dup 3)
6166            (const_int SVE_RELAXED_GP)
6167            (match_operand:SVE_FULL_F 1 "nonmemory_operand")
6168            (match_operand:SVE_FULL_F 2 "register_operand")]
6169           UNSPEC_COND_FDIV))]
6170   "TARGET_SVE"
6171   {
6172     if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
6173       DONE;
6174
6175     operands[1] = force_reg (<MODE>mode, operands[1]);
6176     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
6177   }
6178 )
6179
6180 (define_expand "@aarch64_frecpe<mode>"
6181   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6182         (unspec:SVE_FULL_F
6183           [(match_operand:SVE_FULL_F 1 "register_operand")]
6184           UNSPEC_FRECPE))]
6185   "TARGET_SVE"
6186 )
6187
6188 (define_expand "@aarch64_frecps<mode>"
6189   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6190         (unspec:SVE_FULL_F
6191           [(match_operand:SVE_FULL_F 1 "register_operand")
6192            (match_operand:SVE_FULL_F 2 "register_operand")]
6193           UNSPEC_FRECPS))]
6194   "TARGET_SVE"
6195 )
6196
6197 ;; -------------------------------------------------------------------------
6198 ;; ---- [FP] Binary logical operations
6199 ;; -------------------------------------------------------------------------
6200 ;; Includes
6201 ;; - AND
6202 ;; - EOR
6203 ;; - ORR
6204 ;; -------------------------------------------------------------------------
6205
6206 ;; Binary logical operations on floating-point modes.  We avoid subregs
6207 ;; by providing this, but we need to use UNSPECs since rtx logical ops
6208 ;; aren't defined for floating-point modes.
6209 (define_insn "*<optab><mode>3"
6210   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6211         (unspec:SVE_FULL_F
6212           [(match_operand:SVE_FULL_F 1 "register_operand" "w")
6213            (match_operand:SVE_FULL_F 2 "register_operand" "w")]
6214           LOGICALF))]
6215   "TARGET_SVE"
6216   "<logicalf_op>\t%0.d, %1.d, %2.d"
6217 )
6218
6219 ;; -------------------------------------------------------------------------
6220 ;; ---- [FP] Sign copying
6221 ;; -------------------------------------------------------------------------
6222 ;; The patterns in this section are synthetic.
6223 ;; -------------------------------------------------------------------------
6224
6225 (define_expand "copysign<mode>3"
6226   [(match_operand:SVE_FULL_F 0 "register_operand")
6227    (match_operand:SVE_FULL_F 1 "register_operand")
6228    (match_operand:SVE_FULL_F 2 "register_operand")]
6229   "TARGET_SVE"
6230   {
6231     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6232     rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6233     rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6234     int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6235
6236     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6237     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6238
6239     emit_insn (gen_and<v_int_equiv>3
6240                (sign, arg2,
6241                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6242                                                    HOST_WIDE_INT_M1U
6243                                                    << bits)));
6244     emit_insn (gen_and<v_int_equiv>3
6245                (mant, arg1,
6246                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6247                                                    ~(HOST_WIDE_INT_M1U
6248                                                      << bits))));
6249     emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
6250     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6251     DONE;
6252   }
6253 )
6254
6255 (define_expand "xorsign<mode>3"
6256   [(match_operand:SVE_FULL_F 0 "register_operand")
6257    (match_operand:SVE_FULL_F 1 "register_operand")
6258    (match_operand:SVE_FULL_F 2 "register_operand")]
6259   "TARGET_SVE"
6260   {
6261     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6262     rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6263     int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6264
6265     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6266     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6267
6268     emit_insn (gen_and<v_int_equiv>3
6269                (sign, arg2,
6270                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6271                                                    HOST_WIDE_INT_M1U
6272                                                    << bits)));
6273     emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
6274     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6275     DONE;
6276   }
6277 )
6278
6279 ;; -------------------------------------------------------------------------
6280 ;; ---- [FP] Maximum and minimum
6281 ;; -------------------------------------------------------------------------
6282 ;; Includes:
6283 ;; - FMAX
6284 ;; - FMAXNM
6285 ;; - FMIN
6286 ;; - FMINNM
6287 ;; -------------------------------------------------------------------------
6288
6289 ;; Unpredicated fmax/fmin (the libm functions).  The optabs for the
6290 ;; smax/smin rtx codes are handled in the generic section above.
6291 (define_expand "<fmaxmin><mode>3"
6292   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6293         (unspec:SVE_FULL_F
6294           [(match_dup 3)
6295            (const_int SVE_RELAXED_GP)
6296            (match_operand:SVE_FULL_F 1 "register_operand")
6297            (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
6298           SVE_COND_FP_MAXMIN_PUBLIC))]
6299   "TARGET_SVE"
6300   {
6301     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
6302   }
6303 )
6304
6305 ;; Predicated fmax/fmin (the libm functions).  The optabs for the
6306 ;; smax/smin rtx codes are handled in the generic section above.
6307 (define_expand "cond_<fmaxmin><mode>"
6308   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6309         (unspec:SVE_FULL_F
6310           [(match_operand:<VPRED> 1 "register_operand")
6311            (unspec:SVE_FULL_F
6312              [(match_dup 1)
6313               (const_int SVE_RELAXED_GP)
6314               (match_operand:SVE_FULL_F 2 "register_operand")
6315               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
6316              SVE_COND_FP_MAXMIN_PUBLIC)
6317            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6318           UNSPEC_SEL))]
6319   "TARGET_SVE"
6320 )
6321
6322 ;; Predicated floating-point maximum/minimum.
6323 (define_insn "@aarch64_pred_<optab><mode>"
6324   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
6325         (unspec:SVE_FULL_F
6326           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
6327            (match_operand:SI 4 "aarch64_sve_gp_strictness")
6328            (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
6329            (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
6330           SVE_COND_FP_MAXMIN))]
6331   "TARGET_SVE"
6332   "@
6333    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6334    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6335    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6336    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
6337   [(set_attr "movprfx" "*,*,yes,yes")]
6338 )
6339
6340 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6341 ;; SVE_COND_FP_BINARY_I1.
6342
6343 ;; -------------------------------------------------------------------------
6344 ;; ---- [PRED] Binary logical operations
6345 ;; -------------------------------------------------------------------------
6346 ;; Includes:
6347 ;; - AND
6348 ;; - ANDS
6349 ;; - EOR
6350 ;; - EORS
6351 ;; - ORR
6352 ;; - ORRS
6353 ;; -------------------------------------------------------------------------
6354
6355 ;; Predicate AND.  We can reuse one of the inputs as the GP.
6356 ;; Doubling the second operand is the preferred implementation
6357 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
6358 (define_insn "and<mode>3"
6359   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6360         (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
6361                       (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
6362   "TARGET_SVE"
6363   "and\t%0.b, %1/z, %2.b, %2.b"
6364 )
6365
6366 ;; Unpredicated predicate EOR and ORR.
6367 (define_expand "<optab><mode>3"
6368   [(set (match_operand:PRED_ALL 0 "register_operand")
6369         (and:PRED_ALL
6370           (LOGICAL_OR:PRED_ALL
6371             (match_operand:PRED_ALL 1 "register_operand")
6372             (match_operand:PRED_ALL 2 "register_operand"))
6373           (match_dup 3)))]
6374   "TARGET_SVE"
6375   {
6376     operands[3] = aarch64_ptrue_reg (<MODE>mode);
6377   }
6378 )
6379
6380 ;; Predicated predicate AND, EOR and ORR.
6381 (define_insn "@aarch64_pred_<optab><mode>_z"
6382   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6383         (and:PRED_ALL
6384           (LOGICAL:PRED_ALL
6385             (match_operand:PRED_ALL 2 "register_operand" "Upa")
6386             (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6387           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
6388   "TARGET_SVE"
6389   "<logical>\t%0.b, %1/z, %2.b, %3.b"
6390 )
6391
6392 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
6393 ;; the GP.  Store the result in operand 0 and set the flags in the same
6394 ;; way as for PTEST.
6395 (define_insn "*<optab><mode>3_cc"
6396   [(set (reg:CC_NZC CC_REGNUM)
6397         (unspec:CC_NZC
6398           [(match_operand:VNx16BI 1 "register_operand" "Upa")
6399            (match_operand 4)
6400            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6401            (and:PRED_ALL
6402              (LOGICAL:PRED_ALL
6403                (match_operand:PRED_ALL 2 "register_operand" "Upa")
6404                (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6405              (match_dup 4))]
6406           UNSPEC_PTEST))
6407    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6408         (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
6409                       (match_dup 4)))]
6410   "TARGET_SVE"
6411   "<logical>s\t%0.b, %1/z, %2.b, %3.b"
6412 )
6413
6414 ;; Same with just the flags result.
6415 (define_insn "*<optab><mode>3_ptest"
6416   [(set (reg:CC_NZC CC_REGNUM)
6417         (unspec:CC_NZC
6418           [(match_operand:VNx16BI 1 "register_operand" "Upa")
6419            (match_operand 4)
6420            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6421            (and:PRED_ALL
6422              (LOGICAL:PRED_ALL
6423                (match_operand:PRED_ALL 2 "register_operand" "Upa")
6424                (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6425              (match_dup 4))]
6426           UNSPEC_PTEST))
6427    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
6428   "TARGET_SVE"
6429   "<logical>s\t%0.b, %1/z, %2.b, %3.b"
6430 )
6431
6432 ;; -------------------------------------------------------------------------
6433 ;; ---- [PRED] Binary logical operations (inverted second input)
6434 ;; -------------------------------------------------------------------------
6435 ;; Includes:
6436 ;; - BIC
6437 ;; - ORN
6438 ;; -------------------------------------------------------------------------
6439
6440 ;; Predicated predicate BIC and ORN.
6441 (define_insn "aarch64_pred_<nlogical><mode>_z"
6442   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6443         (and:PRED_ALL
6444           (NLOGICAL:PRED_ALL
6445             (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6446             (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6447           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
6448   "TARGET_SVE"
6449   "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
6450 )
6451
6452 ;; Same, but set the flags as a side-effect.
6453 (define_insn "*<nlogical><mode>3_cc"
6454   [(set (reg:CC_NZC CC_REGNUM)
6455         (unspec:CC_NZC
6456           [(match_operand:VNx16BI 1 "register_operand" "Upa")
6457            (match_operand 4)
6458            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6459            (and:PRED_ALL
6460              (NLOGICAL:PRED_ALL
6461                (not:PRED_ALL
6462                  (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6463                (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6464              (match_dup 4))]
6465           UNSPEC_PTEST))
6466    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6467         (and:PRED_ALL (NLOGICAL:PRED_ALL
6468                         (not:PRED_ALL (match_dup 3))
6469                         (match_dup 2))
6470                       (match_dup 4)))]
6471   "TARGET_SVE"
6472   "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
6473 )
6474
6475 ;; Same with just the flags result.
6476 (define_insn "*<nlogical><mode>3_ptest"
6477   [(set (reg:CC_NZC CC_REGNUM)
6478         (unspec:CC_NZC
6479           [(match_operand:VNx16BI 1 "register_operand" "Upa")
6480            (match_operand 4)
6481            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6482            (and:PRED_ALL
6483              (NLOGICAL:PRED_ALL
6484                (not:PRED_ALL
6485                  (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6486                (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6487              (match_dup 4))]
6488           UNSPEC_PTEST))
6489    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
6490   "TARGET_SVE"
6491   "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
6492 )
6493
6494 ;; -------------------------------------------------------------------------
6495 ;; ---- [PRED] Binary logical operations (inverted result)
6496 ;; -------------------------------------------------------------------------
6497 ;; Includes:
6498 ;; - NAND
6499 ;; - NOR
6500 ;; -------------------------------------------------------------------------
6501
6502 ;; Predicated predicate NAND and NOR.
6503 (define_insn "aarch64_pred_<logical_nn><mode>_z"
6504   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6505         (and:PRED_ALL
6506           (NLOGICAL:PRED_ALL
6507             (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6508             (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
6509           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
6510   "TARGET_SVE"
6511   "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
6512 )
6513
6514 ;; Same, but set the flags as a side-effect.
6515 (define_insn "*<logical_nn><mode>3_cc"
6516   [(set (reg:CC_NZC CC_REGNUM)
6517         (unspec:CC_NZC
6518           [(match_operand:VNx16BI 1 "register_operand" "Upa")
6519            (match_operand 4)
6520            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6521            (and:PRED_ALL
6522              (NLOGICAL:PRED_ALL
6523                (not:PRED_ALL
6524                  (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6525                (not:PRED_ALL
6526                  (match_operand:PRED_ALL 3 "register_operand" "Upa")))
6527              (match_dup 4))]
6528           UNSPEC_PTEST))
6529    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6530         (and:PRED_ALL (NLOGICAL:PRED_ALL
6531                         (not:PRED_ALL (match_dup 2))
6532                         (not:PRED_ALL (match_dup 3)))
6533                       (match_dup 4)))]
6534   "TARGET_SVE"
6535   "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
6536 )
6537
6538 ;; Same with just the flags result.
6539 (define_insn "*<logical_nn><mode>3_ptest"
6540   [(set (reg:CC_NZC CC_REGNUM)
6541         (unspec:CC_NZC
6542           [(match_operand:VNx16BI 1 "register_operand" "Upa")
6543            (match_operand 4)
6544            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6545            (and:PRED_ALL
6546              (NLOGICAL:PRED_ALL
6547                (not:PRED_ALL
6548                  (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6549                (not:PRED_ALL
6550                  (match_operand:PRED_ALL 3 "register_operand" "Upa")))
6551              (match_dup 4))]
6552           UNSPEC_PTEST))
6553    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
6554   "TARGET_SVE"
6555   "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
6556 )
6557
6558 ;; =========================================================================
6559 ;; == Ternary arithmetic
6560 ;; =========================================================================
6561
6562 ;; -------------------------------------------------------------------------
6563 ;; ---- [INT] MLA and MAD
6564 ;; -------------------------------------------------------------------------
6565 ;; Includes:
6566 ;; - MAD
6567 ;; - MLA
6568 ;; -------------------------------------------------------------------------
6569
6570 ;; Unpredicated integer addition of product.
6571 (define_expand "fma<mode>4"
6572   [(set (match_operand:SVE_I 0 "register_operand")
6573         (plus:SVE_I
6574           (unspec:SVE_I
6575             [(match_dup 4)
6576              (mult:SVE_I
6577                (match_operand:SVE_I 1 "register_operand")
6578                (match_operand:SVE_I 2 "nonmemory_operand"))]
6579             UNSPEC_PRED_X)
6580           (match_operand:SVE_I 3 "register_operand")))]
6581   "TARGET_SVE"
6582   {
6583     if (aarch64_prepare_sve_int_fma (operands, PLUS))
6584       DONE;
6585     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6586   }
6587 )
6588
6589 ;; Predicated integer addition of product.
6590 (define_insn "@aarch64_pred_fma<mode>"
6591   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
6592         (plus:SVE_I
6593           (unspec:SVE_I
6594             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6595              (mult:SVE_I
6596                (match_operand:SVE_I 2 "register_operand" "%0, w, w")
6597                (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
6598             UNSPEC_PRED_X)
6599           (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
6600   "TARGET_SVE"
6601   "@
6602    mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6603    mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6604    movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6605   [(set_attr "movprfx" "*,*,yes")]
6606 )
6607
6608 ;; Predicated integer addition of product with merging.
6609 (define_expand "cond_fma<mode>"
6610   [(set (match_operand:SVE_I 0 "register_operand")
6611         (unspec:SVE_I
6612           [(match_operand:<VPRED> 1 "register_operand")
6613            (plus:SVE_I
6614              (mult:SVE_I
6615                (match_operand:SVE_I 2 "register_operand")
6616                (match_operand:SVE_I 3 "general_operand"))
6617              (match_operand:SVE_I 4 "register_operand"))
6618            (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
6619           UNSPEC_SEL))]
6620   "TARGET_SVE"
6621   {
6622     if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
6623       DONE;
6624     /* Swap the multiplication operands if the fallback value is the
6625        second of the two.  */
6626     if (rtx_equal_p (operands[3], operands[5]))
6627       std::swap (operands[2], operands[3]);
6628   }
6629 )
6630
6631 ;; Predicated integer addition of product, merging with the first input.
6632 (define_insn "*cond_fma<mode>_2"
6633   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
6634         (unspec:SVE_I
6635           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6636            (plus:SVE_I
6637              (mult:SVE_I
6638                (match_operand:SVE_I 2 "register_operand" "0, w")
6639                (match_operand:SVE_I 3 "register_operand" "w, w"))
6640              (match_operand:SVE_I 4 "register_operand" "w, w"))
6641            (match_dup 2)]
6642           UNSPEC_SEL))]
6643   "TARGET_SVE"
6644   "@
6645    mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6646    movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6647   [(set_attr "movprfx" "*,yes")]
6648 )
6649
6650 ;; Predicated integer addition of product, merging with the third input.
6651 (define_insn "*cond_fma<mode>_4"
6652   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
6653         (unspec:SVE_I
6654           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6655            (plus:SVE_I
6656              (mult:SVE_I
6657                (match_operand:SVE_I 2 "register_operand" "w, w")
6658                (match_operand:SVE_I 3 "register_operand" "w, w"))
6659              (match_operand:SVE_I 4 "register_operand" "0, w"))
6660            (match_dup 4)]
6661           UNSPEC_SEL))]
6662   "TARGET_SVE"
6663   "@
6664    mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6665    movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6666   [(set_attr "movprfx" "*,yes")]
6667 )
6668
6669 ;; Predicated integer addition of product, merging with an independent value.
6670 (define_insn_and_rewrite "*cond_fma<mode>_any"
6671   [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6672         (unspec:SVE_I
6673           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6674            (plus:SVE_I
6675              (mult:SVE_I
6676                (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
6677                (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))
6678              (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w"))
6679            (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6680           UNSPEC_SEL))]
6681   "TARGET_SVE
6682    && !rtx_equal_p (operands[2], operands[5])
6683    && !rtx_equal_p (operands[3], operands[5])
6684    && !rtx_equal_p (operands[4], operands[5])"
6685   "@
6686    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6687    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6688    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6689    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6690    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6691    #"
6692   "&& reload_completed
6693    && register_operand (operands[5], <MODE>mode)
6694    && !rtx_equal_p (operands[0], operands[5])"
6695   {
6696     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6697                                              operands[5], operands[1]));
6698     operands[5] = operands[4] = operands[0];
6699   }
6700   [(set_attr "movprfx" "yes")]
6701 )
6702
6703 ;; -------------------------------------------------------------------------
6704 ;; ---- [INT] MLS and MSB
6705 ;; -------------------------------------------------------------------------
6706 ;; Includes:
6707 ;; - MLS
6708 ;; - MSB
6709 ;; -------------------------------------------------------------------------
6710
6711 ;; Unpredicated integer subtraction of product.
6712 (define_expand "fnma<mode>4"
6713   [(set (match_operand:SVE_I 0 "register_operand")
6714         (minus:SVE_I
6715           (match_operand:SVE_I 3 "register_operand")
6716           (unspec:SVE_I
6717             [(match_dup 4)
6718              (mult:SVE_I
6719                (match_operand:SVE_I 1 "register_operand")
6720                (match_operand:SVE_I 2 "general_operand"))]
6721             UNSPEC_PRED_X)))]
6722   "TARGET_SVE"
6723   {
6724     if (aarch64_prepare_sve_int_fma (operands, MINUS))
6725       DONE;
6726     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6727   }
6728 )
6729
6730 ;; Predicated integer subtraction of product.
6731 (define_insn "@aarch64_pred_fnma<mode>"
6732   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
6733         (minus:SVE_I
6734           (match_operand:SVE_I 4 "register_operand" "w, 0, w")
6735           (unspec:SVE_I
6736             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6737              (mult:SVE_I
6738                (match_operand:SVE_I 2 "register_operand" "%0, w, w")
6739                (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
6740             UNSPEC_PRED_X)))]
6741   "TARGET_SVE"
6742   "@
6743    msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6744    mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6745    movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6746   [(set_attr "movprfx" "*,*,yes")]
6747 )
6748
6749 ;; Predicated integer subtraction of product with merging.
6750 (define_expand "cond_fnma<mode>"
6751   [(set (match_operand:SVE_I 0 "register_operand")
6752    (unspec:SVE_I
6753         [(match_operand:<VPRED> 1 "register_operand")
6754          (minus:SVE_I
6755            (match_operand:SVE_I 4 "register_operand")
6756            (mult:SVE_I
6757              (match_operand:SVE_I 2 "register_operand")
6758              (match_operand:SVE_I 3 "general_operand")))
6759          (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
6760         UNSPEC_SEL))]
6761   "TARGET_SVE"
6762   {
6763     if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
6764       DONE;
6765     /* Swap the multiplication operands if the fallback value is the
6766        second of the two.  */
6767     if (rtx_equal_p (operands[3], operands[5]))
6768       std::swap (operands[2], operands[3]);
6769   }
6770 )
6771
6772 ;; Predicated integer subtraction of product, merging with the first input.
6773 (define_insn "*cond_fnma<mode>_2"
6774   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
6775         (unspec:SVE_I
6776           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6777            (minus:SVE_I
6778              (match_operand:SVE_I 4 "register_operand" "w, w")
6779              (mult:SVE_I
6780                (match_operand:SVE_I 2 "register_operand" "0, w")
6781                (match_operand:SVE_I 3 "register_operand" "w, w")))
6782            (match_dup 2)]
6783           UNSPEC_SEL))]
6784   "TARGET_SVE"
6785   "@
6786    msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6787    movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6788   [(set_attr "movprfx" "*,yes")]
6789 )
6790
6791 ;; Predicated integer subtraction of product, merging with the third input.
6792 (define_insn "*cond_fnma<mode>_4"
6793   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
6794         (unspec:SVE_I
6795           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6796            (minus:SVE_I
6797              (match_operand:SVE_I 4 "register_operand" "0, w")
6798              (mult:SVE_I
6799                (match_operand:SVE_I 2 "register_operand" "w, w")
6800                (match_operand:SVE_I 3 "register_operand" "w, w")))
6801            (match_dup 4)]
6802           UNSPEC_SEL))]
6803   "TARGET_SVE"
6804   "@
6805    mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6806    movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6807   [(set_attr "movprfx" "*,yes")]
6808 )
6809
6810 ;; Predicated integer subtraction of product, merging with an
6811 ;; independent value.
6812 (define_insn_and_rewrite "*cond_fnma<mode>_any"
6813   [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6814         (unspec:SVE_I
6815           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6816            (minus:SVE_I
6817              (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")
6818              (mult:SVE_I
6819                (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w")
6820                (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")))
6821            (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6822           UNSPEC_SEL))]
6823   "TARGET_SVE
6824    && !rtx_equal_p (operands[2], operands[5])
6825    && !rtx_equal_p (operands[3], operands[5])
6826    && !rtx_equal_p (operands[4], operands[5])"
6827   "@
6828    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6829    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6830    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6831    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6832    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6833    #"
6834   "&& reload_completed
6835    && register_operand (operands[5], <MODE>mode)
6836    && !rtx_equal_p (operands[0], operands[5])"
6837   {
6838     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6839                                              operands[5], operands[1]));
6840     operands[5] = operands[4] = operands[0];
6841   }
6842   [(set_attr "movprfx" "yes")]
6843 )
6844
6845 ;; -------------------------------------------------------------------------
6846 ;; ---- [INT] Dot product
6847 ;; -------------------------------------------------------------------------
6848 ;; Includes:
6849 ;; - SDOT
6850 ;; - SUDOT   (I8MM)
6851 ;; - UDOT
6852 ;; - USDOT   (I8MM)
6853 ;; -------------------------------------------------------------------------
6854
6855 ;; Four-element integer dot-product with accumulation.
6856 (define_insn "<sur>dot_prod<vsi2qi>"
6857   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6858         (plus:SVE_FULL_SDI
6859           (unspec:SVE_FULL_SDI
6860             [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6861              (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6862             DOTPROD)
6863           (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
6864   "TARGET_SVE"
6865   "@
6866    <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
6867    movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
6868   [(set_attr "movprfx" "*,yes")]
6869 )
6870
6871 ;; Four-element integer dot-product by selected lanes with accumulation.
6872 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6873   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6874         (plus:SVE_FULL_SDI
6875           (unspec:SVE_FULL_SDI
6876             [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6877              (unspec:<VSI2QI>
6878                [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6879                 (match_operand:SI 3 "const_int_operand")]
6880                UNSPEC_SVE_LANE_SELECT)]
6881             DOTPROD)
6882           (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
6883   "TARGET_SVE"
6884   "@
6885    <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
6886    movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
6887   [(set_attr "movprfx" "*,yes")]
6888 )
6889
6890 (define_insn "@<sur>dot_prod<vsi2qi>"
6891   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6892         (plus:VNx4SI_ONLY
6893           (unspec:VNx4SI_ONLY
6894             [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6895              (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6896             DOTPROD_US_ONLY)
6897           (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))]
6898   "TARGET_SVE_I8MM"
6899   "@
6900    <sur>dot\\t%0.s, %1.b, %2.b
6901    movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b"
6902    [(set_attr "movprfx" "*,yes")]
6903 )
6904
6905 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6906   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6907         (plus:VNx4SI_ONLY
6908           (unspec:VNx4SI_ONLY
6909             [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6910              (unspec:<VSI2QI>
6911                [(match_operand:<VSI2QI> 2 "register_operand" "y, y")
6912                 (match_operand:SI 3 "const_int_operand")]
6913                UNSPEC_SVE_LANE_SELECT)]
6914             DOTPROD_I8MM)
6915           (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))]
6916   "TARGET_SVE_I8MM"
6917   "@
6918    <sur>dot\\t%0.s, %1.b, %2.b[%3]
6919    movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]"
6920   [(set_attr "movprfx" "*,yes")]
6921 )
6922
6923 ;; -------------------------------------------------------------------------
6924 ;; ---- [INT] Sum of absolute differences
6925 ;; -------------------------------------------------------------------------
6926 ;; The patterns in this section are synthetic.
6927 ;; -------------------------------------------------------------------------
6928
6929 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
6930 ;; operands 1 and 2.  The sequence also has to perform a widening reduction of
6931 ;; the difference into a vector and accumulate that into operand 3 before
6932 ;; copying that into the result operand 0.
6933 ;; Perform that with a sequence of:
6934 ;; MOV          ones.b, #1
6935 ;; [SU]ABD      diff.b, p0/m, op1.b, op2.b
6936 ;; MOVPRFX      op0, op3        // If necessary
6937 ;; UDOT         op0.s, diff.b, ones.b
6938 (define_expand "<sur>sad<vsi2qi>"
6939   [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
6940    (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
6941                     (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
6942    (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
6943   "TARGET_SVE"
6944   {
6945     rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
6946     rtx diff = gen_reg_rtx (<VSI2QI>mode);
6947     emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
6948     emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
6949     DONE;
6950   }
6951 )
6952
6953 ;; -------------------------------------------------------------------------
6954 ;; ---- [INT] Matrix multiply-accumulate
6955 ;; -------------------------------------------------------------------------
6956 ;; Includes:
6957 ;; - SMMLA (I8MM)
6958 ;; - UMMLA (I8MM)
6959 ;; - USMMLA (I8MM)
6960 ;; -------------------------------------------------------------------------
6961
6962 (define_insn "@aarch64_sve_add_<optab><vsi2qi>"
6963   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6964         (plus:VNx4SI_ONLY
6965           (unspec:VNx4SI_ONLY
6966             [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
6967              (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
6968             MATMUL)
6969           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
6970   "TARGET_SVE_I8MM"
6971   "@
6972    <sur>mmla\\t%0.s, %2.b, %3.b
6973    movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
6974   [(set_attr "movprfx" "*,yes")]
6975 )
6976
6977 ;; -------------------------------------------------------------------------
6978 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
6979 ;; -------------------------------------------------------------------------
6980 ;; Includes merging patterns for:
6981 ;; - FMAD
6982 ;; - FMLA
6983 ;; - FMLS
6984 ;; - FMSB
6985 ;; - FNMAD
6986 ;; - FNMLA
6987 ;; - FNMLS
6988 ;; - FNMSB
6989 ;; -------------------------------------------------------------------------
6990
6991 ;; Unpredicated floating-point ternary operations.
6992 (define_expand "<optab><mode>4"
6993   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6994         (unspec:SVE_FULL_F
6995           [(match_dup 4)
6996            (const_int SVE_RELAXED_GP)
6997            (match_operand:SVE_FULL_F 1 "register_operand")
6998            (match_operand:SVE_FULL_F 2 "register_operand")
6999            (match_operand:SVE_FULL_F 3 "register_operand")]
7000           SVE_COND_FP_TERNARY))]
7001   "TARGET_SVE"
7002   {
7003     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7004   }
7005 )
7006
7007 ;; Predicated floating-point ternary operations.
7008 (define_insn "@aarch64_pred_<optab><mode>"
7009   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
7010         (unspec:SVE_FULL_F
7011           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
7012            (match_operand:SI 5 "aarch64_sve_gp_strictness")
7013            (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
7014            (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
7015            (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
7016           SVE_COND_FP_TERNARY))]
7017   "TARGET_SVE"
7018   "@
7019    <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7020    <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7021    movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
7022   [(set_attr "movprfx" "*,*,yes")]
7023 )
7024
7025 ;; Predicated floating-point ternary operations with merging.
7026 (define_expand "@cond_<optab><mode>"
7027   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7028         (unspec:SVE_FULL_F
7029           [(match_operand:<VPRED> 1 "register_operand")
7030            (unspec:SVE_FULL_F
7031              [(match_dup 1)
7032               (const_int SVE_STRICT_GP)
7033               (match_operand:SVE_FULL_F 2 "register_operand")
7034               (match_operand:SVE_FULL_F 3 "register_operand")
7035               (match_operand:SVE_FULL_F 4 "register_operand")]
7036              SVE_COND_FP_TERNARY)
7037            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7038           UNSPEC_SEL))]
7039   "TARGET_SVE"
7040 {
7041   /* Swap the multiplication operands if the fallback value is the
7042      second of the two.  */
7043   if (rtx_equal_p (operands[3], operands[5]))
7044     std::swap (operands[2], operands[3]);
7045 })
7046
7047 ;; Predicated floating-point ternary operations, merging with the
7048 ;; first input.
7049 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
7050   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7051         (unspec:SVE_FULL_F
7052           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7053            (unspec:SVE_FULL_F
7054              [(match_operand 5)
7055               (const_int SVE_RELAXED_GP)
7056               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
7057               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7058               (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
7059              SVE_COND_FP_TERNARY)
7060            (match_dup 2)]
7061           UNSPEC_SEL))]
7062   "TARGET_SVE"
7063   "@
7064    <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7065    movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
7066   "&& !rtx_equal_p (operands[1], operands[5])"
7067   {
7068     operands[5] = copy_rtx (operands[1]);
7069   }
7070   [(set_attr "movprfx" "*,yes")]
7071 )
7072
7073 (define_insn "*cond_<optab><mode>_2_strict"
7074   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7075         (unspec:SVE_FULL_F
7076           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7077            (unspec:SVE_FULL_F
7078              [(match_dup 1)
7079               (const_int SVE_STRICT_GP)
7080               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
7081               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7082               (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
7083              SVE_COND_FP_TERNARY)
7084            (match_dup 2)]
7085           UNSPEC_SEL))]
7086   "TARGET_SVE"
7087   "@
7088    <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7089    movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
7090   [(set_attr "movprfx" "*,yes")]
7091 )
7092
7093 ;; Predicated floating-point ternary operations, merging with the
7094 ;; third input.
7095 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7096   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7097         (unspec:SVE_FULL_F
7098           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7099            (unspec:SVE_FULL_F
7100              [(match_operand 5)
7101               (const_int SVE_RELAXED_GP)
7102               (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7103               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7104               (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7105              SVE_COND_FP_TERNARY)
7106            (match_dup 4)]
7107           UNSPEC_SEL))]
7108   "TARGET_SVE"
7109   "@
7110    <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7111    movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
7112   "&& !rtx_equal_p (operands[1], operands[5])"
7113   {
7114     operands[5] = copy_rtx (operands[1]);
7115   }
7116   [(set_attr "movprfx" "*,yes")]
7117 )
7118
7119 (define_insn "*cond_<optab><mode>_4_strict"
7120   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7121         (unspec:SVE_FULL_F
7122           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7123            (unspec:SVE_FULL_F
7124              [(match_dup 1)
7125               (const_int SVE_STRICT_GP)
7126               (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7127               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7128               (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7129              SVE_COND_FP_TERNARY)
7130            (match_dup 4)]
7131           UNSPEC_SEL))]
7132   "TARGET_SVE"
7133   "@
7134    <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7135    movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
7136   [(set_attr "movprfx" "*,yes")]
7137 )
7138
7139 ;; Predicated floating-point ternary operations, merging with an
7140 ;; independent value.
7141 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7142   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
7143         (unspec:SVE_FULL_F
7144           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
7145            (unspec:SVE_FULL_F
7146              [(match_operand 6)
7147               (const_int SVE_RELAXED_GP)
7148               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
7149               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
7150               (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
7151              SVE_COND_FP_TERNARY)
7152            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
7153           UNSPEC_SEL))]
7154   "TARGET_SVE
7155    && !rtx_equal_p (operands[2], operands[5])
7156    && !rtx_equal_p (operands[3], operands[5])
7157    && !rtx_equal_p (operands[4], operands[5])"
7158   "@
7159    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7160    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7161    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7162    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7163    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7164    #"
7165   "&& 1"
7166   {
7167     if (reload_completed
7168         && register_operand (operands[5], <MODE>mode)
7169         && !rtx_equal_p (operands[0], operands[5]))
7170       {
7171         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7172                                                  operands[5], operands[1]));
7173         operands[5] = operands[4] = operands[0];
7174       }
7175     else if (!rtx_equal_p (operands[1], operands[6]))
7176       operands[6] = copy_rtx (operands[1]);
7177     else
7178       FAIL;
7179   }
7180   [(set_attr "movprfx" "yes")]
7181 )
7182
7183 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7184   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
7185         (unspec:SVE_FULL_F
7186           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
7187            (unspec:SVE_FULL_F
7188              [(match_dup 1)
7189               (const_int SVE_STRICT_GP)
7190               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
7191               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
7192               (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
7193              SVE_COND_FP_TERNARY)
7194            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
7195           UNSPEC_SEL))]
7196   "TARGET_SVE
7197    && !rtx_equal_p (operands[2], operands[5])
7198    && !rtx_equal_p (operands[3], operands[5])
7199    && !rtx_equal_p (operands[4], operands[5])"
7200   "@
7201    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7202    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7203    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7204    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7205    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7206    #"
7207   "&& reload_completed
7208    && register_operand (operands[5], <MODE>mode)
7209    && !rtx_equal_p (operands[0], operands[5])"
7210   {
7211     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7212                                              operands[5], operands[1]));
7213     operands[5] = operands[4] = operands[0];
7214   }
7215   [(set_attr "movprfx" "yes")]
7216 )
7217
7218 ;; Unpredicated FMLA and FMLS by selected lanes.  It doesn't seem worth using
7219 ;; (fma ...) since target-independent code won't understand the indexing.
7220 (define_insn "@aarch64_<optab>_lane_<mode>"
7221   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7222         (unspec:SVE_FULL_F
7223           [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
7224            (unspec:SVE_FULL_F
7225              [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
7226               (match_operand:SI 3 "const_int_operand")]
7227              UNSPEC_SVE_LANE_SELECT)
7228            (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7229           SVE_FP_TERNARY_LANE))]
7230   "TARGET_SVE"
7231   "@
7232    <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7233    movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
7234   [(set_attr "movprfx" "*,yes")]
7235 )
7236
7237 ;; -------------------------------------------------------------------------
7238 ;; ---- [FP] Complex multiply-add
7239 ;; -------------------------------------------------------------------------
7240 ;; Includes merging patterns for:
7241 ;; - FCMLA
7242 ;; -------------------------------------------------------------------------
7243
7244 ;; Predicated FCMLA.
7245 (define_insn "@aarch64_pred_<optab><mode>"
7246   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7247         (unspec:SVE_FULL_F
7248           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7249            (match_operand:SI 5 "aarch64_sve_gp_strictness")
7250            (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7251            (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7252            (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7253           SVE_COND_FCMLA))]
7254   "TARGET_SVE"
7255   "@
7256    fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7257    movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
7258   [(set_attr "movprfx" "*,yes")]
7259 )
7260
7261 ;; unpredicated optab pattern for auto-vectorizer
7262 ;; The complex mla/mls operations always need to expand to two instructions.
7263 ;; The first operation does half the computation and the second does the
7264 ;; remainder.  Because of this, expand early.
7265 (define_expand "cml<fcmac1><conj_op><mode>4"
7266   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7267         (unspec:SVE_FULL_F
7268           [(match_dup 4)
7269            (match_dup 5)
7270            (match_operand:SVE_FULL_F 1 "register_operand")
7271            (match_operand:SVE_FULL_F 2 "register_operand")
7272            (match_operand:SVE_FULL_F 3 "register_operand")]
7273           FCMLA_OP))]
7274   "TARGET_SVE"
7275 {
7276   operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7277   operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode);
7278   rtx tmp = gen_reg_rtx (<MODE>mode);
7279   emit_insn
7280     (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4],
7281                                              operands[3], operands[2],
7282                                              operands[1], operands[5]));
7283   emit_insn
7284     (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4],
7285                                              operands[3], operands[2],
7286                                              tmp, operands[5]));
7287   DONE;
7288 })
7289
7290 ;; unpredicated optab pattern for auto-vectorizer
7291 ;; The complex mul operations always need to expand to two instructions.
7292 ;; The first operation does half the computation and the second does the
7293 ;; remainder.  Because of this, expand early.
7294 (define_expand "cmul<conj_op><mode>3"
7295   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7296         (unspec:SVE_FULL_F
7297            [(match_operand:SVE_FULL_F 1 "register_operand")
7298             (match_operand:SVE_FULL_F 2 "register_operand")]
7299           FCMUL_OP))]
7300   "TARGET_SVE"
7301 {
7302   rtx pred_reg = aarch64_ptrue_reg (<VPRED>mode);
7303   rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode);
7304   rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
7305   rtx tmp = gen_reg_rtx (<MODE>mode);
7306   emit_insn
7307     (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg,
7308                                              operands[2], operands[1],
7309                                              accum, gp_mode));
7310   emit_insn
7311     (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg,
7312                                              operands[2], operands[1],
7313                                              tmp, gp_mode));
7314   DONE;
7315 })
7316
7317 ;; Predicated FCMLA with merging.
7318 (define_expand "@cond_<optab><mode>"
7319   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7320         (unspec:SVE_FULL_F
7321           [(match_operand:<VPRED> 1 "register_operand")
7322            (unspec:SVE_FULL_F
7323              [(match_dup 1)
7324               (const_int SVE_STRICT_GP)
7325               (match_operand:SVE_FULL_F 2 "register_operand")
7326               (match_operand:SVE_FULL_F 3 "register_operand")
7327               (match_operand:SVE_FULL_F 4 "register_operand")]
7328              SVE_COND_FCMLA)
7329            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7330           UNSPEC_SEL))]
7331   "TARGET_SVE"
7332 )
7333
7334 ;; Predicated FCMLA, merging with the third input.
7335 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7336   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7337         (unspec:SVE_FULL_F
7338           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7339            (unspec:SVE_FULL_F
7340              [(match_operand 5)
7341               (const_int SVE_RELAXED_GP)
7342               (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7343               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7344               (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7345              SVE_COND_FCMLA)
7346            (match_dup 4)]
7347           UNSPEC_SEL))]
7348   "TARGET_SVE"
7349   "@
7350    fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7351    movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
7352   "&& !rtx_equal_p (operands[1], operands[5])"
7353   {
7354     operands[5] = copy_rtx (operands[1]);
7355   }
7356   [(set_attr "movprfx" "*,yes")]
7357 )
7358
7359 (define_insn "*cond_<optab><mode>_4_strict"
7360   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7361         (unspec:SVE_FULL_F
7362           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7363            (unspec:SVE_FULL_F
7364              [(match_dup 1)
7365               (const_int SVE_STRICT_GP)
7366               (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7367               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7368               (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7369              SVE_COND_FCMLA)
7370            (match_dup 4)]
7371           UNSPEC_SEL))]
7372   "TARGET_SVE"
7373   "@
7374    fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7375    movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
7376   [(set_attr "movprfx" "*,yes")]
7377 )
7378
7379 ;; Predicated FCMLA, merging with an independent value.
7380 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7381   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
7382         (unspec:SVE_FULL_F
7383           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
7384            (unspec:SVE_FULL_F
7385              [(match_operand 6)
7386               (const_int SVE_RELAXED_GP)
7387               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
7388               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
7389               (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
7390              SVE_COND_FCMLA)
7391            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
7392           UNSPEC_SEL))]
7393   "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7394   "@
7395    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7396    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7397    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7398    #"
7399   "&& 1"
7400   {
7401     if (reload_completed
7402         && register_operand (operands[5], <MODE>mode)
7403         && !rtx_equal_p (operands[0], operands[5]))
7404       {
7405         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7406                                                  operands[5], operands[1]));
7407         operands[5] = operands[4] = operands[0];
7408       }
7409     else if (!rtx_equal_p (operands[1], operands[6]))
7410       operands[6] = copy_rtx (operands[1]);
7411     else
7412       FAIL;
7413   }
7414   [(set_attr "movprfx" "yes")]
7415 )
7416
7417 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7418   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
7419         (unspec:SVE_FULL_F
7420           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
7421            (unspec:SVE_FULL_F
7422              [(match_dup 1)
7423               (const_int SVE_STRICT_GP)
7424               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
7425               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
7426               (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
7427              SVE_COND_FCMLA)
7428            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
7429           UNSPEC_SEL))]
7430   "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7431   "@
7432    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7433    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7434    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7435    #"
7436   "&& reload_completed
7437    && register_operand (operands[5], <MODE>mode)
7438    && !rtx_equal_p (operands[0], operands[5])"
7439   {
7440     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7441                                              operands[5], operands[1]));
7442     operands[5] = operands[4] = operands[0];
7443   }
7444   [(set_attr "movprfx" "yes")]
7445 )
7446
7447 ;; Unpredicated FCMLA with indexing.
7448 (define_insn "@aarch64_<optab>_lane_<mode>"
7449   [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
7450         (unspec:SVE_FULL_HSF
7451           [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
7452            (unspec:SVE_FULL_HSF
7453              [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
7454               (match_operand:SI 3 "const_int_operand")]
7455              UNSPEC_SVE_LANE_SELECT)
7456            (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
7457           FCMLA))]
7458   "TARGET_SVE"
7459   "@
7460    fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7461    movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
7462   [(set_attr "movprfx" "*,yes")]
7463 )
7464
7465 ;; -------------------------------------------------------------------------
7466 ;; ---- [FP] Trigonometric multiply-add
7467 ;; -------------------------------------------------------------------------
7468 ;; Includes:
7469 ;; - FTMAD
7470 ;; -------------------------------------------------------------------------
7471
7472 (define_insn "@aarch64_sve_tmad<mode>"
7473   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
7474         (unspec:SVE_FULL_F
7475           [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
7476            (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7477            (match_operand:DI 3 "const_int_operand")]
7478           UNSPEC_FTMAD))]
7479   "TARGET_SVE"
7480   "@
7481    ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7482    movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
7483   [(set_attr "movprfx" "*,yes")]
7484 )
7485
7486 ;; -------------------------------------------------------------------------
7487 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
7488 ;; -------------------------------------------------------------------------
7489 ;; Includes:
7490 ;; - BFDOT (BF16)
7491 ;; - BFMLALB (BF16)
7492 ;; - BFMLALT (BF16)
7493 ;; - BFMMLA (BF16)
7494 ;; -------------------------------------------------------------------------
7495
7496 (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
7497   [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
7498         (unspec:VNx4SF
7499           [(match_operand:VNx4SF 1 "register_operand" "0, w")
7500            (match_operand:VNx8BF 2 "register_operand" "w, w")
7501            (match_operand:VNx8BF 3 "register_operand" "w, w")]
7502           SVE_BFLOAT_TERNARY_LONG))]
7503   "TARGET_SVE_BF16"
7504   "@
7505    <sve_fp_op>\t%0.s, %2.h, %3.h
7506    movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
7507   [(set_attr "movprfx" "*,yes")]
7508 )
7509
7510 ;; The immediate range is enforced before generating the instruction.
7511 (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
7512   [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
7513         (unspec:VNx4SF
7514           [(match_operand:VNx4SF 1 "register_operand" "0, w")
7515            (match_operand:VNx8BF 2 "register_operand" "w, w")
7516            (match_operand:VNx8BF 3 "register_operand" "y, y")
7517            (match_operand:SI 4 "const_int_operand")]
7518           SVE_BFLOAT_TERNARY_LONG_LANE))]
7519   "TARGET_SVE_BF16"
7520   "@
7521    <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7522    movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
7523   [(set_attr "movprfx" "*,yes")]
7524 )
7525
7526 ;; -------------------------------------------------------------------------
7527 ;; ---- [FP] Matrix multiply-accumulate
7528 ;; -------------------------------------------------------------------------
7529 ;; Includes:
7530 ;; - FMMLA (F32MM,F64MM)
7531 ;; -------------------------------------------------------------------------
7532
7533 ;; The mode iterator enforces the target requirements.
7534 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
7535   [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w")
7536         (unspec:SVE_MATMULF
7537           [(match_operand:SVE_MATMULF 2 "register_operand" "w, w")
7538            (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
7539            (match_operand:SVE_MATMULF 1 "register_operand" "0, w")]
7540           FMMLA))]
7541   "TARGET_SVE"
7542   "@
7543    <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
7544    movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
7545   [(set_attr "movprfx" "*,yes")]
7546 )
7547
7548 ;; =========================================================================
7549 ;; == Comparisons and selects
7550 ;; =========================================================================
7551
7552 ;; -------------------------------------------------------------------------
7553 ;; ---- [INT,FP] Select based on predicates
7554 ;; -------------------------------------------------------------------------
7555 ;; Includes merging patterns for:
7556 ;; - FMOV
7557 ;; - MOV
7558 ;; - SEL
7559 ;; -------------------------------------------------------------------------
7560
7561 ;; vcond_mask operand order: true, false, mask
7562 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
7563 ;; SEL operand order:        mask, true, false
7564 (define_expand "@vcond_mask_<mode><vpred>"
7565   [(set (match_operand:SVE_ALL 0 "register_operand")
7566         (unspec:SVE_ALL
7567           [(match_operand:<VPRED> 3 "register_operand")
7568            (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
7569            (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
7570           UNSPEC_SEL))]
7571   "TARGET_SVE"
7572   {
7573     if (register_operand (operands[1], <MODE>mode))
7574       operands[2] = force_reg (<MODE>mode, operands[2]);
7575   }
7576 )
7577
7578 ;; Selects between:
7579 ;; - two registers
7580 ;; - a duplicated immediate and a register
7581 ;; - a duplicated immediate and zero
7582 ;;
7583 ;; For unpacked vectors, it doesn't really matter whether SEL uses the
7584 ;; the container size or the element size.  If SEL used the container size,
7585 ;; it would ignore undefined bits of the predicate but would copy the
7586 ;; upper (undefined) bits of each container along with the defined bits.
7587 ;; If SEL used the element size, it would use undefined bits of the predicate
7588 ;; to select between undefined elements in each input vector.  Thus the only
7589 ;; difference is whether the undefined bits in a container always come from
7590 ;; the same input as the defined bits, or whether the choice can vary
7591 ;; independently of the defined bits.
7592 ;;
7593 ;; For the other instructions, using the element size is more natural,
7594 ;; so we do that for SEL as well.
7595 (define_insn "*vcond_mask_<mode><vpred>"
7596   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
7597         (unspec:SVE_ALL
7598           [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
7599            (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
7600            (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
7601           UNSPEC_SEL))]
7602   "TARGET_SVE
7603    && (!register_operand (operands[1], <MODE>mode)
7604        || register_operand (operands[2], <MODE>mode))"
7605   "@
7606    sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
7607    mov\t%0.<Vetype>, %3/m, #%I1
7608    mov\t%0.<Vetype>, %3/z, #%I1
7609    fmov\t%0.<Vetype>, %3/m, #%1
7610    movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
7611    movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
7612    movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
7613   [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
7614 )
7615
7616 ;; Optimize selects between a duplicated scalar variable and another vector,
7617 ;; the latter of which can be a zero constant or a variable.  Treat duplicates
7618 ;; of GPRs as being more expensive than duplicates of FPRs, since they
7619 ;; involve a cross-file move.
7620 (define_insn "@aarch64_sel_dup<mode>"
7621   [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
7622         (unspec:SVE_ALL
7623           [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
7624            (vec_duplicate:SVE_ALL
7625              (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
7626            (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
7627           UNSPEC_SEL))]
7628   "TARGET_SVE"
7629   "@
7630    mov\t%0.<Vetype>, %3/m, %<vwcore>1
7631    mov\t%0.<Vetype>, %3/m, %<Vetype>1
7632    movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7633    movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
7634    movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
7635    movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
7636   [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
7637 )
7638
7639 ;; -------------------------------------------------------------------------
7640 ;; ---- [INT,FP] Compare and select
7641 ;; -------------------------------------------------------------------------
7642 ;; The patterns in this section are synthetic.
7643 ;; -------------------------------------------------------------------------
7644
7645 ;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
7646 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
7647 (define_expand "vcond<SVE_ALL:mode><SVE_I:mode>"
7648   [(set (match_operand:SVE_ALL 0 "register_operand")
7649         (if_then_else:SVE_ALL
7650           (match_operator 3 "comparison_operator"
7651             [(match_operand:SVE_I 4 "register_operand")
7652              (match_operand:SVE_I 5 "nonmemory_operand")])
7653           (match_operand:SVE_ALL 1 "nonmemory_operand")
7654           (match_operand:SVE_ALL 2 "nonmemory_operand")))]
7655   "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
7656   {
7657     aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
7658     DONE;
7659   }
7660 )
7661
7662 ;; Integer vcondu.  Don't enforce an immediate range here, since it
7663 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
7664 (define_expand "vcondu<SVE_ALL:mode><SVE_I:mode>"
7665   [(set (match_operand:SVE_ALL 0 "register_operand")
7666         (if_then_else:SVE_ALL
7667           (match_operator 3 "comparison_operator"
7668             [(match_operand:SVE_I 4 "register_operand")
7669              (match_operand:SVE_I 5 "nonmemory_operand")])
7670           (match_operand:SVE_ALL 1 "nonmemory_operand")
7671           (match_operand:SVE_ALL 2 "nonmemory_operand")))]
7672   "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>"
7673   {
7674     aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands);
7675     DONE;
7676   }
7677 )
7678
7679 ;; Floating-point vcond.  All comparisons except FCMUO allow a zero operand;
7680 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
7681 (define_expand "vcond<mode><v_fp_equiv>"
7682   [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
7683         (if_then_else:SVE_FULL_HSD
7684           (match_operator 3 "comparison_operator"
7685             [(match_operand:<V_FP_EQUIV> 4 "register_operand")
7686              (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
7687           (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
7688           (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
7689   "TARGET_SVE"
7690   {
7691     aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
7692     DONE;
7693   }
7694 )
7695
7696 ;; -------------------------------------------------------------------------
7697 ;; ---- [INT] Comparisons
7698 ;; -------------------------------------------------------------------------
7699 ;; Includes:
7700 ;; - CMPEQ
7701 ;; - CMPGE
7702 ;; - CMPGT
7703 ;; - CMPHI
7704 ;; - CMPHS
7705 ;; - CMPLE
7706 ;; - CMPLO
7707 ;; - CMPLS
7708 ;; - CMPLT
7709 ;; - CMPNE
7710 ;; -------------------------------------------------------------------------
7711
7712 ;; Signed integer comparisons.  Don't enforce an immediate range here, since
7713 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
7714 ;; instead.
7715 (define_expand "vec_cmp<mode><vpred>"
7716   [(parallel
7717     [(set (match_operand:<VPRED> 0 "register_operand")
7718           (match_operator:<VPRED> 1 "comparison_operator"
7719             [(match_operand:SVE_I 2 "register_operand")
7720              (match_operand:SVE_I 3 "nonmemory_operand")]))
7721      (clobber (reg:CC_NZC CC_REGNUM))])]
7722   "TARGET_SVE"
7723   {
7724     aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
7725                                     operands[2], operands[3]);
7726     DONE;
7727   }
7728 )
7729
7730 ;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
7731 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
7732 ;; instead.
7733 (define_expand "vec_cmpu<mode><vpred>"
7734   [(parallel
7735     [(set (match_operand:<VPRED> 0 "register_operand")
7736           (match_operator:<VPRED> 1 "comparison_operator"
7737             [(match_operand:SVE_I 2 "register_operand")
7738              (match_operand:SVE_I 3 "nonmemory_operand")]))
7739      (clobber (reg:CC_NZC CC_REGNUM))])]
7740   "TARGET_SVE"
7741   {
7742     aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
7743                                     operands[2], operands[3]);
7744     DONE;
7745   }
7746 )
7747
7748 ;; Predicated integer comparisons.
7749 ;;
7750 ;; For unpacked vectors, only the lowpart element in each input container
7751 ;; has a defined value, and only the predicate bits associated with
7752 ;; those elements are defined.  For example, when comparing two VNx2SIs:
7753 ;;
7754 ;; - The VNx2SIs can be seem as VNx2DIs in which the low halves of each
7755 ;;   DI container store an SI element.  The upper bits of each DI container
7756 ;;   are undefined.
7757 ;;
7758 ;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the
7759 ;;   even elements are defined and the odd elements are undefined.
7760 ;;
7761 ;; - The associated predicate mode is VNx2BI.  This means that only the
7762 ;;   low bit in each predicate byte is defined (on input and on output).
7763 ;;
7764 ;; - We use a .s comparison to compare VNx2SIs, under the control of a
7765 ;;   VNx2BI governing predicate, to produce a VNx2BI result.  If we view
7766 ;;   the .s operation as operating on VNx4SIs then for odd lanes:
7767 ;;
7768 ;;   - the input governing predicate bit is undefined
7769 ;;   - the SI elements being compared are undefined
7770 ;;   - the predicate result bit is therefore undefined, but
7771 ;;   - the predicate result bit is in the undefined part of a VNx2BI,
7772 ;;     so its value doesn't matter anyway.
7773 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
7774   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7775         (unspec:<VPRED>
7776           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7777            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7778            (SVE_INT_CMP:<VPRED>
7779              (match_operand:SVE_I 3 "register_operand" "w, w")
7780              (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
7781           UNSPEC_PRED_Z))
7782    (clobber (reg:CC_NZC CC_REGNUM))]
7783   "TARGET_SVE"
7784   "@
7785    cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
7786    cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
7787 )
7788
7789 ;; Predicated integer comparisons in which both the flag and predicate
7790 ;; results are interesting.
7791 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
7792   [(set (reg:CC_NZC CC_REGNUM)
7793         (unspec:CC_NZC
7794           [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
7795            (match_operand 4)
7796            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7797            (unspec:<VPRED>
7798              [(match_operand 6)
7799               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7800               (SVE_INT_CMP:<VPRED>
7801                 (match_operand:SVE_I 2 "register_operand" "w, w")
7802                 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
7803              UNSPEC_PRED_Z)]
7804           UNSPEC_PTEST))
7805    (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7806         (unspec:<VPRED>
7807           [(match_dup 6)
7808            (match_dup 7)
7809            (SVE_INT_CMP:<VPRED>
7810              (match_dup 2)
7811              (match_dup 3))]
7812           UNSPEC_PRED_Z))]
7813   "TARGET_SVE
7814    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7815   "@
7816    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
7817    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
7818   "&& !rtx_equal_p (operands[4], operands[6])"
7819   {
7820     operands[6] = copy_rtx (operands[4]);
7821     operands[7] = operands[5];
7822   }
7823 )
7824
7825 ;; Predicated integer comparisons in which only the flags result is
7826 ;; interesting.
7827 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
7828   [(set (reg:CC_NZC CC_REGNUM)
7829         (unspec:CC_NZC
7830           [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
7831            (match_operand 4)
7832            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7833            (unspec:<VPRED>
7834              [(match_operand 6)
7835               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7836               (SVE_INT_CMP:<VPRED>
7837                 (match_operand:SVE_I 2 "register_operand" "w, w")
7838                 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
7839              UNSPEC_PRED_Z)]
7840           UNSPEC_PTEST))
7841    (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
7842   "TARGET_SVE
7843    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7844   "@
7845    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
7846    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
7847   "&& !rtx_equal_p (operands[4], operands[6])"
7848   {
7849     operands[6] = copy_rtx (operands[4]);
7850     operands[7] = operands[5];
7851   }
7852 )
7853
7854 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
7855 ;; comparison with an AND.  Split the instruction into its preferred form
7856 ;; at the earliest opportunity, in order to get rid of the redundant
7857 ;; operand 4.
7858 (define_insn_and_split "*cmp<cmp_op><mode>_and"
7859   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7860         (and:<VPRED>
7861           (unspec:<VPRED>
7862             [(match_operand 4)
7863              (const_int SVE_KNOWN_PTRUE)
7864              (SVE_INT_CMP:<VPRED>
7865                (match_operand:SVE_I 2 "register_operand" "w, w")
7866                (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
7867             UNSPEC_PRED_Z)
7868           (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
7869    (clobber (reg:CC_NZC CC_REGNUM))]
7870   "TARGET_SVE"
7871   "#"
7872   "&& 1"
7873   [(parallel
7874      [(set (match_dup 0)
7875            (unspec:<VPRED>
7876              [(match_dup 1)
7877               (const_int SVE_MAYBE_NOT_PTRUE)
7878               (SVE_INT_CMP:<VPRED>
7879                 (match_dup 2)
7880                 (match_dup 3))]
7881              UNSPEC_PRED_Z))
7882       (clobber (reg:CC_NZC CC_REGNUM))])]
7883 )
7884
7885 ;; Predicated integer wide comparisons.
7886 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
7887   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7888         (unspec:<VPRED>
7889           [(match_operand:VNx16BI 1 "register_operand" "Upl")
7890            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7891            (unspec:<VPRED>
7892              [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
7893               (match_operand:VNx2DI 4 "register_operand" "w")]
7894              SVE_COND_INT_CMP_WIDE)]
7895           UNSPEC_PRED_Z))
7896    (clobber (reg:CC_NZC CC_REGNUM))]
7897   "TARGET_SVE"
7898   "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
7899 )
7900
7901 ;; Predicated integer wide comparisons in which both the flag and
7902 ;; predicate results are interesting.
7903 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
7904   [(set (reg:CC_NZC CC_REGNUM)
7905         (unspec:CC_NZC
7906           [(match_operand:VNx16BI 1 "register_operand" "Upl")
7907            (match_operand 4)
7908            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7909            (unspec:<VPRED>
7910              [(match_operand:VNx16BI 6 "register_operand" "Upl")
7911               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7912               (unspec:<VPRED>
7913                 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7914                  (match_operand:VNx2DI 3 "register_operand" "w")]
7915                 SVE_COND_INT_CMP_WIDE)]
7916              UNSPEC_PRED_Z)]
7917           UNSPEC_PTEST))
7918    (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7919         (unspec:<VPRED>
7920           [(match_dup 6)
7921            (match_dup 7)
7922            (unspec:<VPRED>
7923              [(match_dup 2)
7924               (match_dup 3)]
7925              SVE_COND_INT_CMP_WIDE)]
7926           UNSPEC_PRED_Z))]
7927   "TARGET_SVE
7928    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7929   "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7930 )
7931
7932 ;; Predicated integer wide comparisons in which only the flags result
7933 ;; is interesting.
7934 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
7935   [(set (reg:CC_NZC CC_REGNUM)
7936         (unspec:CC_NZC
7937           [(match_operand:VNx16BI 1 "register_operand" "Upl")
7938            (match_operand 4)
7939            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7940            (unspec:<VPRED>
7941              [(match_operand:VNx16BI 6 "register_operand" "Upl")
7942               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7943               (unspec:<VPRED>
7944                 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7945                  (match_operand:VNx2DI 3 "register_operand" "w")]
7946                 SVE_COND_INT_CMP_WIDE)]
7947              UNSPEC_PRED_Z)]
7948           UNSPEC_PTEST))
7949    (clobber (match_scratch:<VPRED> 0 "=Upa"))]
7950   "TARGET_SVE
7951    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7952   "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7953 )
7954
7955 ;; -------------------------------------------------------------------------
7956 ;; ---- [INT] While tests
7957 ;; -------------------------------------------------------------------------
7958 ;; Includes:
7959 ;; - WHILEGE (SVE2)
7960 ;; - WHILEGT (SVE2)
7961 ;; - WHILEHI (SVE2)
7962 ;; - WHILEHS (SVE2)
7963 ;; - WHILELE
7964 ;; - WHILELO
7965 ;; - WHILELS
7966 ;; - WHILELT
7967 ;; - WHILERW (SVE2)
7968 ;; - WHILEWR (SVE2)
7969 ;; -------------------------------------------------------------------------
7970
7971 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
7972 ;; true for all J in [0, I].
7973 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
7974   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7975         (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7976                           (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7977                          SVE_WHILE))
7978    (clobber (reg:CC_NZC CC_REGNUM))]
7979   "TARGET_SVE"
7980   "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7981 )
7982
7983 ;; The WHILE instructions set the flags in the same way as a PTEST with
7984 ;; a PTRUE GP.  Handle the case in which both results are useful.  The GP
7985 ;; operands to the PTEST aren't needed, so we allow them to be anything.
7986 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
7987   [(set (reg:CC_NZC CC_REGNUM)
7988         (unspec:CC_NZC
7989           [(match_operand 3)
7990            (match_operand 4)
7991            (const_int SVE_KNOWN_PTRUE)
7992            (unspec:PRED_ALL
7993              [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7994               (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7995              SVE_WHILE)]
7996           UNSPEC_PTEST))
7997    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7998         (unspec:PRED_ALL [(match_dup 1)
7999                           (match_dup 2)]
8000                          SVE_WHILE))]
8001   "TARGET_SVE"
8002   "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8003   ;; Force the compiler to drop the unused predicate operand, so that we
8004   ;; don't have an unnecessary PTRUE.
8005   "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8006   {
8007     operands[3] = CONSTM1_RTX (VNx16BImode);
8008     operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8009   }
8010 )
8011
8012 ;; Same, but handle the case in which only the flags result is useful.
8013 (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
8014   [(set (reg:CC_NZC CC_REGNUM)
8015         (unspec:CC_NZC
8016           [(match_operand 3)
8017            (match_operand 4)
8018            (const_int SVE_KNOWN_PTRUE)
8019            (unspec:PRED_ALL
8020              [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8021               (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8022              SVE_WHILE)]
8023           UNSPEC_PTEST))
8024    (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8025   "TARGET_SVE"
8026   "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8027   ;; Force the compiler to drop the unused predicate operand, so that we
8028   ;; don't have an unnecessary PTRUE.
8029   "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8030   {
8031     operands[3] = CONSTM1_RTX (VNx16BImode);
8032     operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8033   }
8034 )
8035
8036 ;; -------------------------------------------------------------------------
8037 ;; ---- [FP] Direct comparisons
8038 ;; -------------------------------------------------------------------------
8039 ;; Includes:
8040 ;; - FCMEQ
8041 ;; - FCMGE
8042 ;; - FCMGT
8043 ;; - FCMLE
8044 ;; - FCMLT
8045 ;; - FCMNE
8046 ;; - FCMUO
8047 ;; -------------------------------------------------------------------------
8048
8049 ;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
8050 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
8051 ;; with zero.
8052 (define_expand "vec_cmp<mode><vpred>"
8053   [(set (match_operand:<VPRED> 0 "register_operand")
8054         (match_operator:<VPRED> 1 "comparison_operator"
8055           [(match_operand:SVE_FULL_F 2 "register_operand")
8056            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
8057   "TARGET_SVE"
8058   {
8059     aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
8060                                       operands[2], operands[3], false);
8061     DONE;
8062   }
8063 )
8064
8065 ;; Predicated floating-point comparisons.
8066 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
8067   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8068         (unspec:<VPRED>
8069           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
8070            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8071            (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
8072            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
8073           SVE_COND_FP_CMP_I0))]
8074   "TARGET_SVE"
8075   "@
8076    fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
8077    fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
8078 )
8079
8080 ;; Same for unordered comparisons.
8081 (define_insn "@aarch64_pred_fcmuo<mode>"
8082   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8083         (unspec:<VPRED>
8084           [(match_operand:<VPRED> 1 "register_operand" "Upl")
8085            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8086            (match_operand:SVE_FULL_F 3 "register_operand" "w")
8087            (match_operand:SVE_FULL_F 4 "register_operand" "w")]
8088           UNSPEC_COND_FCMUO))]
8089   "TARGET_SVE"
8090   "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
8091 )
8092
8093 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
8094 ;; with another predicate P.  This does not have the same trapping behavior
8095 ;; as predicating the comparison itself on P, but it's a legitimate fold,
8096 ;; since we can drop any potentially-trapping operations whose results
8097 ;; are not needed.
8098 ;;
8099 ;; Split the instruction into its preferred form (below) at the earliest
8100 ;; opportunity, in order to get rid of the redundant operand 1.
8101 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
8102   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8103         (and:<VPRED>
8104           (unspec:<VPRED>
8105             [(match_operand:<VPRED> 1)
8106              (const_int SVE_KNOWN_PTRUE)
8107              (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
8108              (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
8109             SVE_COND_FP_CMP_I0)
8110           (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
8111   "TARGET_SVE"
8112   "#"
8113   "&& 1"
8114   [(set (match_dup 0)
8115         (unspec:<VPRED>
8116           [(match_dup 4)
8117            (const_int SVE_MAYBE_NOT_PTRUE)
8118            (match_dup 2)
8119            (match_dup 3)]
8120           SVE_COND_FP_CMP_I0))]
8121 )
8122
8123 ;; Same for unordered comparisons.
8124 (define_insn_and_split "*fcmuo<mode>_and_combine"
8125   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8126         (and:<VPRED>
8127           (unspec:<VPRED>
8128             [(match_operand:<VPRED> 1)
8129              (const_int SVE_KNOWN_PTRUE)
8130              (match_operand:SVE_FULL_F 2 "register_operand" "w")
8131              (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8132             UNSPEC_COND_FCMUO)
8133           (match_operand:<VPRED> 4 "register_operand" "Upl")))]
8134   "TARGET_SVE"
8135   "#"
8136   "&& 1"
8137   [(set (match_dup 0)
8138         (unspec:<VPRED>
8139           [(match_dup 4)
8140            (const_int SVE_MAYBE_NOT_PTRUE)
8141            (match_dup 2)
8142            (match_dup 3)]
8143           UNSPEC_COND_FCMUO))]
8144 )
8145
8146 ;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
8147 ;; In this case, we still need a separate NOT/BIC operation, but predicating
8148 ;; the comparison on the BIC operand removes the need for a PTRUE.
8149 (define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
8150   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8151         (and:<VPRED>
8152           (and:<VPRED>
8153             (not:<VPRED>
8154               (unspec:<VPRED>
8155                 [(match_operand:<VPRED> 1)
8156                  (const_int SVE_KNOWN_PTRUE)
8157                  (match_operand:SVE_FULL_F 2 "register_operand" "w")
8158                  (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8159                 SVE_COND_FP_CMP_I0))
8160             (match_operand:<VPRED> 4 "register_operand" "Upa"))
8161           (match_dup:<VPRED> 1)))
8162    (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8163   "TARGET_SVE"
8164   "#"
8165   "&& 1"
8166   [(set (match_dup 5)
8167         (unspec:<VPRED>
8168           [(match_dup 4)
8169            (const_int SVE_MAYBE_NOT_PTRUE)
8170            (match_dup 2)
8171            (match_dup 3)]
8172           SVE_COND_FP_CMP_I0))
8173    (set (match_dup 0)
8174         (and:<VPRED>
8175           (not:<VPRED>
8176             (match_dup 5))
8177           (match_dup 4)))]
8178 {
8179   if (can_create_pseudo_p ())
8180     operands[5] = gen_reg_rtx (<VPRED>mode);
8181 }
8182 )
8183
8184 ;; Make sure that we expand to a nor when the operand 4 of
8185 ;; *fcm<cmp_op><mode>_bic_combine is a not.
8186 (define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
8187   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8188         (and:<VPRED>
8189           (and:<VPRED>
8190             (not:<VPRED>
8191               (unspec:<VPRED>
8192                 [(match_operand:<VPRED> 1)
8193                  (const_int SVE_KNOWN_PTRUE)
8194                  (match_operand:SVE_FULL_F 2 "register_operand" "w")
8195                  (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8196                 SVE_COND_FP_CMP_I0))
8197             (not:<VPRED>
8198               (match_operand:<VPRED> 4 "register_operand" "Upa")))
8199           (match_dup:<VPRED> 1)))
8200    (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8201   "TARGET_SVE"
8202   "#"
8203   "&& 1"
8204   [(set (match_dup 5)
8205         (unspec:<VPRED>
8206           [(match_dup 1)
8207            (const_int SVE_KNOWN_PTRUE)
8208            (match_dup 2)
8209            (match_dup 3)]
8210           SVE_COND_FP_CMP_I0))
8211    (set (match_dup 0)
8212         (and:<VPRED>
8213           (and:<VPRED>
8214             (not:<VPRED>
8215               (match_dup 5))
8216             (not:<VPRED>
8217               (match_dup 4)))
8218           (match_dup 1)))]
8219 {
8220   if (can_create_pseudo_p ())
8221     operands[5] = gen_reg_rtx (<VPRED>mode);
8222 }
8223 )
8224
8225 (define_insn_and_split "*fcmuo<mode>_bic_combine"
8226   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8227         (and:<VPRED>
8228           (and:<VPRED>
8229             (not:<VPRED>
8230               (unspec:<VPRED>
8231                 [(match_operand:<VPRED> 1)
8232                  (const_int SVE_KNOWN_PTRUE)
8233                  (match_operand:SVE_FULL_F 2 "register_operand" "w")
8234                  (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8235                 UNSPEC_COND_FCMUO))
8236             (match_operand:<VPRED> 4 "register_operand" "Upa"))
8237           (match_dup:<VPRED> 1)))
8238    (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8239   "TARGET_SVE"
8240   "#"
8241   "&& 1"
8242   [(set (match_dup 5)
8243         (unspec:<VPRED>
8244           [(match_dup 4)
8245            (const_int SVE_MAYBE_NOT_PTRUE)
8246            (match_dup 2)
8247            (match_dup 3)]
8248           UNSPEC_COND_FCMUO))
8249    (set (match_dup 0)
8250         (and:<VPRED>
8251           (not:<VPRED>
8252             (match_dup 5))
8253           (match_dup 4)))]
8254 {
8255   if (can_create_pseudo_p ())
8256     operands[5] = gen_reg_rtx (<VPRED>mode);
8257 }
8258 )
8259
8260 ;; Same for unordered comparisons.
8261 (define_insn_and_split "*fcmuo<mode>_nor_combine"
8262   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8263         (and:<VPRED>
8264           (and:<VPRED>
8265             (not:<VPRED>
8266               (unspec:<VPRED>
8267                 [(match_operand:<VPRED> 1)
8268                  (const_int SVE_KNOWN_PTRUE)
8269                  (match_operand:SVE_FULL_F 2 "register_operand" "w")
8270                  (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8271                 UNSPEC_COND_FCMUO))
8272             (not:<VPRED>
8273               (match_operand:<VPRED> 4 "register_operand" "Upa")))
8274           (match_dup:<VPRED> 1)))
8275    (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8276   "TARGET_SVE"
8277   "#"
8278   "&& 1"
8279   [(set (match_dup 5)
8280         (unspec:<VPRED>
8281           [(match_dup 1)
8282            (const_int SVE_KNOWN_PTRUE)
8283            (match_dup 2)
8284            (match_dup 3)]
8285           UNSPEC_COND_FCMUO))
8286    (set (match_dup 0)
8287         (and:<VPRED>
8288           (and:<VPRED>
8289             (not:<VPRED>
8290               (match_dup 5))
8291             (not:<VPRED>
8292               (match_dup 4)))
8293           (match_dup 1)))]
8294 {
8295   if (can_create_pseudo_p ())
8296     operands[5] = gen_reg_rtx (<VPRED>mode);
8297 }
8298 )
8299
8300 ;; -------------------------------------------------------------------------
8301 ;; ---- [FP] Absolute comparisons
8302 ;; -------------------------------------------------------------------------
8303 ;; Includes:
8304 ;; - FACGE
8305 ;; - FACGT
8306 ;; - FACLE
8307 ;; - FACLT
8308 ;; -------------------------------------------------------------------------
8309
8310 ;; Predicated floating-point absolute comparisons.
8311 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
8312   [(set (match_operand:<VPRED> 0 "register_operand")
8313         (unspec:<VPRED>
8314           [(match_operand:<VPRED> 1 "register_operand")
8315            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8316            (unspec:SVE_FULL_F
8317              [(match_dup 1)
8318               (match_dup 2)
8319               (match_operand:SVE_FULL_F 3 "register_operand")]
8320              UNSPEC_COND_FABS)
8321            (unspec:SVE_FULL_F
8322              [(match_dup 1)
8323               (match_dup 2)
8324               (match_operand:SVE_FULL_F 4 "register_operand")]
8325              UNSPEC_COND_FABS)]
8326           SVE_COND_FP_ABS_CMP))]
8327   "TARGET_SVE"
8328 )
8329
8330 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
8331   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8332         (unspec:<VPRED>
8333           [(match_operand:<VPRED> 1 "register_operand" "Upl")
8334            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8335            (unspec:SVE_FULL_F
8336              [(match_operand 5)
8337               (const_int SVE_RELAXED_GP)
8338               (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8339              UNSPEC_COND_FABS)
8340            (unspec:SVE_FULL_F
8341              [(match_operand 6)
8342               (const_int SVE_RELAXED_GP)
8343               (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8344              UNSPEC_COND_FABS)]
8345           SVE_COND_FP_ABS_CMP))]
8346   "TARGET_SVE"
8347   "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8348   "&& (!rtx_equal_p (operands[1], operands[5])
8349        || !rtx_equal_p (operands[1], operands[6]))"
8350   {
8351     operands[5] = copy_rtx (operands[1]);
8352     operands[6] = copy_rtx (operands[1]);
8353   }
8354 )
8355
8356 (define_insn "*aarch64_pred_fac<cmp_op><mode>_strict"
8357   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8358         (unspec:<VPRED>
8359           [(match_operand:<VPRED> 1 "register_operand" "Upl")
8360            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8361            (unspec:SVE_FULL_F
8362              [(match_dup 1)
8363               (match_operand:SI 5 "aarch64_sve_gp_strictness")
8364               (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8365              UNSPEC_COND_FABS)
8366            (unspec:SVE_FULL_F
8367              [(match_dup 1)
8368               (match_operand:SI 6 "aarch64_sve_gp_strictness")
8369               (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8370              UNSPEC_COND_FABS)]
8371           SVE_COND_FP_ABS_CMP))]
8372   "TARGET_SVE"
8373   "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8374 )
8375
8376 ;; -------------------------------------------------------------------------
8377 ;; ---- [PRED] Select
8378 ;; -------------------------------------------------------------------------
8379 ;; Includes:
8380 ;; - SEL
8381 ;; -------------------------------------------------------------------------
8382
8383 (define_insn "@vcond_mask_<mode><mode>"
8384   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8385         (ior:PRED_ALL
8386           (and:PRED_ALL
8387             (match_operand:PRED_ALL 3 "register_operand" "Upa")
8388             (match_operand:PRED_ALL 1 "register_operand" "Upa"))
8389           (and:PRED_ALL
8390             (not (match_dup 3))
8391             (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
8392   "TARGET_SVE"
8393   "sel\t%0.b, %3, %1.b, %2.b"
8394 )
8395
8396 ;; -------------------------------------------------------------------------
8397 ;; ---- [PRED] Test bits
8398 ;; -------------------------------------------------------------------------
8399 ;; Includes:
8400 ;; - PTEST
8401 ;; -------------------------------------------------------------------------
8402
8403 ;; Branch based on predicate equality or inequality.
8404 (define_expand "cbranch<mode>4"
8405   [(set (pc)
8406         (if_then_else
8407           (match_operator 0 "aarch64_equality_operator"
8408             [(match_operand:PRED_ALL 1 "register_operand")
8409              (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
8410           (label_ref (match_operand 3 ""))
8411           (pc)))]
8412   ""
8413   {
8414     rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
8415     rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
8416     rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
8417     rtx pred;
8418     if (operands[2] == CONST0_RTX (<MODE>mode))
8419       pred = operands[1];
8420     else
8421       {
8422         pred = gen_reg_rtx (<MODE>mode);
8423         emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
8424                                                  operands[2]));
8425       }
8426     emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
8427     operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
8428     operands[2] = const0_rtx;
8429   }
8430 )
8431
8432 ;; See "Description of UNSPEC_PTEST" above for details.
8433 (define_insn "aarch64_ptest<mode>"
8434   [(set (reg:CC_NZC CC_REGNUM)
8435         (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
8436                         (match_operand 1)
8437                         (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8438                         (match_operand:PRED_ALL 3 "register_operand" "Upa")]
8439                        UNSPEC_PTEST))]
8440   "TARGET_SVE"
8441   "ptest\t%0, %3.b"
8442 )
8443
8444 ;; =========================================================================
8445 ;; == Reductions
8446 ;; =========================================================================
8447
8448 ;; -------------------------------------------------------------------------
8449 ;; ---- [INT,FP] Conditional reductions
8450 ;; -------------------------------------------------------------------------
8451 ;; Includes:
8452 ;; - CLASTA
8453 ;; - CLASTB
8454 ;; -------------------------------------------------------------------------
8455
8456 ;; Set operand 0 to the last active element in operand 3, or to tied
8457 ;; operand 1 if no elements are active.
8458 (define_insn "@fold_extract_<last_op>_<mode>"
8459   [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
8460         (unspec:<VEL>
8461           [(match_operand:<VEL> 1 "register_operand" "0, 0")
8462            (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
8463            (match_operand:SVE_FULL 3 "register_operand" "w, w")]
8464           CLAST))]
8465   "TARGET_SVE"
8466   "@
8467    clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
8468    clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
8469 )
8470
8471 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
8472   [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
8473         (unspec:SVE_FULL
8474           [(match_operand:SVE_FULL 1 "register_operand" "0, w")
8475            (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
8476            (match_operand:SVE_FULL 3 "register_operand" "w, w")]
8477           CLAST))]
8478   "TARGET_SVE"
8479   "@
8480    clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
8481    movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
8482 )
8483
8484 ;; -------------------------------------------------------------------------
8485 ;; ---- [INT] Tree reductions
8486 ;; -------------------------------------------------------------------------
8487 ;; Includes:
8488 ;; - ANDV
8489 ;; - EORV
8490 ;; - ORV
8491 ;; - SADDV
8492 ;; - SMAXV
8493 ;; - SMINV
8494 ;; - UADDV
8495 ;; - UMAXV
8496 ;; - UMINV
8497 ;; -------------------------------------------------------------------------
8498
8499 ;; Unpredicated integer add reduction.
8500 (define_expand "reduc_plus_scal_<mode>"
8501   [(match_operand:<VEL> 0 "register_operand")
8502    (match_operand:SVE_FULL_I 1 "register_operand")]
8503   "TARGET_SVE"
8504   {
8505     rtx pred = aarch64_ptrue_reg (<VPRED>mode);
8506     rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
8507     emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
8508     if (tmp != operands[0])
8509       emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
8510     DONE;
8511   }
8512 )
8513
8514 ;; Predicated integer add reduction.  The result is always 64-bits.
8515 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8516   [(set (match_operand:DI 0 "register_operand" "=w")
8517         (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
8518                     (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8519                    SVE_INT_ADDV))]
8520   "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8521   "<su>addv\t%d0, %1, %2.<Vetype>"
8522 )
8523
8524 ;; Unpredicated integer reductions.
8525 (define_expand "reduc_<optab>_scal_<mode>"
8526   [(set (match_operand:<VEL> 0 "register_operand")
8527         (unspec:<VEL> [(match_dup 2)
8528                        (match_operand:SVE_FULL_I 1 "register_operand")]
8529                       SVE_INT_REDUCTION))]
8530   "TARGET_SVE"
8531   {
8532     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8533   }
8534 )
8535
8536 ;; Predicated integer reductions.
8537 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8538   [(set (match_operand:<VEL> 0 "register_operand" "=w")
8539         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
8540                        (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8541                       SVE_INT_REDUCTION))]
8542   "TARGET_SVE"
8543   "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
8544 )
8545
8546 ;; -------------------------------------------------------------------------
8547 ;; ---- [FP] Tree reductions
8548 ;; -------------------------------------------------------------------------
8549 ;; Includes:
8550 ;; - FADDV
8551 ;; - FMAXNMV
8552 ;; - FMAXV
8553 ;; - FMINNMV
8554 ;; - FMINV
8555 ;; -------------------------------------------------------------------------
8556
8557 ;; Unpredicated floating-point tree reductions.
8558 (define_expand "reduc_<optab>_scal_<mode>"
8559   [(set (match_operand:<VEL> 0 "register_operand")
8560         (unspec:<VEL> [(match_dup 2)
8561                        (match_operand:SVE_FULL_F 1 "register_operand")]
8562                       SVE_FP_REDUCTION))]
8563   "TARGET_SVE"
8564   {
8565     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8566   }
8567 )
8568
8569 (define_expand "reduc_<fmaxmin>_scal_<mode>"
8570   [(match_operand:<VEL> 0 "register_operand")
8571    (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
8572                  FMAXMINNMV)]
8573   "TARGET_SVE"
8574   {
8575     emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
8576     DONE;
8577   }
8578 )
8579
8580 ;; Predicated floating-point tree reductions.
8581 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8582   [(set (match_operand:<VEL> 0 "register_operand" "=w")
8583         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
8584                        (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8585                       SVE_FP_REDUCTION))]
8586   "TARGET_SVE"
8587   "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
8588 )
8589
8590 ;; -------------------------------------------------------------------------
8591 ;; ---- [FP] Left-to-right reductions
8592 ;; -------------------------------------------------------------------------
8593 ;; Includes:
8594 ;; - FADDA
8595 ;; -------------------------------------------------------------------------
8596
8597 ;; Unpredicated in-order FP reductions.
8598 (define_expand "fold_left_plus_<mode>"
8599   [(set (match_operand:<VEL> 0 "register_operand")
8600         (unspec:<VEL> [(match_dup 3)
8601                        (match_operand:<VEL> 1 "register_operand")
8602                        (match_operand:SVE_FULL_F 2 "register_operand")]
8603                       UNSPEC_FADDA))]
8604   "TARGET_SVE"
8605   {
8606     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
8607   }
8608 )
8609
8610 ;; Predicated in-order FP reductions.
8611 (define_insn "mask_fold_left_plus_<mode>"
8612   [(set (match_operand:<VEL> 0 "register_operand" "=w")
8613         (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
8614                        (match_operand:<VEL> 1 "register_operand" "0")
8615                        (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8616                       UNSPEC_FADDA))]
8617   "TARGET_SVE"
8618   "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
8619 )
8620
8621 ;; =========================================================================
8622 ;; == Permutes
8623 ;; =========================================================================
8624
8625 ;; -------------------------------------------------------------------------
8626 ;; ---- [INT,FP] General permutes
8627 ;; -------------------------------------------------------------------------
8628 ;; Includes:
8629 ;; - TBL
8630 ;; -------------------------------------------------------------------------
8631
8632 (define_expand "vec_perm<mode>"
8633   [(match_operand:SVE_FULL 0 "register_operand")
8634    (match_operand:SVE_FULL 1 "register_operand")
8635    (match_operand:SVE_FULL 2 "register_operand")
8636    (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
8637   "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
8638   {
8639     aarch64_expand_sve_vec_perm (operands[0], operands[1],
8640                                  operands[2], operands[3]);
8641     DONE;
8642   }
8643 )
8644
8645 (define_insn "@aarch64_sve_tbl<mode>"
8646   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
8647         (unspec:SVE_FULL
8648           [(match_operand:SVE_FULL 1 "register_operand" "w")
8649            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
8650           UNSPEC_TBL))]
8651   "TARGET_SVE"
8652   "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8653 )
8654
8655 ;; -------------------------------------------------------------------------
8656 ;; ---- [INT,FP] Special-purpose unary permutes
8657 ;; -------------------------------------------------------------------------
8658 ;; Includes:
8659 ;; - COMPACT
8660 ;; - DUP
8661 ;; - REV
8662 ;; -------------------------------------------------------------------------
8663
8664 ;; Compact active elements and pad with zeros.
8665 (define_insn "@aarch64_sve_compact<mode>"
8666   [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
8667         (unspec:SVE_FULL_SD
8668           [(match_operand:<VPRED> 1 "register_operand" "Upl")
8669            (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
8670           UNSPEC_SVE_COMPACT))]
8671   "TARGET_SVE"
8672   "compact\t%0.<Vetype>, %1, %2.<Vetype>"
8673 )
8674
8675 ;; Duplicate one element of a vector.
8676 (define_insn "@aarch64_sve_dup_lane<mode>"
8677   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
8678         (vec_duplicate:SVE_ALL
8679           (vec_select:<VEL>
8680             (match_operand:SVE_ALL 1 "register_operand" "w")
8681             (parallel [(match_operand:SI 2 "const_int_operand")]))))]
8682   "TARGET_SVE
8683    && IN_RANGE (INTVAL (operands[2]) * <container_bits> / 8, 0, 63)"
8684   "dup\t%0.<Vctype>, %1.<Vctype>[%2]"
8685 )
8686
8687 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
8688 ;;
8689 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
8690 ;; number op2 + N of op1.  (We don't need to distinguish between memory
8691 ;; and architectural register lane numbering for op1 or op0, since the
8692 ;; two numbering schemes are the same for SVE.)
8693 ;;
8694 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
8695 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
8696 ;; of op0.  We therefore get the correct result for both endiannesses.
8697 ;;
8698 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
8699 ;; is in the opposite order to architectural register lane numbering.
8700 ;; Thus if we were to do this operation via a V128 temporary register,
8701 ;; the vec_select and vec_duplicate would both involve a reverse operation
8702 ;; for big-endian targets.  In this fused pattern the two reverses cancel
8703 ;; each other out.
8704 (define_insn "@aarch64_sve_dupq_lane<mode>"
8705   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
8706         (vec_duplicate:SVE_FULL
8707           (vec_select:<V128>
8708             (match_operand:SVE_FULL 1 "register_operand" "w")
8709             (match_operand 2 "ascending_int_parallel"))))]
8710   "TARGET_SVE
8711    && (INTVAL (XVECEXP (operands[2], 0, 0))
8712        * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
8713    && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
8714                 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
8715   {
8716     unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
8717                          * GET_MODE_SIZE (<VEL>mode));
8718     operands[2] = gen_int_mode (byte / 16, DImode);
8719     return "dup\t%0.q, %1.q[%2]";
8720   }
8721 )
8722
8723 ;; Reverse the order of elements within a full vector.
8724 (define_insn "@aarch64_sve_rev<mode>"
8725   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
8726         (unspec:SVE_ALL
8727           [(match_operand:SVE_ALL 1 "register_operand" "w")]
8728           UNSPEC_REV))]
8729   "TARGET_SVE"
8730   "rev\t%0.<Vctype>, %1.<Vctype>")
8731
8732 ;; -------------------------------------------------------------------------
8733 ;; ---- [INT,FP] Special-purpose binary permutes
8734 ;; -------------------------------------------------------------------------
8735 ;; Includes:
8736 ;; - EXT
8737 ;; - SPLICE
8738 ;; - TRN1
8739 ;; - TRN2
8740 ;; - UZP1
8741 ;; - UZP2
8742 ;; - ZIP1
8743 ;; - ZIP2
8744 ;; -------------------------------------------------------------------------
8745
8746 ;; Like EXT, but start at the first active element.
8747 (define_insn "@aarch64_sve_splice<mode>"
8748   [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
8749         (unspec:SVE_FULL
8750           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
8751            (match_operand:SVE_FULL 2 "register_operand" "0, w")
8752            (match_operand:SVE_FULL 3 "register_operand" "w, w")]
8753           UNSPEC_SVE_SPLICE))]
8754   "TARGET_SVE"
8755   "@
8756    splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
8757    movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
8758   [(set_attr "movprfx" "*, yes")]
8759 )
8760
8761 ;; Permutes that take half the elements from one vector and half the
8762 ;; elements from the other.
8763 (define_insn "@aarch64_sve_<perm_insn><mode>"
8764   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
8765         (unspec:SVE_ALL
8766           [(match_operand:SVE_ALL 1 "register_operand" "w")
8767            (match_operand:SVE_ALL 2 "register_operand" "w")]
8768           PERMUTE))]
8769   "TARGET_SVE"
8770   "<perm_insn>\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
8771 )
8772
8773 ;; Apply PERMUTE to 128-bit sequences.  The behavior of these patterns
8774 ;; doesn't depend on the mode.
8775 (define_insn "@aarch64_sve_<optab><mode>"
8776   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
8777         (unspec:SVE_FULL
8778           [(match_operand:SVE_FULL 1 "register_operand" "w")
8779            (match_operand:SVE_FULL 2 "register_operand" "w")]
8780           PERMUTEQ))]
8781   "TARGET_SVE_F64MM"
8782   "<perm_insn>\t%0.q, %1.q, %2.q"
8783 )
8784
8785 ;; Concatenate two vectors and extract a subvector.  Note that the
8786 ;; immediate (third) operand is the lane index not the byte index.
8787 (define_insn "@aarch64_sve_ext<mode>"
8788   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
8789         (unspec:SVE_ALL
8790           [(match_operand:SVE_ALL 1 "register_operand" "0, w")
8791            (match_operand:SVE_ALL 2 "register_operand" "w, w")
8792            (match_operand:SI 3 "const_int_operand")]
8793           UNSPEC_EXT))]
8794   "TARGET_SVE
8795    && IN_RANGE (INTVAL (operands[3]) * <container_bits> / 8, 0, 255)"
8796   {
8797     operands[3] = GEN_INT (INTVAL (operands[3]) * <container_bits> / 8);
8798     return (which_alternative == 0
8799             ? "ext\\t%0.b, %0.b, %2.b, #%3"
8800             : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
8801   }
8802   [(set_attr "movprfx" "*,yes")]
8803 )
8804
8805 ;; -------------------------------------------------------------------------
8806 ;; ---- [PRED] Special-purpose unary permutes
8807 ;; -------------------------------------------------------------------------
8808 ;; Includes:
8809 ;; - REV
8810 ;; -------------------------------------------------------------------------
8811
8812 (define_insn "@aarch64_sve_rev<mode>"
8813   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8814         (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
8815                          UNSPEC_REV))]
8816   "TARGET_SVE"
8817   "rev\t%0.<Vetype>, %1.<Vetype>")
8818
8819 ;; -------------------------------------------------------------------------
8820 ;; ---- [PRED] Special-purpose binary permutes
8821 ;; -------------------------------------------------------------------------
8822 ;; Includes:
8823 ;; - TRN1
8824 ;; - TRN2
8825 ;; - UZP1
8826 ;; - UZP2
8827 ;; - ZIP1
8828 ;; - ZIP2
8829 ;; -------------------------------------------------------------------------
8830
8831 ;; Permutes that take half the elements from one vector and half the
8832 ;; elements from the other.
8833 (define_insn "@aarch64_sve_<perm_insn><mode>"
8834   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8835         (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
8836                           (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8837                          PERMUTE))]
8838   "TARGET_SVE"
8839   "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8840 )
8841
8842 ;; Special purpose permute used by the predicate generation instructions.
8843 ;; Unlike the normal permute patterns, these instructions operate on VNx16BI
8844 ;; regardless of the element size, so that all input and output bits are
8845 ;; well-defined.  Operand 3 then indicates the size of the permute.
8846 (define_insn "@aarch64_sve_trn1_conv<mode>"
8847   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8848         (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
8849                          (match_operand:VNx16BI 2 "register_operand" "Upa")
8850                          (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
8851                         UNSPEC_TRN1_CONV))]
8852   "TARGET_SVE"
8853   "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
8854 )
8855
8856 ;; =========================================================================
8857 ;; == Conversions
8858 ;; =========================================================================
8859
8860 ;; -------------------------------------------------------------------------
8861 ;; ---- [INT<-INT] Packs
8862 ;; -------------------------------------------------------------------------
8863 ;; Includes:
8864 ;; - UZP1
8865 ;; -------------------------------------------------------------------------
8866
8867 ;; Integer pack.  Use UZP1 on the narrower type, which discards
8868 ;; the high part of each wide element.
8869 (define_insn "vec_pack_trunc_<Vwide>"
8870   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
8871         (unspec:SVE_FULL_BHSI
8872           [(match_operand:<VWIDE> 1 "register_operand" "w")
8873            (match_operand:<VWIDE> 2 "register_operand" "w")]
8874           UNSPEC_PACK))]
8875   "TARGET_SVE"
8876   "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8877 )
8878
8879 ;; -------------------------------------------------------------------------
8880 ;; ---- [INT<-INT] Unpacks
8881 ;; -------------------------------------------------------------------------
8882 ;; Includes:
8883 ;; - SUNPKHI
8884 ;; - SUNPKLO
8885 ;; - UUNPKHI
8886 ;; - UUNPKLO
8887 ;; -------------------------------------------------------------------------
8888
8889 ;; Unpack the low or high half of a vector, where "high" refers to
8890 ;; the low-numbered lanes for big-endian and the high-numbered lanes
8891 ;; for little-endian.
8892 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
8893   [(match_operand:<VWIDE> 0 "register_operand")
8894    (unspec:<VWIDE>
8895      [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
8896   "TARGET_SVE"
8897   {
8898     emit_insn ((<hi_lanes_optab>
8899                 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
8900                 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
8901                (operands[0], operands[1]));
8902     DONE;
8903   }
8904 )
8905
8906 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
8907   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
8908         (unspec:<VWIDE>
8909           [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
8910           UNPACK))]
8911   "TARGET_SVE"
8912   "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
8913 )
8914
8915 ;; -------------------------------------------------------------------------
8916 ;; ---- [INT<-FP] Conversions
8917 ;; -------------------------------------------------------------------------
8918 ;; Includes:
8919 ;; - FCVTZS
8920 ;; - FCVTZU
8921 ;; -------------------------------------------------------------------------
8922
8923 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
8924 ;; SF to SI or DF to DI).
8925 (define_expand "<optab><mode><v_int_equiv>2"
8926   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
8927         (unspec:<V_INT_EQUIV>
8928           [(match_dup 2)
8929            (const_int SVE_RELAXED_GP)
8930            (match_operand:SVE_FULL_F 1 "register_operand")]
8931           SVE_COND_FCVTI))]
8932   "TARGET_SVE"
8933   {
8934     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8935   }
8936 )
8937
8938 ;; Predicated float-to-integer conversion, either to the same width or wider.
8939 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
8940   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
8941         (unspec:SVE_FULL_HSDI
8942           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
8943            (match_operand:SI 3 "aarch64_sve_gp_strictness")
8944            (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
8945           SVE_COND_FCVTI))]
8946   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8947   "@
8948    fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
8949    movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
8950   [(set_attr "movprfx" "*,yes")]
8951 )
8952
8953 ;; Predicated narrowing float-to-integer conversion.
8954 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
8955   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
8956         (unspec:VNx4SI_ONLY
8957           [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
8958            (match_operand:SI 3 "aarch64_sve_gp_strictness")
8959            (match_operand:VNx2DF_ONLY 2 "register_operand" "0, w")]
8960           SVE_COND_FCVTI))]
8961   "TARGET_SVE"
8962   "@
8963    fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
8964    movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
8965   [(set_attr "movprfx" "*,yes")]
8966 )
8967
8968 ;; Predicated float-to-integer conversion with merging, either to the same
8969 ;; width or wider.
8970 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
8971   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
8972         (unspec:SVE_FULL_HSDI
8973           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
8974            (unspec:SVE_FULL_HSDI
8975              [(match_dup 1)
8976               (const_int SVE_STRICT_GP)
8977               (match_operand:SVE_FULL_F 2 "register_operand")]
8978              SVE_COND_FCVTI)
8979            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
8980           UNSPEC_SEL))]
8981   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8982 )
8983
8984 ;; The first alternative doesn't need the earlyclobber, but the only case
8985 ;; it would help is the uninteresting one in which operands 2 and 3 are
8986 ;; the same register (despite having different modes).  Making all the
8987 ;; alternatives earlyclobber makes things more consistent for the
8988 ;; register allocator.
8989 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed"
8990   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
8991         (unspec:SVE_FULL_HSDI
8992           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8993            (unspec:SVE_FULL_HSDI
8994              [(match_operand 4)
8995               (const_int SVE_RELAXED_GP)
8996               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
8997              SVE_COND_FCVTI)
8998            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8999           UNSPEC_SEL))]
9000   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9001   "@
9002    fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9003    movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9004    movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
9005   "&& !rtx_equal_p (operands[1], operands[4])"
9006   {
9007     operands[4] = copy_rtx (operands[1]);
9008   }
9009   [(set_attr "movprfx" "*,yes,yes")]
9010 )
9011
9012 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
9013   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
9014         (unspec:SVE_FULL_HSDI
9015           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9016            (unspec:SVE_FULL_HSDI
9017              [(match_dup 1)
9018               (const_int SVE_STRICT_GP)
9019               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
9020              SVE_COND_FCVTI)
9021            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9022           UNSPEC_SEL))]
9023   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9024   "@
9025    fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9026    movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9027    movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
9028   [(set_attr "movprfx" "*,yes,yes")]
9029 )
9030
9031 ;; Predicated narrowing float-to-integer conversion with merging.
9032 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9033   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9034         (unspec:VNx4SI_ONLY
9035           [(match_operand:VNx2BI 1 "register_operand")
9036            (unspec:VNx4SI_ONLY
9037              [(match_dup 1)
9038               (const_int SVE_STRICT_GP)
9039               (match_operand:VNx2DF_ONLY 2 "register_operand")]
9040              SVE_COND_FCVTI)
9041            (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
9042           UNSPEC_SEL))]
9043   "TARGET_SVE"
9044 )
9045
9046 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9047   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
9048         (unspec:VNx4SI_ONLY
9049           [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
9050            (unspec:VNx4SI_ONLY
9051              [(match_dup 1)
9052               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9053               (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
9054              SVE_COND_FCVTI)
9055            (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9056           UNSPEC_SEL))]
9057   "TARGET_SVE"
9058   "@
9059    fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9060    movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9061    movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
9062   [(set_attr "movprfx" "*,yes,yes")]
9063 )
9064
9065 ;; -------------------------------------------------------------------------
9066 ;; ---- [INT<-FP] Packs
9067 ;; -------------------------------------------------------------------------
9068 ;; The patterns in this section are synthetic.
9069 ;; -------------------------------------------------------------------------
9070
9071 ;; Convert two vectors of DF to SI and pack the results into a single vector.
9072 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
9073   [(set (match_dup 4)
9074         (unspec:VNx4SI
9075           [(match_dup 3)
9076            (const_int SVE_RELAXED_GP)
9077            (match_operand:VNx2DF 1 "register_operand")]
9078           SVE_COND_FCVTI))
9079    (set (match_dup 5)
9080         (unspec:VNx4SI
9081           [(match_dup 3)
9082            (const_int SVE_RELAXED_GP)
9083            (match_operand:VNx2DF 2 "register_operand")]
9084           SVE_COND_FCVTI))
9085    (set (match_operand:VNx4SI 0 "register_operand")
9086         (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9087   "TARGET_SVE"
9088   {
9089     operands[3] = aarch64_ptrue_reg (VNx2BImode);
9090     operands[4] = gen_reg_rtx (VNx4SImode);
9091     operands[5] = gen_reg_rtx (VNx4SImode);
9092   }
9093 )
9094
9095 ;; -------------------------------------------------------------------------
9096 ;; ---- [INT<-FP] Unpacks
9097 ;; -------------------------------------------------------------------------
9098 ;; No patterns here yet!
9099 ;; -------------------------------------------------------------------------
9100
9101 ;; -------------------------------------------------------------------------
9102 ;; ---- [FP<-INT] Conversions
9103 ;; -------------------------------------------------------------------------
9104 ;; Includes:
9105 ;; - SCVTF
9106 ;; - UCVTF
9107 ;; -------------------------------------------------------------------------
9108
9109 ;; Unpredicated conversion of integers to floats of the same size
9110 ;; (HI to HF, SI to SF or DI to DF).
9111 (define_expand "<optab><v_int_equiv><mode>2"
9112   [(set (match_operand:SVE_FULL_F 0 "register_operand")
9113         (unspec:SVE_FULL_F
9114           [(match_dup 2)
9115            (const_int SVE_RELAXED_GP)
9116            (match_operand:<V_INT_EQUIV> 1 "register_operand")]
9117           SVE_COND_ICVTF))]
9118   "TARGET_SVE"
9119   {
9120     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
9121   }
9122 )
9123
9124 ;; Predicated integer-to-float conversion, either to the same width or
9125 ;; narrower.
9126 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9127   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
9128         (unspec:SVE_FULL_F
9129           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
9130            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9131            (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")]
9132           SVE_COND_ICVTF))]
9133   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9134   "@
9135    <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9136    movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
9137   [(set_attr "movprfx" "*,yes")]
9138 )
9139
9140 ;; Predicated widening integer-to-float conversion.
9141 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9142   [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w")
9143         (unspec:VNx2DF_ONLY
9144           [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
9145            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9146            (match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
9147           SVE_COND_ICVTF))]
9148   "TARGET_SVE"
9149   "@
9150    <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9151    movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
9152   [(set_attr "movprfx" "*,yes")]
9153 )
9154
9155 ;; Predicated integer-to-float conversion with merging, either to the same
9156 ;; width or narrower.
9157 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9158   [(set (match_operand:SVE_FULL_F 0 "register_operand")
9159         (unspec:SVE_FULL_F
9160           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9161            (unspec:SVE_FULL_F
9162              [(match_dup 1)
9163               (const_int SVE_STRICT_GP)
9164               (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9165              SVE_COND_ICVTF)
9166            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9167           UNSPEC_SEL))]
9168   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9169 )
9170
9171 ;; The first alternative doesn't need the earlyclobber, but the only case
9172 ;; it would help is the uninteresting one in which operands 2 and 3 are
9173 ;; the same register (despite having different modes).  Making all the
9174 ;; alternatives earlyclobber makes things more consistent for the
9175 ;; register allocator.
9176 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed"
9177   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
9178         (unspec:SVE_FULL_F
9179           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9180            (unspec:SVE_FULL_F
9181              [(match_operand 4)
9182               (const_int SVE_RELAXED_GP)
9183               (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
9184              SVE_COND_ICVTF)
9185            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9186           UNSPEC_SEL))]
9187   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9188   "@
9189    <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9190    movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9191    movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
9192   "&& !rtx_equal_p (operands[1], operands[4])"
9193   {
9194     operands[4] = copy_rtx (operands[1]);
9195   }
9196   [(set_attr "movprfx" "*,yes,yes")]
9197 )
9198
9199 (define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
9200   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
9201         (unspec:SVE_FULL_F
9202           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9203            (unspec:SVE_FULL_F
9204              [(match_dup 1)
9205               (const_int SVE_STRICT_GP)
9206               (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
9207              SVE_COND_ICVTF)
9208            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9209           UNSPEC_SEL))]
9210   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9211   "@
9212    <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9213    movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9214    movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
9215   [(set_attr "movprfx" "*,yes,yes")]
9216 )
9217
9218 ;; Predicated widening integer-to-float conversion with merging.
9219 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9220   [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9221         (unspec:VNx2DF_ONLY
9222           [(match_operand:VNx2BI 1 "register_operand")
9223            (unspec:VNx2DF_ONLY
9224              [(match_dup 1)
9225               (const_int SVE_STRICT_GP)
9226               (match_operand:VNx4SI_ONLY 2 "register_operand")]
9227              SVE_COND_ICVTF)
9228            (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
9229           UNSPEC_SEL))]
9230   "TARGET_SVE"
9231 )
9232
9233 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9234   [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
9235         (unspec:VNx2DF_ONLY
9236           [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
9237            (unspec:VNx2DF_ONLY
9238              [(match_dup 1)
9239               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9240               (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
9241              SVE_COND_ICVTF)
9242            (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9243           UNSPEC_SEL))]
9244   "TARGET_SVE"
9245   "@
9246    <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9247    movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9248    movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
9249   [(set_attr "movprfx" "*,yes,yes")]
9250 )
9251
9252 ;; -------------------------------------------------------------------------
9253 ;; ---- [FP<-INT] Packs
9254 ;; -------------------------------------------------------------------------
9255 ;; No patterns here yet!
9256 ;; -------------------------------------------------------------------------
9257
9258 ;; -------------------------------------------------------------------------
9259 ;; ---- [FP<-INT] Unpacks
9260 ;; -------------------------------------------------------------------------
9261 ;; The patterns in this section are synthetic.
9262 ;; -------------------------------------------------------------------------
9263
9264 ;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
9265 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
9266 ;; unpacked VNx4SI to VNx2DF.
9267 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
9268   [(match_operand:VNx2DF 0 "register_operand")
9269    (FLOATUORS:VNx2DF
9270      (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
9271                     UNPACK_UNSIGNED))]
9272   "TARGET_SVE"
9273   {
9274     /* Use ZIP to do the unpack, since we don't care about the upper halves
9275        and since it has the nice property of not needing any subregs.
9276        If using UUNPK* turns out to be preferable, we could model it as
9277        a ZIP whose first operand is zero.  */
9278     rtx temp = gen_reg_rtx (VNx4SImode);
9279     emit_insn ((<hi_lanes_optab>
9280                 ? gen_aarch64_sve_zip2vnx4si
9281                 : gen_aarch64_sve_zip1vnx4si)
9282                (temp, operands[1], operands[1]));
9283     rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
9284     rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9285     emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
9286                (operands[0], ptrue, temp, strictness));
9287     DONE;
9288   }
9289 )
9290
9291 ;; -------------------------------------------------------------------------
9292 ;; ---- [FP<-FP] Packs
9293 ;; -------------------------------------------------------------------------
9294 ;; Includes:
9295 ;; - FCVT
9296 ;; -------------------------------------------------------------------------
9297
9298 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
9299 ;; the results into a single vector.
9300 (define_expand "vec_pack_trunc_<Vwide>"
9301   [(set (match_dup 4)
9302         (unspec:SVE_FULL_HSF
9303           [(match_dup 3)
9304            (const_int SVE_RELAXED_GP)
9305            (match_operand:<VWIDE> 1 "register_operand")]
9306           UNSPEC_COND_FCVT))
9307    (set (match_dup 5)
9308         (unspec:SVE_FULL_HSF
9309           [(match_dup 3)
9310            (const_int SVE_RELAXED_GP)
9311            (match_operand:<VWIDE> 2 "register_operand")]
9312           UNSPEC_COND_FCVT))
9313    (set (match_operand:SVE_FULL_HSF 0 "register_operand")
9314         (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9315   "TARGET_SVE"
9316   {
9317     operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9318     operands[4] = gen_reg_rtx (<MODE>mode);
9319     operands[5] = gen_reg_rtx (<MODE>mode);
9320   }
9321 )
9322
9323 ;; Predicated float-to-float truncation.
9324 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9325   [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
9326         (unspec:SVE_FULL_HSF
9327           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
9328            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9329            (match_operand:SVE_FULL_SDF 2 "register_operand" "0, w")]
9330           SVE_COND_FCVT))]
9331   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9332   "@
9333    fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9334    movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
9335   [(set_attr "movprfx" "*,yes")]
9336 )
9337
9338 ;; Predicated float-to-float truncation with merging.
9339 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9340   [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9341         (unspec:SVE_FULL_HSF
9342           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9343            (unspec:SVE_FULL_HSF
9344              [(match_dup 1)
9345               (const_int SVE_STRICT_GP)
9346               (match_operand:SVE_FULL_SDF 2 "register_operand")]
9347              SVE_COND_FCVT)
9348            (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
9349           UNSPEC_SEL))]
9350   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9351 )
9352
9353 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9354   [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
9355         (unspec:SVE_FULL_HSF
9356           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9357            (unspec:SVE_FULL_HSF
9358              [(match_dup 1)
9359               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9360               (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
9361              SVE_COND_FCVT)
9362            (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9363           UNSPEC_SEL))]
9364   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9365   "@
9366    fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9367    movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9368    movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
9369   [(set_attr "movprfx" "*,yes,yes")]
9370 )
9371
9372 ;; -------------------------------------------------------------------------
9373 ;; ---- [FP<-FP] Packs (bfloat16)
9374 ;; -------------------------------------------------------------------------
9375 ;; Includes:
9376 ;; - BFCVT (BF16)
9377 ;; - BFCVTNT (BF16)
9378 ;; -------------------------------------------------------------------------
9379
9380 ;; Predicated BFCVT.
9381 (define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9382   [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w")
9383         (unspec:VNx8BF_ONLY
9384           [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl")
9385            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9386            (match_operand:VNx4SF_ONLY 2 "register_operand" "0, w")]
9387           SVE_COND_FCVT))]
9388   "TARGET_SVE_BF16"
9389   "@
9390    bfcvt\t%0.h, %1/m, %2.s
9391    movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s"
9392   [(set_attr "movprfx" "*,yes")]
9393 )
9394
9395 ;; Predicated BFCVT with merging.
9396 (define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9397   [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9398         (unspec:VNx8BF_ONLY
9399           [(match_operand:VNx4BI 1 "register_operand")
9400            (unspec:VNx8BF_ONLY
9401              [(match_dup 1)
9402               (const_int SVE_STRICT_GP)
9403               (match_operand:VNx4SF_ONLY 2 "register_operand")]
9404              SVE_COND_FCVT)
9405            (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
9406           UNSPEC_SEL))]
9407   "TARGET_SVE_BF16"
9408 )
9409
9410 (define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9411   [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
9412         (unspec:VNx8BF_ONLY
9413           [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
9414            (unspec:VNx8BF_ONLY
9415              [(match_dup 1)
9416               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9417               (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
9418              SVE_COND_FCVT)
9419            (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9420           UNSPEC_SEL))]
9421   "TARGET_SVE_BF16"
9422   "@
9423    bfcvt\t%0.h, %1/m, %2.s
9424    movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
9425    movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
9426   [(set_attr "movprfx" "*,yes,yes")]
9427 )
9428
9429 ;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
9430 ;; pair because the even elements always have to be supplied for active
9431 ;; elements, even if the inactive elements don't matter.
9432 ;;
9433 ;; This instructions does not take MOVPRFX.
9434 (define_insn "@aarch64_sve_cvtnt<mode>"
9435   [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
9436         (unspec:VNx8BF_ONLY
9437           [(match_operand:VNx4BI 2 "register_operand" "Upl")
9438            (const_int SVE_STRICT_GP)
9439            (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
9440            (match_operand:VNx4SF 3 "register_operand" "w")]
9441           UNSPEC_COND_FCVTNT))]
9442   "TARGET_SVE_BF16"
9443   "bfcvtnt\t%0.h, %2/m, %3.s"
9444 )
9445
9446 ;; -------------------------------------------------------------------------
9447 ;; ---- [FP<-FP] Unpacks
9448 ;; -------------------------------------------------------------------------
9449 ;; Includes:
9450 ;; - FCVT
9451 ;; -------------------------------------------------------------------------
9452
9453 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
9454 ;; First unpack the source without conversion, then float-convert the
9455 ;; unpacked source.
9456 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
9457   [(match_operand:<VWIDE> 0 "register_operand")
9458    (unspec:SVE_FULL_HSF
9459      [(match_operand:SVE_FULL_HSF 1 "register_operand")]
9460      UNPACK_UNSIGNED)]
9461   "TARGET_SVE"
9462   {
9463     /* Use ZIP to do the unpack, since we don't care about the upper halves
9464        and since it has the nice property of not needing any subregs.
9465        If using UUNPK* turns out to be preferable, we could model it as
9466        a ZIP whose first operand is zero.  */
9467     rtx temp = gen_reg_rtx (<MODE>mode);
9468     emit_insn ((<hi_lanes_optab>
9469                 ? gen_aarch64_sve_zip2<mode>
9470                 : gen_aarch64_sve_zip1<mode>)
9471                 (temp, operands[1], operands[1]));
9472     rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9473     rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9474     emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
9475                (operands[0], ptrue, temp, strictness));
9476     DONE;
9477   }
9478 )
9479
9480 ;; Predicated float-to-float extension.
9481 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9482   [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w")
9483         (unspec:SVE_FULL_SDF
9484           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
9485            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9486            (match_operand:SVE_FULL_HSF 2 "register_operand" "0, w")]
9487           SVE_COND_FCVT))]
9488   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9489   "@
9490    fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9491    movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
9492   [(set_attr "movprfx" "*,yes")]
9493 )
9494
9495 ;; Predicated float-to-float extension with merging.
9496 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9497   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9498         (unspec:SVE_FULL_SDF
9499           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9500            (unspec:SVE_FULL_SDF
9501              [(match_dup 1)
9502               (const_int SVE_STRICT_GP)
9503               (match_operand:SVE_FULL_HSF 2 "register_operand")]
9504              SVE_COND_FCVT)
9505            (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
9506           UNSPEC_SEL))]
9507   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9508 )
9509
9510 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9511   [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
9512         (unspec:SVE_FULL_SDF
9513           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
9514            (unspec:SVE_FULL_SDF
9515              [(match_dup 1)
9516               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9517               (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
9518              SVE_COND_FCVT)
9519            (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
9520           UNSPEC_SEL))]
9521   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9522   "@
9523    fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9524    movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9525    movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
9526   [(set_attr "movprfx" "*,yes,yes")]
9527 )
9528
9529 ;; -------------------------------------------------------------------------
9530 ;; ---- [PRED<-PRED] Packs
9531 ;; -------------------------------------------------------------------------
9532 ;; Includes:
9533 ;; - UZP1
9534 ;; -------------------------------------------------------------------------
9535
9536 ;; Predicate pack.  Use UZP1 on the narrower type, which discards
9537 ;; the high part of each wide element.
9538 (define_insn "vec_pack_trunc_<Vwide>"
9539   [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
9540         (unspec:PRED_BHS
9541           [(match_operand:<VWIDE> 1 "register_operand" "Upa")
9542            (match_operand:<VWIDE> 2 "register_operand" "Upa")]
9543           UNSPEC_PACK))]
9544   "TARGET_SVE"
9545   "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9546 )
9547
9548 ;; -------------------------------------------------------------------------
9549 ;; ---- [PRED<-PRED] Unpacks
9550 ;; -------------------------------------------------------------------------
9551 ;; Includes:
9552 ;; - PUNPKHI
9553 ;; - PUNPKLO
9554 ;; -------------------------------------------------------------------------
9555
9556 ;; Unpack the low or high half of a predicate, where "high" refers to
9557 ;; the low-numbered lanes for big-endian and the high-numbered lanes
9558 ;; for little-endian.
9559 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
9560   [(match_operand:<VWIDE> 0 "register_operand")
9561    (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
9562                    UNPACK)]
9563   "TARGET_SVE"
9564   {
9565     emit_insn ((<hi_lanes_optab>
9566                 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
9567                 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
9568                (operands[0], operands[1]));
9569     DONE;
9570   }
9571 )
9572
9573 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
9574   [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
9575         (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
9576                         UNPACK_UNSIGNED))]
9577   "TARGET_SVE"
9578   "punpk<perm_hilo>\t%0.h, %1.b"
9579 )
9580
9581 ;; =========================================================================
9582 ;; == Vector partitioning
9583 ;; =========================================================================
9584
9585 ;; -------------------------------------------------------------------------
9586 ;; ---- [PRED] Unary partitioning
9587 ;; -------------------------------------------------------------------------
9588 ;; Includes:
9589 ;; - BRKA
9590 ;; - BRKAS
9591 ;; - BRKB
9592 ;; - BRKBS
9593 ;; -------------------------------------------------------------------------
9594
9595 ;; Note that unlike most other instructions that have both merging and
9596 ;; zeroing forms, these instructions don't operate elementwise and so
9597 ;; don't fit the IFN_COND model.
9598 (define_insn "@aarch64_brk<brk_op>"
9599   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
9600         (unspec:VNx16BI
9601           [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
9602            (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
9603            (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
9604           SVE_BRK_UNARY))]
9605   "TARGET_SVE"
9606   "@
9607    brk<brk_op>\t%0.b, %1/z, %2.b
9608    brk<brk_op>\t%0.b, %1/m, %2.b"
9609 )
9610
9611 ;; Same, but also producing a flags result.
9612 (define_insn "*aarch64_brk<brk_op>_cc"
9613   [(set (reg:CC_NZC CC_REGNUM)
9614         (unspec:CC_NZC
9615           [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
9616            (match_dup 1)
9617            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9618            (unspec:VNx16BI
9619              [(match_dup 1)
9620               (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
9621               (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
9622              SVE_BRK_UNARY)]
9623           UNSPEC_PTEST))
9624    (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
9625         (unspec:VNx16BI
9626           [(match_dup 1)
9627            (match_dup 2)
9628            (match_dup 3)]
9629           SVE_BRK_UNARY))]
9630   "TARGET_SVE"
9631   "@
9632    brk<brk_op>s\t%0.b, %1/z, %2.b
9633    brk<brk_op>s\t%0.b, %1/m, %2.b"
9634 )
9635
9636 ;; Same, but with only the flags result being interesting.
9637 (define_insn "*aarch64_brk<brk_op>_ptest"
9638   [(set (reg:CC_NZC CC_REGNUM)
9639         (unspec:CC_NZC
9640           [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
9641            (match_dup 1)
9642            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9643            (unspec:VNx16BI
9644              [(match_dup 1)
9645               (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
9646               (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
9647              SVE_BRK_UNARY)]
9648           UNSPEC_PTEST))
9649    (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
9650   "TARGET_SVE"
9651   "@
9652    brk<brk_op>s\t%0.b, %1/z, %2.b
9653    brk<brk_op>s\t%0.b, %1/m, %2.b"
9654 )
9655
9656 ;; -------------------------------------------------------------------------
9657 ;; ---- [PRED] Binary partitioning
9658 ;; -------------------------------------------------------------------------
9659 ;; Includes:
9660 ;; - BRKN
9661 ;; - BRKNS
9662 ;; - BRKPA
9663 ;; - BRKPAS
9664 ;; - BRKPB
9665 ;; - BRKPBS
9666 ;; -------------------------------------------------------------------------
9667
9668 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
9669 (define_insn "@aarch64_brk<brk_op>"
9670   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
9671         (unspec:VNx16BI
9672           [(match_operand:VNx16BI 1 "register_operand" "Upa")
9673            (match_operand:VNx16BI 2 "register_operand" "Upa")
9674            (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
9675           SVE_BRK_BINARY))]
9676   "TARGET_SVE"
9677   "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
9678 )
9679
9680 ;; Same, but also producing a flags result.
9681 (define_insn "*aarch64_brk<brk_op>_cc"
9682   [(set (reg:CC_NZC CC_REGNUM)
9683         (unspec:CC_NZC
9684           [(match_operand:VNx16BI 1 "register_operand" "Upa")
9685            (match_dup 1)
9686            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9687            (unspec:VNx16BI
9688              [(match_dup 1)
9689               (match_operand:VNx16BI 2 "register_operand" "Upa")
9690               (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
9691              SVE_BRK_BINARY)]
9692           UNSPEC_PTEST))
9693    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
9694         (unspec:VNx16BI
9695           [(match_dup 1)
9696            (match_dup 2)
9697            (match_dup 3)]
9698           SVE_BRK_BINARY))]
9699   "TARGET_SVE"
9700   "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
9701 )
9702
9703 ;; Same, but with only the flags result being interesting.
9704 (define_insn "*aarch64_brk<brk_op>_ptest"
9705   [(set (reg:CC_NZC CC_REGNUM)
9706         (unspec:CC_NZC
9707           [(match_operand:VNx16BI 1 "register_operand" "Upa")
9708            (match_dup 1)
9709            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
9710            (unspec:VNx16BI
9711              [(match_dup 1)
9712               (match_operand:VNx16BI 2 "register_operand" "Upa")
9713               (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
9714              SVE_BRK_BINARY)]
9715           UNSPEC_PTEST))
9716    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
9717   "TARGET_SVE"
9718   "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
9719 )
9720
9721 ;; -------------------------------------------------------------------------
9722 ;; ---- [PRED] Scalarization
9723 ;; -------------------------------------------------------------------------
9724 ;; Includes:
9725 ;; - PFIRST
9726 ;; - PNEXT
9727 ;; -------------------------------------------------------------------------
9728
9729 (define_insn "@aarch64_sve_<sve_pred_op><mode>"
9730   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9731         (unspec:PRED_ALL
9732           [(match_operand:PRED_ALL 1 "register_operand" "Upa")
9733            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
9734            (match_operand:PRED_ALL 3 "register_operand" "0")]
9735           SVE_PITER))
9736    (clobber (reg:CC_NZC CC_REGNUM))]
9737   "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
9738   "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
9739 )
9740
9741 ;; Same, but also producing a flags result.
9742 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
9743   [(set (reg:CC_NZC CC_REGNUM)
9744         (unspec:CC_NZC
9745           [(match_operand:VNx16BI 1 "register_operand" "Upa")
9746            (match_operand 2)
9747            (match_operand:SI 3 "aarch64_sve_ptrue_flag")
9748            (unspec:PRED_ALL
9749              [(match_operand 4)
9750               (match_operand:SI 5 "aarch64_sve_ptrue_flag")
9751               (match_operand:PRED_ALL 6 "register_operand" "0")]
9752              SVE_PITER)]
9753           UNSPEC_PTEST))
9754    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9755         (unspec:PRED_ALL
9756           [(match_dup 4)
9757            (match_dup 5)
9758            (match_dup 6)]
9759           SVE_PITER))]
9760   "TARGET_SVE
9761    && <max_elem_bits> >= <elem_bits>
9762    && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
9763   "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
9764   "&& !rtx_equal_p (operands[2], operands[4])"
9765   {
9766     operands[4] = operands[2];
9767     operands[5] = operands[3];
9768   }
9769 )
9770
9771 ;; Same, but with only the flags result being interesting.
9772 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
9773   [(set (reg:CC_NZC CC_REGNUM)
9774         (unspec:CC_NZC
9775           [(match_operand:VNx16BI 1 "register_operand" "Upa")
9776            (match_operand 2)
9777            (match_operand:SI 3 "aarch64_sve_ptrue_flag")
9778            (unspec:PRED_ALL
9779              [(match_operand 4)
9780               (match_operand:SI 5 "aarch64_sve_ptrue_flag")
9781               (match_operand:PRED_ALL 6 "register_operand" "0")]
9782              SVE_PITER)]
9783           UNSPEC_PTEST))
9784    (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
9785   "TARGET_SVE
9786    && <max_elem_bits> >= <elem_bits>
9787    && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
9788   "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
9789   "&& !rtx_equal_p (operands[2], operands[4])"
9790   {
9791     operands[4] = operands[2];
9792     operands[5] = operands[3];
9793   }
9794 )
9795
9796 ;; =========================================================================
9797 ;; == Counting elements
9798 ;; =========================================================================
9799
9800 ;; -------------------------------------------------------------------------
9801 ;; ---- [INT] Count elements in a pattern (scalar)
9802 ;; -------------------------------------------------------------------------
9803 ;; Includes:
9804 ;; - CNTB
9805 ;; - CNTD
9806 ;; - CNTH
9807 ;; - CNTW
9808 ;; -------------------------------------------------------------------------
9809
9810 ;; Count the number of elements in an svpattern.  Operand 1 is the pattern,
9811 ;; operand 2 is the number of elements that fit in a 128-bit block, and
9812 ;; operand 3 is a multiplier in the range [1, 16].
9813 ;;
9814 ;; Note that this pattern isn't used for SV_ALL (but would work for that too).
9815 (define_insn "aarch64_sve_cnt_pat"
9816   [(set (match_operand:DI 0 "register_operand" "=r")
9817         (zero_extend:DI
9818           (unspec:SI [(match_operand:DI 1 "const_int_operand")
9819                       (match_operand:DI 2 "const_int_operand")
9820                       (match_operand:DI 3 "const_int_operand")]
9821                      UNSPEC_SVE_CNT_PAT)))]
9822   "TARGET_SVE"
9823   {
9824     return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
9825   }
9826 )
9827
9828 ;; -------------------------------------------------------------------------
9829 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
9830 ;; -------------------------------------------------------------------------
9831 ;; Includes:
9832 ;; - INC
9833 ;; - SQINC
9834 ;; - UQINC
9835 ;; -------------------------------------------------------------------------
9836
9837 ;; Increment a DImode register by the number of elements in an svpattern.
9838 ;; See aarch64_sve_cnt_pat for the counting behavior.
9839 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9840   [(set (match_operand:DI 0 "register_operand" "=r")
9841         (ANY_PLUS:DI (zero_extend:DI
9842                        (unspec:SI [(match_operand:DI 2 "const_int_operand")
9843                                    (match_operand:DI 3 "const_int_operand")
9844                                    (match_operand:DI 4 "const_int_operand")]
9845                                   UNSPEC_SVE_CNT_PAT))
9846                      (match_operand:DI_ONLY 1 "register_operand" "0")))]
9847   "TARGET_SVE"
9848   {
9849     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
9850                                                  operands + 2);
9851   }
9852 )
9853
9854 ;; Increment an SImode register by the number of elements in an svpattern
9855 ;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
9856 ;; behavior.
9857 (define_insn "*aarch64_sve_incsi_pat"
9858   [(set (match_operand:SI 0 "register_operand" "=r")
9859         (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
9860                              (match_operand:DI 3 "const_int_operand")
9861                              (match_operand:DI 4 "const_int_operand")]
9862                             UNSPEC_SVE_CNT_PAT)
9863                  (match_operand:SI 1 "register_operand" "0")))]
9864   "TARGET_SVE"
9865   {
9866     return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
9867   }
9868 )
9869
9870 ;; Increment an SImode register by the number of elements in an svpattern
9871 ;; using saturating arithmetic, extending the result to 64 bits.
9872 ;;
9873 ;; See aarch64_sve_cnt_pat for the counting behavior.
9874 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9875   [(set (match_operand:DI 0 "register_operand" "=r")
9876         (<paired_extend>:DI
9877           (SAT_PLUS:SI
9878             (unspec:SI [(match_operand:DI 2 "const_int_operand")
9879                         (match_operand:DI 3 "const_int_operand")
9880                         (match_operand:DI 4 "const_int_operand")]
9881                        UNSPEC_SVE_CNT_PAT)
9882             (match_operand:SI_ONLY 1 "register_operand" "0"))))]
9883   "TARGET_SVE"
9884   {
9885     const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
9886     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
9887                                                  operands + 2);
9888   }
9889 )
9890
9891 ;; -------------------------------------------------------------------------
9892 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
9893 ;; -------------------------------------------------------------------------
9894 ;; Includes:
9895 ;; - INC
9896 ;; - SQINC
9897 ;; - UQINC
9898 ;; -------------------------------------------------------------------------
9899
9900 ;; Increment a vector of DIs by the number of elements in an svpattern.
9901 ;; See aarch64_sve_cnt_pat for the counting behavior.
9902 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9903   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
9904         (ANY_PLUS:VNx2DI
9905           (vec_duplicate:VNx2DI
9906             (zero_extend:DI
9907               (unspec:SI [(match_operand:DI 2 "const_int_operand")
9908                           (match_operand:DI 3 "const_int_operand")
9909                           (match_operand:DI 4 "const_int_operand")]
9910                          UNSPEC_SVE_CNT_PAT)))
9911           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
9912   "TARGET_SVE"
9913   {
9914     if (which_alternative == 1)
9915       output_asm_insn ("movprfx\t%0, %1", operands);
9916     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
9917                                                  operands + 2);
9918   }
9919   [(set_attr "movprfx" "*,yes")]
9920 )
9921
9922 ;; Increment a vector of SIs by the number of elements in an svpattern.
9923 ;; See aarch64_sve_cnt_pat for the counting behavior.
9924 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9925   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
9926         (ANY_PLUS:VNx4SI
9927           (vec_duplicate:VNx4SI
9928             (unspec:SI [(match_operand:DI 2 "const_int_operand")
9929                         (match_operand:DI 3 "const_int_operand")
9930                         (match_operand:DI 4 "const_int_operand")]
9931                        UNSPEC_SVE_CNT_PAT))
9932           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
9933   "TARGET_SVE"
9934   {
9935     if (which_alternative == 1)
9936       output_asm_insn ("movprfx\t%0, %1", operands);
9937     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
9938                                                  operands + 2);
9939   }
9940   [(set_attr "movprfx" "*,yes")]
9941 )
9942
9943 ;; Increment a vector of HIs by the number of elements in an svpattern.
9944 ;; See aarch64_sve_cnt_pat for the counting behavior.
9945 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
9946   [(set (match_operand:VNx8HI 0 "register_operand")
9947         (ANY_PLUS:VNx8HI
9948           (vec_duplicate:VNx8HI
9949             (truncate:HI
9950               (unspec:SI [(match_operand:DI 2 "const_int_operand")
9951                           (match_operand:DI 3 "const_int_operand")
9952                           (match_operand:DI 4 "const_int_operand")]
9953                          UNSPEC_SVE_CNT_PAT)))
9954           (match_operand:VNx8HI_ONLY 1 "register_operand")))]
9955   "TARGET_SVE"
9956 )
9957
9958 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
9959   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
9960         (ANY_PLUS:VNx8HI
9961           (vec_duplicate:VNx8HI
9962             (match_operator:HI 5 "subreg_lowpart_operator"
9963               [(unspec:SI [(match_operand:DI 2 "const_int_operand")
9964                            (match_operand:DI 3 "const_int_operand")
9965                            (match_operand:DI 4 "const_int_operand")]
9966                           UNSPEC_SVE_CNT_PAT)]))
9967           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
9968   "TARGET_SVE"
9969   {
9970     if (which_alternative == 1)
9971       output_asm_insn ("movprfx\t%0, %1", operands);
9972     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
9973                                                  operands + 2);
9974   }
9975   [(set_attr "movprfx" "*,yes")]
9976 )
9977
9978 ;; -------------------------------------------------------------------------
9979 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
9980 ;; -------------------------------------------------------------------------
9981 ;; Includes:
9982 ;; - DEC
9983 ;; - SQDEC
9984 ;; - UQDEC
9985 ;; -------------------------------------------------------------------------
9986
9987 ;; Decrement a DImode register by the number of elements in an svpattern.
9988 ;; See aarch64_sve_cnt_pat for the counting behavior.
9989 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
9990   [(set (match_operand:DI 0 "register_operand" "=r")
9991         (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
9992                       (zero_extend:DI
9993                         (unspec:SI [(match_operand:DI 2 "const_int_operand")
9994                                     (match_operand:DI 3 "const_int_operand")
9995                                     (match_operand:DI 4 "const_int_operand")]
9996                                    UNSPEC_SVE_CNT_PAT))))]
9997   "TARGET_SVE"
9998   {
9999     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
10000                                                  operands + 2);
10001   }
10002 )
10003
10004 ;; Decrement an SImode register by the number of elements in an svpattern
10005 ;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
10006 ;; behavior.
10007 (define_insn "*aarch64_sve_decsi_pat"
10008   [(set (match_operand:SI 0 "register_operand" "=r")
10009         (minus:SI (match_operand:SI 1 "register_operand" "0")
10010                   (unspec:SI [(match_operand:DI 2 "const_int_operand")
10011                               (match_operand:DI 3 "const_int_operand")
10012                               (match_operand:DI 4 "const_int_operand")]
10013                              UNSPEC_SVE_CNT_PAT)))]
10014   "TARGET_SVE"
10015   {
10016     return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
10017   }
10018 )
10019
10020 ;; Decrement an SImode register by the number of elements in an svpattern
10021 ;; using saturating arithmetic, extending the result to 64 bits.
10022 ;;
10023 ;; See aarch64_sve_cnt_pat for the counting behavior.
10024 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10025   [(set (match_operand:DI 0 "register_operand" "=r")
10026         (<paired_extend>:DI
10027           (SAT_MINUS:SI
10028             (match_operand:SI_ONLY 1 "register_operand" "0")
10029             (unspec:SI [(match_operand:DI 2 "const_int_operand")
10030                         (match_operand:DI 3 "const_int_operand")
10031                         (match_operand:DI 4 "const_int_operand")]
10032                        UNSPEC_SVE_CNT_PAT))))]
10033   "TARGET_SVE"
10034   {
10035     const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
10036     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
10037                                                  operands + 2);
10038   }
10039 )
10040
10041 ;; -------------------------------------------------------------------------
10042 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
10043 ;; -------------------------------------------------------------------------
10044 ;; Includes:
10045 ;; - DEC
10046 ;; - SQDEC
10047 ;; - UQDEC
10048 ;; -------------------------------------------------------------------------
10049
10050 ;; Decrement a vector of DIs by the number of elements in an svpattern.
10051 ;; See aarch64_sve_cnt_pat for the counting behavior.
10052 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10053   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10054         (ANY_MINUS:VNx2DI
10055           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
10056           (vec_duplicate:VNx2DI
10057             (zero_extend:DI
10058               (unspec:SI [(match_operand:DI 2 "const_int_operand")
10059                           (match_operand:DI 3 "const_int_operand")
10060                           (match_operand:DI 4 "const_int_operand")]
10061                          UNSPEC_SVE_CNT_PAT)))))]
10062   "TARGET_SVE"
10063   {
10064     if (which_alternative == 1)
10065       output_asm_insn ("movprfx\t%0, %1", operands);
10066     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10067                                                  operands + 2);
10068   }
10069   [(set_attr "movprfx" "*,yes")]
10070 )
10071
10072 ;; Decrement a vector of SIs by the number of elements in an svpattern.
10073 ;; See aarch64_sve_cnt_pat for the counting behavior.
10074 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10075   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10076         (ANY_MINUS:VNx4SI
10077           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
10078           (vec_duplicate:VNx4SI
10079             (unspec:SI [(match_operand:DI 2 "const_int_operand")
10080                         (match_operand:DI 3 "const_int_operand")
10081                         (match_operand:DI 4 "const_int_operand")]
10082                        UNSPEC_SVE_CNT_PAT))))]
10083   "TARGET_SVE"
10084   {
10085     if (which_alternative == 1)
10086       output_asm_insn ("movprfx\t%0, %1", operands);
10087     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10088                                                  operands + 2);
10089   }
10090   [(set_attr "movprfx" "*,yes")]
10091 )
10092
10093 ;; Decrement a vector of HIs by the number of elements in an svpattern.
10094 ;; See aarch64_sve_cnt_pat for the counting behavior.
10095 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
10096   [(set (match_operand:VNx8HI 0 "register_operand")
10097         (ANY_MINUS:VNx8HI
10098           (match_operand:VNx8HI_ONLY 1 "register_operand")
10099           (vec_duplicate:VNx8HI
10100             (truncate:HI
10101               (unspec:SI [(match_operand:DI 2 "const_int_operand")
10102                           (match_operand:DI 3 "const_int_operand")
10103                           (match_operand:DI 4 "const_int_operand")]
10104                          UNSPEC_SVE_CNT_PAT)))))]
10105   "TARGET_SVE"
10106 )
10107
10108 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
10109   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10110         (ANY_MINUS:VNx8HI
10111           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
10112           (vec_duplicate:VNx8HI
10113             (match_operator:HI 5 "subreg_lowpart_operator"
10114               [(unspec:SI [(match_operand:DI 2 "const_int_operand")
10115                            (match_operand:DI 3 "const_int_operand")
10116                            (match_operand:DI 4 "const_int_operand")]
10117                           UNSPEC_SVE_CNT_PAT)]))))]
10118   "TARGET_SVE"
10119   {
10120     if (which_alternative == 1)
10121       output_asm_insn ("movprfx\t%0, %1", operands);
10122     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10123                                                  operands + 2);
10124   }
10125   [(set_attr "movprfx" "*,yes")]
10126 )
10127
10128 ;; -------------------------------------------------------------------------
10129 ;; ---- [INT] Count elements in a predicate (scalar)
10130 ;; -------------------------------------------------------------------------
10131 ;; Includes:
10132 ;; - CNTP
10133 ;; -------------------------------------------------------------------------
10134
10135 ;; Count the number of set bits in a predicate.  Operand 3 is true if
10136 ;; operand 1 is known to be all-true.
10137 (define_insn "@aarch64_pred_cntp<mode>"
10138   [(set (match_operand:DI 0 "register_operand" "=r")
10139         (zero_extend:DI
10140           (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
10141                       (match_operand:SI 2 "aarch64_sve_ptrue_flag")
10142                       (match_operand:PRED_ALL 3 "register_operand" "Upa")]
10143                      UNSPEC_CNTP)))]
10144   "TARGET_SVE"
10145   "cntp\t%x0, %1, %3.<Vetype>")
10146
10147 ;; -------------------------------------------------------------------------
10148 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
10149 ;; -------------------------------------------------------------------------
10150 ;; Includes:
10151 ;; - INCP
10152 ;; - SQINCP
10153 ;; - UQINCP
10154 ;; -------------------------------------------------------------------------
10155
10156 ;; Increment a DImode register by the number of set bits in a predicate.
10157 ;; See aarch64_sve_cntp for a description of the operands.
10158 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10159   [(set (match_operand:DI 0 "register_operand")
10160         (ANY_PLUS:DI
10161           (zero_extend:DI
10162             (unspec:SI [(match_dup 3)
10163                         (const_int SVE_KNOWN_PTRUE)
10164                         (match_operand:PRED_ALL 2 "register_operand")]
10165                        UNSPEC_CNTP))
10166           (match_operand:DI_ONLY 1 "register_operand")))]
10167   "TARGET_SVE"
10168   {
10169     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10170   }
10171 )
10172
10173 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10174   [(set (match_operand:DI 0 "register_operand" "=r")
10175         (ANY_PLUS:DI
10176           (zero_extend:DI
10177             (unspec:SI [(match_operand 3)
10178                         (const_int SVE_KNOWN_PTRUE)
10179                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10180                        UNSPEC_CNTP))
10181           (match_operand:DI_ONLY 1 "register_operand" "0")))]
10182   "TARGET_SVE"
10183   "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
10184   "&& !CONSTANT_P (operands[3])"
10185   {
10186     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10187   }
10188 )
10189
10190 ;; Increment an SImode register by the number of set bits in a predicate
10191 ;; using modular arithmetic.  See aarch64_sve_cntp for a description of
10192 ;; the operands.
10193 (define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
10194   [(set (match_operand:SI 0 "register_operand" "=r")
10195         (plus:SI
10196           (unspec:SI [(match_operand 3)
10197                       (const_int SVE_KNOWN_PTRUE)
10198                       (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10199                      UNSPEC_CNTP)
10200           (match_operand:SI 1 "register_operand" "0")))]
10201   "TARGET_SVE"
10202   "incp\t%x0, %2.<Vetype>"
10203   "&& !CONSTANT_P (operands[3])"
10204   {
10205     operands[3] = CONSTM1_RTX (<MODE>mode);
10206   }
10207 )
10208
10209 ;; Increment an SImode register by the number of set bits in a predicate
10210 ;; using saturating arithmetic, extending the result to 64 bits.
10211 ;;
10212 ;; See aarch64_sve_cntp for a description of the operands.
10213 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10214   [(set (match_operand:DI 0 "register_operand")
10215         (<paired_extend>:DI
10216           (SAT_PLUS:SI
10217             (unspec:SI [(match_dup 3)
10218                         (const_int SVE_KNOWN_PTRUE)
10219                         (match_operand:PRED_ALL 2 "register_operand")]
10220                        UNSPEC_CNTP)
10221             (match_operand:SI_ONLY 1 "register_operand"))))]
10222   "TARGET_SVE"
10223   {
10224     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10225   }
10226 )
10227
10228 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10229   [(set (match_operand:DI 0 "register_operand" "=r")
10230         (<paired_extend>:DI
10231           (SAT_PLUS:SI
10232             (unspec:SI [(match_operand 3)
10233                         (const_int SVE_KNOWN_PTRUE)
10234                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10235                        UNSPEC_CNTP)
10236             (match_operand:SI_ONLY 1 "register_operand" "0"))))]
10237   "TARGET_SVE"
10238   {
10239     if (<CODE> == SS_PLUS)
10240       return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
10241     else
10242       return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
10243   }
10244   "&& !CONSTANT_P (operands[3])"
10245   {
10246     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10247   }
10248 )
10249
10250 ;; -------------------------------------------------------------------------
10251 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
10252 ;; -------------------------------------------------------------------------
10253 ;; Includes:
10254 ;; - INCP
10255 ;; - SQINCP
10256 ;; - UQINCP
10257 ;; -------------------------------------------------------------------------
10258
10259 ;; Increment a vector of DIs by the number of set bits in a predicate.
10260 ;; See aarch64_sve_cntp for a description of the operands.
10261 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10262   [(set (match_operand:VNx2DI 0 "register_operand")
10263         (ANY_PLUS:VNx2DI
10264           (vec_duplicate:VNx2DI
10265             (zero_extend:DI
10266               (unspec:SI
10267                 [(match_dup 3)
10268                  (const_int SVE_KNOWN_PTRUE)
10269                  (match_operand:<VPRED> 2 "register_operand")]
10270                 UNSPEC_CNTP)))
10271           (match_operand:VNx2DI_ONLY 1 "register_operand")))]
10272   "TARGET_SVE"
10273   {
10274     operands[3] = CONSTM1_RTX (<VPRED>mode);
10275   }
10276 )
10277
10278 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10279   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10280         (ANY_PLUS:VNx2DI
10281           (vec_duplicate:VNx2DI
10282             (zero_extend:DI
10283               (unspec:SI
10284                 [(match_operand 3)
10285                  (const_int SVE_KNOWN_PTRUE)
10286                  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10287                 UNSPEC_CNTP)))
10288           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
10289   "TARGET_SVE"
10290   "@
10291    <inc_dec>p\t%0.d, %2
10292    movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
10293   "&& !CONSTANT_P (operands[3])"
10294   {
10295     operands[3] = CONSTM1_RTX (<VPRED>mode);
10296   }
10297   [(set_attr "movprfx" "*,yes")]
10298 )
10299
10300 ;; Increment a vector of SIs by the number of set bits in a predicate.
10301 ;; See aarch64_sve_cntp for a description of the operands.
10302 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10303   [(set (match_operand:VNx4SI 0 "register_operand")
10304         (ANY_PLUS:VNx4SI
10305           (vec_duplicate:VNx4SI
10306             (unspec:SI
10307               [(match_dup 3)
10308                (const_int SVE_KNOWN_PTRUE)
10309                (match_operand:<VPRED> 2 "register_operand")]
10310               UNSPEC_CNTP))
10311           (match_operand:VNx4SI_ONLY 1 "register_operand")))]
10312   "TARGET_SVE"
10313   {
10314     operands[3] = CONSTM1_RTX (<VPRED>mode);
10315   }
10316 )
10317
10318 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10319   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10320         (ANY_PLUS:VNx4SI
10321           (vec_duplicate:VNx4SI
10322             (unspec:SI
10323               [(match_operand 3)
10324                (const_int SVE_KNOWN_PTRUE)
10325                (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10326               UNSPEC_CNTP))
10327           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
10328   "TARGET_SVE"
10329   "@
10330    <inc_dec>p\t%0.s, %2
10331    movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
10332   "&& !CONSTANT_P (operands[3])"
10333   {
10334     operands[3] = CONSTM1_RTX (<VPRED>mode);
10335   }
10336   [(set_attr "movprfx" "*,yes")]
10337 )
10338
10339 ;; Increment a vector of HIs by the number of set bits in a predicate.
10340 ;; See aarch64_sve_cntp for a description of the operands.
10341 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10342   [(set (match_operand:VNx8HI 0 "register_operand")
10343         (ANY_PLUS:VNx8HI
10344           (vec_duplicate:VNx8HI
10345             (truncate:HI
10346               (unspec:SI
10347                 [(match_dup 3)
10348                  (const_int SVE_KNOWN_PTRUE)
10349                  (match_operand:<VPRED> 2 "register_operand")]
10350                 UNSPEC_CNTP)))
10351           (match_operand:VNx8HI_ONLY 1 "register_operand")))]
10352   "TARGET_SVE"
10353   {
10354     operands[3] = CONSTM1_RTX (<VPRED>mode);
10355   }
10356 )
10357
10358 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10359   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10360         (ANY_PLUS:VNx8HI
10361           (vec_duplicate:VNx8HI
10362             (match_operator:HI 3 "subreg_lowpart_operator"
10363               [(unspec:SI
10364                  [(match_operand 4)
10365                   (const_int SVE_KNOWN_PTRUE)
10366                   (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10367                  UNSPEC_CNTP)]))
10368           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
10369   "TARGET_SVE"
10370   "@
10371    <inc_dec>p\t%0.h, %2
10372    movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
10373   "&& !CONSTANT_P (operands[4])"
10374   {
10375     operands[4] = CONSTM1_RTX (<VPRED>mode);
10376   }
10377   [(set_attr "movprfx" "*,yes")]
10378 )
10379
10380 ;; -------------------------------------------------------------------------
10381 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
10382 ;; -------------------------------------------------------------------------
10383 ;; Includes:
10384 ;; - DECP
10385 ;; - SQDECP
10386 ;; - UQDECP
10387 ;; -------------------------------------------------------------------------
10388
10389 ;; Decrement a DImode register by the number of set bits in a predicate.
10390 ;; See aarch64_sve_cntp for a description of the operands.
10391 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10392   [(set (match_operand:DI 0 "register_operand")
10393         (ANY_MINUS:DI
10394           (match_operand:DI_ONLY 1 "register_operand")
10395           (zero_extend:DI
10396             (unspec:SI [(match_dup 3)
10397                         (const_int SVE_KNOWN_PTRUE)
10398                         (match_operand:PRED_ALL 2 "register_operand")]
10399                        UNSPEC_CNTP))))]
10400   "TARGET_SVE"
10401   {
10402     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10403   }
10404 )
10405
10406 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10407   [(set (match_operand:DI 0 "register_operand" "=r")
10408         (ANY_MINUS:DI
10409           (match_operand:DI_ONLY 1 "register_operand" "0")
10410           (zero_extend:DI
10411             (unspec:SI [(match_operand 3)
10412                         (const_int SVE_KNOWN_PTRUE)
10413                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10414                        UNSPEC_CNTP))))]
10415   "TARGET_SVE"
10416   "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
10417   "&& !CONSTANT_P (operands[3])"
10418   {
10419     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10420   }
10421 )
10422
10423 ;; Decrement an SImode register by the number of set bits in a predicate
10424 ;; using modular arithmetic.  See aarch64_sve_cntp for a description of the
10425 ;; operands.
10426 (define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
10427   [(set (match_operand:SI 0 "register_operand" "=r")
10428         (minus:SI
10429           (match_operand:SI 1 "register_operand" "0")
10430           (unspec:SI [(match_operand 3)
10431                       (const_int SVE_KNOWN_PTRUE)
10432                       (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10433                      UNSPEC_CNTP)))]
10434   "TARGET_SVE"
10435   "decp\t%x0, %2.<Vetype>"
10436   "&& !CONSTANT_P (operands[3])"
10437   {
10438     operands[3] = CONSTM1_RTX (<MODE>mode);
10439   }
10440 )
10441
10442 ;; Decrement an SImode register by the number of set bits in a predicate
10443 ;; using saturating arithmetic, extending the result to 64 bits.
10444 ;;
10445 ;; See aarch64_sve_cntp for a description of the operands.
10446 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10447   [(set (match_operand:DI 0 "register_operand")
10448         (<paired_extend>:DI
10449           (SAT_MINUS:SI
10450             (match_operand:SI_ONLY 1 "register_operand")
10451             (unspec:SI [(match_dup 3)
10452                         (const_int SVE_KNOWN_PTRUE)
10453                         (match_operand:PRED_ALL 2 "register_operand")]
10454                        UNSPEC_CNTP))))]
10455   "TARGET_SVE"
10456   {
10457     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10458   }
10459 )
10460
10461 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10462   [(set (match_operand:DI 0 "register_operand" "=r")
10463         (<paired_extend>:DI
10464           (SAT_MINUS:SI
10465             (match_operand:SI_ONLY 1 "register_operand" "0")
10466             (unspec:SI [(match_operand 3)
10467                         (const_int SVE_KNOWN_PTRUE)
10468                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10469                        UNSPEC_CNTP))))]
10470   "TARGET_SVE"
10471   {
10472     if (<CODE> == SS_MINUS)
10473       return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
10474     else
10475       return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
10476   }
10477   "&& !CONSTANT_P (operands[3])"
10478   {
10479     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10480   }
10481 )
10482
10483 ;; -------------------------------------------------------------------------
10484 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
10485 ;; -------------------------------------------------------------------------
10486 ;; Includes:
10487 ;; - DECP
10488 ;; - SQDECP
10489 ;; - UQDECP
10490 ;; -------------------------------------------------------------------------
10491
10492 ;; Decrement a vector of DIs by the number of set bits in a predicate.
10493 ;; See aarch64_sve_cntp for a description of the operands.
10494 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10495   [(set (match_operand:VNx2DI 0 "register_operand")
10496         (ANY_MINUS:VNx2DI
10497           (match_operand:VNx2DI_ONLY 1 "register_operand")
10498           (vec_duplicate:VNx2DI
10499             (zero_extend:DI
10500               (unspec:SI
10501                 [(match_dup 3)
10502                  (const_int SVE_KNOWN_PTRUE)
10503                  (match_operand:<VPRED> 2 "register_operand")]
10504                 UNSPEC_CNTP)))))]
10505   "TARGET_SVE"
10506   {
10507     operands[3] = CONSTM1_RTX (<VPRED>mode);
10508   }
10509 )
10510
10511 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10512   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10513         (ANY_MINUS:VNx2DI
10514           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
10515           (vec_duplicate:VNx2DI
10516             (zero_extend:DI
10517               (unspec:SI
10518                 [(match_operand 3)
10519                  (const_int SVE_KNOWN_PTRUE)
10520                  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10521                 UNSPEC_CNTP)))))]
10522   "TARGET_SVE"
10523   "@
10524    <inc_dec>p\t%0.d, %2
10525    movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
10526   "&& !CONSTANT_P (operands[3])"
10527   {
10528     operands[3] = CONSTM1_RTX (<VPRED>mode);
10529   }
10530   [(set_attr "movprfx" "*,yes")]
10531 )
10532
10533 ;; Decrement a vector of SIs by the number of set bits in a predicate.
10534 ;; See aarch64_sve_cntp for a description of the operands.
10535 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10536   [(set (match_operand:VNx4SI 0 "register_operand")
10537         (ANY_MINUS:VNx4SI
10538           (match_operand:VNx4SI_ONLY 1 "register_operand")
10539           (vec_duplicate:VNx4SI
10540             (unspec:SI
10541               [(match_dup 3)
10542                (const_int SVE_KNOWN_PTRUE)
10543                (match_operand:<VPRED> 2 "register_operand")]
10544               UNSPEC_CNTP))))]
10545   "TARGET_SVE"
10546   {
10547     operands[3] = CONSTM1_RTX (<VPRED>mode);
10548   }
10549 )
10550
10551 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10552   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10553         (ANY_MINUS:VNx4SI
10554           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
10555           (vec_duplicate:VNx4SI
10556             (unspec:SI
10557               [(match_operand 3)
10558                (const_int SVE_KNOWN_PTRUE)
10559                (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10560               UNSPEC_CNTP))))]
10561   "TARGET_SVE"
10562   "@
10563    <inc_dec>p\t%0.s, %2
10564    movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
10565   "&& !CONSTANT_P (operands[3])"
10566   {
10567     operands[3] = CONSTM1_RTX (<VPRED>mode);
10568   }
10569   [(set_attr "movprfx" "*,yes")]
10570 )
10571
10572 ;; Decrement a vector of HIs by the number of set bits in a predicate.
10573 ;; See aarch64_sve_cntp for a description of the operands.
10574 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10575   [(set (match_operand:VNx8HI 0 "register_operand")
10576         (ANY_MINUS:VNx8HI
10577           (match_operand:VNx8HI_ONLY 1 "register_operand")
10578           (vec_duplicate:VNx8HI
10579             (truncate:HI
10580               (unspec:SI
10581                 [(match_dup 3)
10582                  (const_int SVE_KNOWN_PTRUE)
10583                  (match_operand:<VPRED> 2 "register_operand")]
10584                 UNSPEC_CNTP)))))]
10585   "TARGET_SVE"
10586   {
10587     operands[3] = CONSTM1_RTX (<VPRED>mode);
10588   }
10589 )
10590
10591 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10592   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10593         (ANY_MINUS:VNx8HI
10594           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
10595           (vec_duplicate:VNx8HI
10596             (match_operator:HI 3 "subreg_lowpart_operator"
10597               [(unspec:SI
10598                  [(match_operand 4)
10599                   (const_int SVE_KNOWN_PTRUE)
10600                   (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
10601                  UNSPEC_CNTP)]))))]
10602   "TARGET_SVE"
10603   "@
10604    <inc_dec>p\t%0.h, %2
10605    movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
10606   "&& !CONSTANT_P (operands[4])"
10607   {
10608     operands[4] = CONSTM1_RTX (<VPRED>mode);
10609   }
10610   [(set_attr "movprfx" "*,yes")]
10611 )